# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
R David Murrayc27e5222012-05-25 15:01:48 -040019import email.policy
Guido van Rossum8b3febe2007-08-30 01:15:14 +000020
21from email.charset import Charset
22from email.header import Header, decode_header, make_header
23from email.parser import Parser, HeaderParser
24from email.generator import Generator, DecodedGenerator
25from email.message import Message
26from email.mime.application import MIMEApplication
27from email.mime.audio import MIMEAudio
28from email.mime.text import MIMEText
29from email.mime.image import MIMEImage
30from email.mime.base import MIMEBase
31from email.mime.message import MIMEMessage
32from email.mime.multipart import MIMEMultipart
33from email import utils
34from email import errors
35from email import encoders
36from email import iterators
37from email import base64mime
38from email import quoprimime
39
R David Murray28346b82011-03-31 11:40:20 -040040from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040041from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000042
# Small string constants shared by the tests in this module
# (for example, NL.join(...) when re-assembling flattened output).
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
46
47
Guido van Rossum8b3febe2007-08-30 01:15:14 +000048# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Fixture messages are loaded through self._msgobj() and openfile(),
    both of which are provided outside this class.  Test methods carry
    comments rather than docstrings so that verbose unittest output keeps
    showing the method names.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the caller-supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # the remainder must match the file text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter appends it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # Passes as long as del_param() does not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was before the call.
        self.assertEqual(msg['Content-Type'], existing_header)

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header name only the first occurrence changes.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The expected value is the undecoded payload text as bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_unicode_header_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('abc\n')
        m['Subject'] = 'É test'
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="us-ascii"
            MIME-Version: 1.0
            Content-Transfer-Encoding: 7bit
            Subject: =?utf-8?q?=C3=89_test?=

            abc
            """))

    def test_unicode_body_defaults_to_utf8_encoding(self):
        # Issue 14291
        m = MIMEText('É testabc\n')
        self.assertEqual(str(m), textwrap.dedent("""\
            Content-Type: text/plain; charset="utf-8"
            MIME-Version: 1.0
            Content-Transfer-Encoding: base64

            w4kgdGVzdGFiYwo=
            """))
631
632
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000633# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Check the Content-Transfer-Encoding chosen for various payloads."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], 'base64')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000670
Ezio Melottib3aedd42010-11-20 19:04:17 +0000671
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000672# Test long header wrapping
673class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400674
675 maxDiff = None
676
def test_split_long_continuation(self):
    # Parse a message whose Subject has a very long tab-led continuation
    # line, flatten it with a Generator, and compare the flattened text
    # with the expected header layout.
    source_text = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    expected = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    parsed = email.message_from_string(source_text)
    buffer = StringIO()
    Generator(buffer).flatten(parsed)
    self.ndiffAssertEqual(buffer.getvalue(), expected)
696
def test_another_long_almost_unsplittable_header(self):
    # Encode a header containing one very long token, first with tab
    # continuation lines and then with the tabs replaced by spaces, and
    # compare each result with the expected folded text.
    tabbed_value = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    tabbed_header = Header(tabbed_value, continuation_ws='\t')
    self.ndiffAssertEqual(tabbed_header.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    spaced_value = tabbed_value.replace('\t', ' ')
    spaced_header = Header(spaced_value)
    self.ndiffAssertEqual(spaced_header.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
713
def test_long_nonstring(self):
    # Build one Subject header from three chunks in three different
    # charsets, then check both the flattened message and a direct
    # Header.encode(maxlinelen=76) call against the expected folding.
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    unicode_charset = Charset("utf-8")
    german_bytes = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                    b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                    b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                    b'bef\xf6rdert. ')
    czech_bytes = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
    japanese_text = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
    subject = Header(german_bytes, latin1, header_name='Subject')
    subject.append(czech_bytes, latin2)
    subject.append(japanese_text, unicode_charset)
    message = Message()
    message['Subject'] = subject
    buffer = StringIO()
    Generator(buffer).flatten(message)
    self.ndiffAssertEqual(buffer.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    self.ndiffAssertEqual(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000767
768 def test_long_header_encode(self):
769 eq = self.ndiffAssertEqual
770 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
771 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
772 header_name='X-Foobar-Spoink-Defrobnit')
773 eq(h.encode(), '''\
774wasnipoop; giraffes="very-long-necked-animals";
775 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
776
777 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
778 eq = self.ndiffAssertEqual
779 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
780 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
781 header_name='X-Foobar-Spoink-Defrobnit',
782 continuation_ws='\t')
783 eq(h.encode(), '''\
784wasnipoop; giraffes="very-long-necked-animals";
785 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
786
787 def test_long_header_encode_with_tab_continuation(self):
788 eq = self.ndiffAssertEqual
789 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
790 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
791 header_name='X-Foobar-Spoink-Defrobnit',
792 continuation_ws='\t')
793 eq(h.encode(), '''\
794wasnipoop; giraffes="very-long-necked-animals";
795\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
796
R David Murray3a6152f2011-03-14 21:13:03 -0400797 def test_header_encode_with_different_output_charset(self):
798 h = Header('文', 'euc-jp')
799 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
800
801 def test_long_header_encode_with_different_output_charset(self):
802 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
803 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
804 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
805 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
806 res = """\
807=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
808 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
809 self.assertEqual(h.encode(), res)
810
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000811 def test_header_splitter(self):
812 eq = self.ndiffAssertEqual
813 msg = MIMEText('')
814 # It'd be great if we could use add_header() here, but that doesn't
815 # guarantee an order of the parameters.
816 msg['X-Foobar-Spoink-Defrobnit'] = (
817 'wasnipoop; giraffes="very-long-necked-animals"; '
818 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
819 sfp = StringIO()
820 g = Generator(sfp)
821 g.flatten(msg)
822 eq(sfp.getvalue(), '''\
823Content-Type: text/plain; charset="us-ascii"
824MIME-Version: 1.0
825Content-Transfer-Encoding: 7bit
826X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
827 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
828
829''')
830
def test_no_semis_header_splitter(self):
    # Ten message-ids separated only by spaces: the expected output folds
    # the References header at a space.
    message = Message()
    message['From'] = 'test@dom.ain'
    message_ids = ['<%d@dom.ain>' % index for index in range(10)]
    message['References'] = SPACE.join(message_ids)
    message.set_payload('Test')
    buffer = StringIO()
    Generator(buffer).flatten(message)
    self.ndiffAssertEqual(buffer.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")
846
R David Murray7da4db12011-04-07 20:37:17 -0400847 def test_last_split_chunk_does_not_fit(self):
848 eq = self.ndiffAssertEqual
849 h = Header('Subject: the first part of this is short, but_the_second'
850 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
851 '_all_by_itself')
852 eq(h.encode(), """\
853Subject: the first part of this is short,
854 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
855
856 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
857 eq = self.ndiffAssertEqual
858 h = Header(', but_the_second'
859 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
860 '_all_by_itself')
861 eq(h.encode(), """\
862,
863 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
864
865 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
866 eq = self.ndiffAssertEqual
867 h = Header(', , but_the_second'
868 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
869 '_all_by_itself')
870 eq(h.encode(), """\
871, ,
872 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
873
874 def test_trailing_splitable_on_overlong_unsplitable(self):
875 eq = self.ndiffAssertEqual
876 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
877 'be_on_a_line_all_by_itself;')
878 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
879 "be_on_a_line_all_by_itself;")
880
881 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
882 eq = self.ndiffAssertEqual
883 h = Header('; '
884 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400885 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400886 eq(h.encode(), """\
887;
R David Murray01581ee2011-04-18 10:04:34 -0400888 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400889
R David Murraye1292a22011-04-07 20:54:03 -0400890 def test_long_header_with_multiple_sequential_split_chars(self):
R David Murraye1292a22011-04-07 20:54:03 -0400891 eq = self.ndiffAssertEqual
892 h = Header('This is a long line that has two whitespaces in a row. '
893 'This used to cause truncation of the header when folded')
894 eq(h.encode(), """\
895This is a long line that has two whitespaces in a row. This used to cause
896 truncation of the header when folded""")
897
R David Murray01581ee2011-04-18 10:04:34 -0400898 def test_splitter_split_on_punctuation_only_if_fws(self):
899 eq = self.ndiffAssertEqual
900 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
901 'they;arenotlegal;fold,points')
902 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
903 "arenotlegal;fold,points")
904
905 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
906 eq = self.ndiffAssertEqual
907 h = Header('this is a test where we need to have more than one line '
908 'before; our final line that is just too big to fit;; '
909 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
910 'be_on_a_line_all_by_itself;')
911 eq(h.encode(), """\
912this is a test where we need to have more than one line before;
913 our final line that is just too big to fit;;
914 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
915
916 def test_overlong_last_part_followed_by_split_point(self):
917 eq = self.ndiffAssertEqual
918 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
919 'be_on_a_line_all_by_itself ')
920 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
921 "should_be_on_a_line_all_by_itself ")
922
923 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
924 eq = self.ndiffAssertEqual
925 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
926 'before_our_final_line_; ; '
927 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
928 'be_on_a_line_all_by_itself; ')
929 eq(h.encode(), """\
930this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
931 ;
932 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
933
934 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
935 eq = self.ndiffAssertEqual
936 h = Header('this is a test where we need to have more than one line '
937 'before our final line; ; '
938 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
939 'be_on_a_line_all_by_itself; ')
940 eq(h.encode(), """\
941this is a test where we need to have more than one line before our final line;
942 ;
943 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
944
def test_long_header_with_whitespace_runs(self):
    # Each message-id carries two trailing spaces, so joining with SPACE
    # leaves runs of three spaces between ids and two spaces at the very
    # end of the value.  Folding must keep every run intact: continuation
    # lines start with the full three-space run and the last line keeps
    # its two trailing spaces (spelled \x20\x20 so they cannot be
    # stripped by an editor).
    eq = self.ndiffAssertEqual
    msg = Message()
    msg['From'] = 'test@dom.ain'
    msg['References'] = SPACE.join(['<foo@dom.ain>  '] * 10)
    msg.set_payload('Test')
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>   <foo@dom.ain>
   <foo@dom.ain>   <foo@dom.ain>\x20\x20

Test""")
961
def test_long_run_with_semi_header_splitter(self):
    # A long run of space-separated ids followed by '; abc': the expected
    # output folds at spaces and leaves '; abc' on the last line.
    message = Message()
    message['From'] = 'test@dom.ain'
    id_run = SPACE.join(['<foo@dom.ain>'] * 10)
    message['References'] = id_run + '; abc'
    message.set_payload('Test')
    buffer = StringIO()
    Generator(buffer).flatten(message)
    self.ndiffAssertEqual(buffer.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain>; abc

Test""")
978
979 def test_splitter_split_on_punctuation_only_if_fws(self):
980 eq = self.ndiffAssertEqual
981 msg = Message()
982 msg['From'] = 'test@dom.ain'
983 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
984 'they;arenotlegal;fold,points')
985 msg.set_payload('Test')
986 sfp = StringIO()
987 g = Generator(sfp)
988 g.flatten(msg)
989 # XXX the space after the header should not be there.
990 eq(sfp.getvalue(), """\
991From: test@dom.ain
992References:\x20
993 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
994
995Test""")
996
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000997 def test_no_split_long_header(self):
998 eq = self.ndiffAssertEqual
999 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +00001000 h = Header(hstr)
1001 # These come on two lines because Headers are really field value
1002 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001003 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001004References:
1005 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
1006 h = Header('x' * 80)
1007 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001008
1009 def test_splitting_multiple_long_lines(self):
1010 eq = self.ndiffAssertEqual
1011 hstr = """\
1012from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1013\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1014\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1015"""
1016 h = Header(hstr, continuation_ws='\t')
1017 eq(h.encode(), """\
1018from babylon.socal-raves.org (localhost [127.0.0.1]);
1019 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1020 for <mailman-admin@babylon.socal-raves.org>;
1021 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1022\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1023 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1024 for <mailman-admin@babylon.socal-raves.org>;
1025 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1026\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1027 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1028 for <mailman-admin@babylon.socal-raves.org>;
1029 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1030
1031 def test_splitting_first_line_only_is_long(self):
1032 eq = self.ndiffAssertEqual
1033 hstr = """\
1034from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1035\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1036\tid 17k4h5-00034i-00
1037\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1038 h = Header(hstr, maxlinelen=78, header_name='Received',
1039 continuation_ws='\t')
1040 eq(h.encode(), """\
1041from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1042 helo=cthulhu.gerg.ca)
1043\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1044\tid 17k4h5-00034i-00
1045\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1046
1047 def test_long_8bit_header(self):
1048 eq = self.ndiffAssertEqual
1049 msg = Message()
1050 h = Header('Britische Regierung gibt', 'iso-8859-1',
1051 header_name='Subject')
1052 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001053 eq(h.encode(maxlinelen=76), """\
1054=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1055 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001056 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001057 eq(msg.as_string(maxheaderlen=76), """\
1058Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1059 =?iso-8859-1?q?hore-Windkraftprojekte?=
1060
1061""")
1062 eq(msg.as_string(maxheaderlen=0), """\
1063Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001064
1065""")
1066
1067 def test_long_8bit_header_no_charset(self):
1068 eq = self.ndiffAssertEqual
1069 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001070 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1071 'f\xfcr Offshore-Windkraftprojekte '
1072 '<a-very-long-address@example.com>')
1073 msg['Reply-To'] = header_string
R David Murray7441a7a2012-03-14 02:59:51 -04001074 eq(msg.as_string(maxheaderlen=78), """\
1075Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1076 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
1077
1078""")
Barry Warsaw8c571042007-08-30 19:17:18 +00001079 msg = Message()
R David Murray7441a7a2012-03-14 02:59:51 -04001080 msg['Reply-To'] = Header(header_string,
Barry Warsaw8c571042007-08-30 19:17:18 +00001081 header_name='Reply-To')
1082 eq(msg.as_string(maxheaderlen=78), """\
1083Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1084 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001085
1086""")
1087
1088 def test_long_to_header(self):
1089 eq = self.ndiffAssertEqual
1090 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001091 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001092 '"Someone Test #B" <someone@umich.edu>, '
1093 '"Someone Test #C" <someone@eecs.umich.edu>, '
1094 '"Someone Test #D" <someone@eecs.umich.edu>')
1095 msg = Message()
1096 msg['To'] = to
1097 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001098To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001099 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001100 "Someone Test #C" <someone@eecs.umich.edu>,
1101 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001102
1103''')
1104
1105 def test_long_line_after_append(self):
1106 eq = self.ndiffAssertEqual
1107 s = 'This is an example of string which has almost the limit of header length.'
1108 h = Header(s)
1109 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001110 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111This is an example of string which has almost the limit of header length.
1112 Add another line.""")
1113
1114 def test_shorter_line_with_append(self):
1115 eq = self.ndiffAssertEqual
1116 s = 'This is a shorter line.'
1117 h = Header(s)
1118 h.append('Add another sentence. (Surprise?)')
1119 eq(h.encode(),
1120 'This is a shorter line. Add another sentence. (Surprise?)')
1121
1122 def test_long_field_name(self):
1123 eq = self.ndiffAssertEqual
1124 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001125 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1126 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1127 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1128 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001129 h = Header(gs, 'iso-8859-1', header_name=fn)
1130 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001131 eq(h.encode(maxlinelen=76), """\
1132=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1133 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1134 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1135 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001136
1137 def test_long_received_header(self):
1138 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1139 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1140 'Wed, 05 Mar 2003 18:10:18 -0700')
1141 msg = Message()
1142 msg['Received-1'] = Header(h, continuation_ws='\t')
1143 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001144 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001145 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001146Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1147 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001148 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001149Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1150 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001151 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001152
1153""")
1154
1155 def test_string_headerinst_eq(self):
1156 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1157 'tu-muenchen.de> (David Bremner\'s message of '
1158 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1159 msg = Message()
1160 msg['Received-1'] = Header(h, header_name='Received-1',
1161 continuation_ws='\t')
1162 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001163 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001164 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001165Received-1:\x20
1166 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1167 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1168Received-2:\x20
1169 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1170 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001171
1172""")
1173
1174 def test_long_unbreakable_lines_with_continuation(self):
1175 eq = self.ndiffAssertEqual
1176 msg = Message()
1177 t = """\
1178iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1179 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1180 msg['Face-1'] = t
1181 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001182 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001183 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001184 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001185 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001186Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001187 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001188 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001189Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001190 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001191 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001192Face-3:\x20
1193 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1194 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001195
1196""")
1197
1198 def test_another_long_multiline_header(self):
1199 eq = self.ndiffAssertEqual
1200 m = ('Received: from siimage.com '
1201 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001202 'Microsoft SMTPSVC(5.0.2195.4905); '
1203 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001204 msg = email.message_from_string(m)
1205 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001206Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1207 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001208
1209''')
1210
1211 def test_long_lines_with_different_header(self):
1212 eq = self.ndiffAssertEqual
1213 h = ('List-Unsubscribe: '
1214 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1215 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1216 '?subject=unsubscribe>')
1217 msg = Message()
1218 msg['List'] = h
1219 msg['List'] = Header(h, header_name='List')
1220 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001221List: List-Unsubscribe:
1222 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001223 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001224List: List-Unsubscribe:
1225 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001226 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001227
1228""")
1229
R. David Murray6f0022d2011-01-07 21:57:25 +00001230 def test_long_rfc2047_header_with_embedded_fws(self):
1231 h = Header(textwrap.dedent("""\
1232 We're going to pretend this header is in a non-ascii character set
1233 \tto see if line wrapping with encoded words and embedded
1234 folding white space works"""),
1235 charset='utf-8',
1236 header_name='Test')
1237 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1238 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1239 =?utf-8?q?cter_set?=
1240 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1241 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1242
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001243
Ezio Melottib3aedd42010-11-20 19:04:17 +00001244
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001245# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # The fixture is a message whose body starts with the word "From ",
    # which is what the generator's mangle_from_ option acts on.
    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Render the fixture with the given mangle_from_ setting.
        buffer = StringIO()
        Generator(buffer, mangle_from_=mangle).flatten(self.msg)
        return buffer.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is emitted unchanged.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
1276
1277
Ezio Melottib3aedd42010-11-20 19:04:17 +00001278
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001279# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Read the sample audio file and wrap its bytes in a MIMEAudio part.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list serves as a sentinel so identity can be checked.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both objects on failure, unlike
        # assertTrue(a is b).
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1317
1318
Ezio Melottib3aedd42010-11-20 19:04:17 +00001319
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001320# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the sample GIF and wrap its bytes in a MIMEImage part.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is text; decoding it as base64 must give back the
        # original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list serves as a sentinel so identity can be checked.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        # assertIs reports both objects on failure, unlike
        # assertTrue(a is b).
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1358
1359
Ezio Melottib3aedd42010-11-20 19:04:17 +00001360
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001361# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Raw bytes with no subtype given: the expected content type is
        # application/octet-stream with a base64 transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        encoded_text = part.get_payload()
        # whitespace in the cte encoded block is RFC-irrelevant.
        self.assertEqual(encoded_text.strip(), '+vv8/f7/')
        decoded = part.get_payload(decode=True)
        self.assertEqual(decoded, raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001376
1377
Ezio Melottib3aedd42010-11-20 19:04:17 +00001378
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001379# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel so identity can be checked;
        # assertIs reports both objects on failure.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the searched text on failure.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1430
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001431
Ezio Melottib3aedd42010-11-20 19:04:17 +00001432
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001433# Test complicated multipart/* messages
1434class TestMultipart(TestEmailBase):
1435 def setUp(self):
1436 with openfile('PyBanner048.gif', 'rb') as fp:
1437 data = fp.read()
1438 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1439 image = MIMEImage(data, name='dingusfish.gif')
1440 image.add_header('content-disposition', 'attachment',
1441 filename='dingusfish.gif')
1442 intro = MIMEText('''\
1443Hi there,
1444
1445This is the dingus fish.
1446''')
1447 container.attach(intro)
1448 container.attach(image)
1449 container['From'] = 'Barry <barry@digicool.com>'
1450 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1451 container['Subject'] = 'Here is your dingus fish'
1452
1453 now = 987809702.54848599
1454 timetuple = time.localtime(now)
1455 if timetuple[-1] == 0:
1456 tzsecs = time.timezone
1457 else:
1458 tzsecs = time.altzone
1459 if tzsecs > 0:
1460 sign = '-'
1461 else:
1462 sign = '+'
1463 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1464 container['Date'] = time.strftime(
1465 '%a, %d %b %Y %H:%M:%S',
1466 time.localtime(now)) + tzoffset
1467 self._msg = container
1468 self._im = image
1469 self._txt = intro
1470
1471 def test_hierarchy(self):
1472 # convenience
1473 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001474 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001475 raises = self.assertRaises
1476 # tests
1477 m = self._msg
1478 unless(m.is_multipart())
1479 eq(m.get_content_type(), 'multipart/mixed')
1480 eq(len(m.get_payload()), 2)
1481 raises(IndexError, m.get_payload, 2)
1482 m0 = m.get_payload(0)
1483 m1 = m.get_payload(1)
1484 unless(m0 is self._txt)
1485 unless(m1 is self._im)
1486 eq(m.get_payload(), [m0, m1])
1487 unless(not m0.is_multipart())
1488 unless(not m1.is_multipart())
1489
1490 def test_empty_multipart_idempotent(self):
1491 text = """\
1492Content-Type: multipart/mixed; boundary="BOUNDARY"
1493MIME-Version: 1.0
1494Subject: A subject
1495To: aperson@dom.ain
1496From: bperson@dom.ain
1497
1498
1499--BOUNDARY
1500
1501
1502--BOUNDARY--
1503"""
1504 msg = Parser().parsestr(text)
1505 self.ndiffAssertEqual(text, msg.as_string())
1506
1507 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1508 outer = MIMEBase('multipart', 'mixed')
1509 outer['Subject'] = 'A subject'
1510 outer['To'] = 'aperson@dom.ain'
1511 outer['From'] = 'bperson@dom.ain'
1512 outer.set_boundary('BOUNDARY')
1513 self.ndiffAssertEqual(outer.as_string(), '''\
1514Content-Type: multipart/mixed; boundary="BOUNDARY"
1515MIME-Version: 1.0
1516Subject: A subject
1517To: aperson@dom.ain
1518From: bperson@dom.ain
1519
1520--BOUNDARY
1521
1522--BOUNDARY--''')
1523
1524 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1525 outer = MIMEBase('multipart', 'mixed')
1526 outer['Subject'] = 'A subject'
1527 outer['To'] = 'aperson@dom.ain'
1528 outer['From'] = 'bperson@dom.ain'
1529 outer.preamble = ''
1530 outer.epilogue = ''
1531 outer.set_boundary('BOUNDARY')
1532 self.ndiffAssertEqual(outer.as_string(), '''\
1533Content-Type: multipart/mixed; boundary="BOUNDARY"
1534MIME-Version: 1.0
1535Subject: A subject
1536To: aperson@dom.ain
1537From: bperson@dom.ain
1538
1539
1540--BOUNDARY
1541
1542--BOUNDARY--
1543''')
1544
1545 def test_one_part_in_a_multipart(self):
1546 eq = self.ndiffAssertEqual
1547 outer = MIMEBase('multipart', 'mixed')
1548 outer['Subject'] = 'A subject'
1549 outer['To'] = 'aperson@dom.ain'
1550 outer['From'] = 'bperson@dom.ain'
1551 outer.set_boundary('BOUNDARY')
1552 msg = MIMEText('hello world')
1553 outer.attach(msg)
1554 eq(outer.as_string(), '''\
1555Content-Type: multipart/mixed; boundary="BOUNDARY"
1556MIME-Version: 1.0
1557Subject: A subject
1558To: aperson@dom.ain
1559From: bperson@dom.ain
1560
1561--BOUNDARY
1562Content-Type: text/plain; charset="us-ascii"
1563MIME-Version: 1.0
1564Content-Transfer-Encoding: 7bit
1565
1566hello world
1567--BOUNDARY--''')
1568
1569 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1570 eq = self.ndiffAssertEqual
1571 outer = MIMEBase('multipart', 'mixed')
1572 outer['Subject'] = 'A subject'
1573 outer['To'] = 'aperson@dom.ain'
1574 outer['From'] = 'bperson@dom.ain'
1575 outer.preamble = ''
1576 msg = MIMEText('hello world')
1577 outer.attach(msg)
1578 outer.set_boundary('BOUNDARY')
1579 eq(outer.as_string(), '''\
1580Content-Type: multipart/mixed; boundary="BOUNDARY"
1581MIME-Version: 1.0
1582Subject: A subject
1583To: aperson@dom.ain
1584From: bperson@dom.ain
1585
1586
1587--BOUNDARY
1588Content-Type: text/plain; charset="us-ascii"
1589MIME-Version: 1.0
1590Content-Transfer-Encoding: 7bit
1591
1592hello world
1593--BOUNDARY--''')
1594
1595
1596 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1597 eq = self.ndiffAssertEqual
1598 outer = MIMEBase('multipart', 'mixed')
1599 outer['Subject'] = 'A subject'
1600 outer['To'] = 'aperson@dom.ain'
1601 outer['From'] = 'bperson@dom.ain'
1602 outer.preamble = None
1603 msg = MIMEText('hello world')
1604 outer.attach(msg)
1605 outer.set_boundary('BOUNDARY')
1606 eq(outer.as_string(), '''\
1607Content-Type: multipart/mixed; boundary="BOUNDARY"
1608MIME-Version: 1.0
1609Subject: A subject
1610To: aperson@dom.ain
1611From: bperson@dom.ain
1612
1613--BOUNDARY
1614Content-Type: text/plain; charset="us-ascii"
1615MIME-Version: 1.0
1616Content-Transfer-Encoding: 7bit
1617
1618hello world
1619--BOUNDARY--''')
1620
1621
1622 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1623 eq = self.ndiffAssertEqual
1624 outer = MIMEBase('multipart', 'mixed')
1625 outer['Subject'] = 'A subject'
1626 outer['To'] = 'aperson@dom.ain'
1627 outer['From'] = 'bperson@dom.ain'
1628 outer.epilogue = None
1629 msg = MIMEText('hello world')
1630 outer.attach(msg)
1631 outer.set_boundary('BOUNDARY')
1632 eq(outer.as_string(), '''\
1633Content-Type: multipart/mixed; boundary="BOUNDARY"
1634MIME-Version: 1.0
1635Subject: A subject
1636To: aperson@dom.ain
1637From: bperson@dom.ain
1638
1639--BOUNDARY
1640Content-Type: text/plain; charset="us-ascii"
1641MIME-Version: 1.0
1642Content-Transfer-Encoding: 7bit
1643
1644hello world
1645--BOUNDARY--''')
1646
1647
1648 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1649 eq = self.ndiffAssertEqual
1650 outer = MIMEBase('multipart', 'mixed')
1651 outer['Subject'] = 'A subject'
1652 outer['To'] = 'aperson@dom.ain'
1653 outer['From'] = 'bperson@dom.ain'
1654 outer.epilogue = ''
1655 msg = MIMEText('hello world')
1656 outer.attach(msg)
1657 outer.set_boundary('BOUNDARY')
1658 eq(outer.as_string(), '''\
1659Content-Type: multipart/mixed; boundary="BOUNDARY"
1660MIME-Version: 1.0
1661Subject: A subject
1662To: aperson@dom.ain
1663From: bperson@dom.ain
1664
1665--BOUNDARY
1666Content-Type: text/plain; charset="us-ascii"
1667MIME-Version: 1.0
1668Content-Transfer-Encoding: 7bit
1669
1670hello world
1671--BOUNDARY--
1672''')
1673
1674
1675 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1676 eq = self.ndiffAssertEqual
1677 outer = MIMEBase('multipart', 'mixed')
1678 outer['Subject'] = 'A subject'
1679 outer['To'] = 'aperson@dom.ain'
1680 outer['From'] = 'bperson@dom.ain'
1681 outer.epilogue = '\n'
1682 msg = MIMEText('hello world')
1683 outer.attach(msg)
1684 outer.set_boundary('BOUNDARY')
1685 eq(outer.as_string(), '''\
1686Content-Type: multipart/mixed; boundary="BOUNDARY"
1687MIME-Version: 1.0
1688Subject: A subject
1689To: aperson@dom.ain
1690From: bperson@dom.ain
1691
1692--BOUNDARY
1693Content-Type: text/plain; charset="us-ascii"
1694MIME-Version: 1.0
1695Content-Transfer-Encoding: 7bit
1696
1697hello world
1698--BOUNDARY--
1699
1700''')
1701
1702 def test_message_external_body(self):
1703 eq = self.assertEqual
1704 msg = self._msgobj('msg_36.txt')
1705 eq(len(msg.get_payload()), 2)
1706 msg1 = msg.get_payload(1)
1707 eq(msg1.get_content_type(), 'multipart/alternative')
1708 eq(len(msg1.get_payload()), 2)
1709 for subpart in msg1.get_payload():
1710 eq(subpart.get_content_type(), 'message/external-body')
1711 eq(len(subpart.get_payload()), 1)
1712 subsubpart = subpart.get_payload(0)
1713 eq(subsubpart.get_content_type(), 'text/plain')
1714
1715 def test_double_boundary(self):
1716 # msg_37.txt is a multipart that contains two dash-boundary's in a
1717 # row. Our interpretation of RFC 2046 calls for ignoring the second
1718 # and subsequent boundaries.
1719 msg = self._msgobj('msg_37.txt')
1720 self.assertEqual(len(msg.get_payload()), 3)
1721
    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        # Render the parsed content-type tree as text and compare it with the
        # expected outline below.
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the leading whitespace of each line in this literal
        # encodes the nesting depth and is significant; presumably four spaces
        # per level, matching what _structure writes -- confirm against the
        # data file before relying on it.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")
1740
    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        # Render the parsed content-type tree as text and compare it with the
        # expected outline below.
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the leading whitespace of each line in this literal
        # encodes the nesting depth and is significant; presumably four spaces
        # per level, matching what _structure writes -- confirm against the
        # data file before relying on it.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")
1757
1758 def test_boundary_in_non_multipart(self):
1759 msg = self._msgobj('msg_40.txt')
1760 self.assertEqual(msg.as_string(), '''\
1761MIME-Version: 1.0
1762Content-Type: text/html; boundary="--961284236552522269"
1763
1764----961284236552522269
1765Content-Type: text/html;
1766Content-Transfer-Encoding: 7Bit
1767
1768<html></html>
1769
1770----961284236552522269--
1771''')
1772
1773 def test_boundary_with_leading_space(self):
1774 eq = self.assertEqual
1775 msg = email.message_from_string('''\
1776MIME-Version: 1.0
1777Content-Type: multipart/mixed; boundary=" XXXX"
1778
1779-- XXXX
1780Content-Type: text/plain
1781
1782
1783-- XXXX
1784Content-Type: text/plain
1785
1786-- XXXX--
1787''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001788 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001789 eq(msg.get_boundary(), ' XXXX')
1790 eq(len(msg.get_payload()), 2)
1791
1792 def test_boundary_without_trailing_newline(self):
1793 m = Parser().parsestr("""\
1794Content-Type: multipart/mixed; boundary="===============0012394164=="
1795MIME-Version: 1.0
1796
1797--===============0012394164==
1798Content-Type: image/file1.jpg
1799MIME-Version: 1.0
1800Content-Transfer-Encoding: base64
1801
1802YXNkZg==
1803--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001804 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001805
1806
Ezio Melottib3aedd42010-11-20 19:04:17 +00001807
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001808# Test some badly formatted messages
R David Murrayc27e5222012-05-25 15:01:48 -04001809class TestNonConformant(TestEmailBase):
R David Murray3edd22a2011-04-18 13:59:37 -04001810
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001811 def test_parse_missing_minor_type(self):
1812 eq = self.assertEqual
1813 msg = self._msgobj('msg_14.txt')
1814 eq(msg.get_content_type(), 'text/plain')
1815 eq(msg.get_content_maintype(), 'text')
1816 eq(msg.get_content_subtype(), 'plain')
1817
R David Murrayc27e5222012-05-25 15:01:48 -04001818 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001819 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001820 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001821 msg = self._msgobj('msg_15.txt')
1822 # XXX We can probably eventually do better
1823 inner = msg.get_payload(0)
1824 unless(hasattr(inner, 'defects'))
R David Murrayc27e5222012-05-25 15:01:48 -04001825 self.assertEqual(len(inner.defects), 1)
1826 unless(isinstance(inner.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001827 errors.StartBoundaryNotFoundDefect))
1828
R David Murrayc27e5222012-05-25 15:01:48 -04001829 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001830 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001831 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001832 msg = self._msgobj('msg_25.txt')
1833 unless(isinstance(msg.get_payload(), str))
R David Murrayc27e5222012-05-25 15:01:48 -04001834 self.assertEqual(len(msg.defects), 2)
1835 unless(isinstance(msg.defects[0],
R David Murray3edd22a2011-04-18 13:59:37 -04001836 errors.NoBoundaryInMultipartDefect))
R David Murrayc27e5222012-05-25 15:01:48 -04001837 unless(isinstance(msg.defects[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001838 errors.MultipartInvariantViolationDefect))
1839
    # Template for a two-part multipart/mixed message.  The "{}" placeholder
    # sits at the very end of the outer header block so tests can append an
    # extra header (or nothing) through str.format().
    # NOTE(review): the "boundary=" line is a folded continuation of the
    # Content-Type header, so it must stay indented further than the other
    # lines after dedent(); the exact extra width is an assumption -- confirm.
    multipart_msg = textwrap.dedent("""\
        Date: Wed, 14 Nov 2007 12:56:23 GMT
        From: foo@bar.invalid
        To: foo@bar.invalid
        Subject: Content-Transfer-Encoding: base64 and multipart
        MIME-Version: 1.0
        Content-Type: multipart/mixed;
            boundary="===============3344438784458119861=="{}

        --===============3344438784458119861==
        Content-Type: text/plain

        Test message

        --===============3344438784458119861==
        Content-Type: application/octet-stream
        Content-Transfer-Encoding: base64

        YWJj

        --===============3344438784458119861==--
        """)
1862
R David Murrayc27e5222012-05-25 15:01:48 -04001863 # test_parser.TestMessageDefectDetectionBase
R David Murray749073a2011-06-22 13:47:53 -04001864 def test_multipart_invalid_cte(self):
R David Murrayc27e5222012-05-25 15:01:48 -04001865 msg = self._str_msg(
1866 self.multipart_msg.format("\nContent-Transfer-Encoding: base64"))
1867 self.assertEqual(len(msg.defects), 1)
1868 self.assertIsInstance(msg.defects[0],
R David Murray749073a2011-06-22 13:47:53 -04001869 errors.InvalidMultipartContentTransferEncodingDefect)
1870
R David Murrayc27e5222012-05-25 15:01:48 -04001871 # test_parser.TestMessageDefectDetectionBase
R David Murray749073a2011-06-22 13:47:53 -04001872 def test_multipart_no_cte_no_defect(self):
R David Murrayc27e5222012-05-25 15:01:48 -04001873 msg = self._str_msg(self.multipart_msg.format(''))
1874 self.assertEqual(len(msg.defects), 0)
R David Murray749073a2011-06-22 13:47:53 -04001875
R David Murrayc27e5222012-05-25 15:01:48 -04001876 # test_parser.TestMessageDefectDetectionBase
R David Murray749073a2011-06-22 13:47:53 -04001877 def test_multipart_valid_cte_no_defect(self):
1878 for cte in ('7bit', '8bit', 'BINary'):
R David Murrayc27e5222012-05-25 15:01:48 -04001879 msg = self._str_msg(
R David Murray749073a2011-06-22 13:47:53 -04001880 self.multipart_msg.format(
R David Murrayc27e5222012-05-25 15:01:48 -04001881 "\nContent-Transfer-Encoding: {}".format(cte)))
1882 self.assertEqual(len(msg.defects), 0)
R David Murray749073a2011-06-22 13:47:53 -04001883
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001884 def test_invalid_content_type(self):
1885 eq = self.assertEqual
1886 neq = self.ndiffAssertEqual
1887 msg = Message()
1888 # RFC 2045, $5.2 says invalid yields text/plain
1889 msg['Content-Type'] = 'text'
1890 eq(msg.get_content_maintype(), 'text')
1891 eq(msg.get_content_subtype(), 'plain')
1892 eq(msg.get_content_type(), 'text/plain')
1893 # Clear the old value and try something /really/ invalid
1894 del msg['content-type']
1895 msg['Content-Type'] = 'foo'
1896 eq(msg.get_content_maintype(), 'text')
1897 eq(msg.get_content_subtype(), 'plain')
1898 eq(msg.get_content_type(), 'text/plain')
1899 # Still, make sure that the message is idempotently generated
1900 s = StringIO()
1901 g = Generator(s)
1902 g.flatten(msg)
1903 neq(s.getvalue(), 'Content-Type: foo\n\n')
1904
1905 def test_no_start_boundary(self):
1906 eq = self.ndiffAssertEqual
1907 msg = self._msgobj('msg_31.txt')
1908 eq(msg.get_payload(), """\
1909--BOUNDARY
1910Content-Type: text/plain
1911
1912message 1
1913
1914--BOUNDARY
1915Content-Type: text/plain
1916
1917message 2
1918
1919--BOUNDARY--
1920""")
1921
1922 def test_no_separating_blank_line(self):
1923 eq = self.ndiffAssertEqual
1924 msg = self._msgobj('msg_35.txt')
1925 eq(msg.as_string(), """\
1926From: aperson@dom.ain
1927To: bperson@dom.ain
1928Subject: here's something interesting
1929
1930counter to RFC 2822, there's no separating newline here
1931""")
1932
R David Murrayc27e5222012-05-25 15:01:48 -04001933 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001934 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001935 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001936 msg = self._msgobj('msg_41.txt')
1937 unless(hasattr(msg, 'defects'))
R David Murrayc27e5222012-05-25 15:01:48 -04001938 self.assertEqual(len(msg.defects), 2)
1939 unless(isinstance(msg.defects[0],
R David Murray3edd22a2011-04-18 13:59:37 -04001940 errors.NoBoundaryInMultipartDefect))
R David Murrayc27e5222012-05-25 15:01:48 -04001941 unless(isinstance(msg.defects[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001942 errors.MultipartInvariantViolationDefect))
1943
R David Murrayc27e5222012-05-25 15:01:48 -04001944 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001945 def test_missing_start_boundary(self):
1946 outer = self._msgobj('msg_42.txt')
1947 # The message structure is:
1948 #
1949 # multipart/mixed
1950 # text/plain
1951 # message/rfc822
1952 # multipart/mixed [*]
1953 #
1954 # [*] This message is missing its start boundary
1955 bad = outer.get_payload(1).get_payload(0)
R David Murrayc27e5222012-05-25 15:01:48 -04001956 self.assertEqual(len(bad.defects), 1)
1957 self.assertTrue(isinstance(bad.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001958 errors.StartBoundaryNotFoundDefect))
1959
R David Murrayc27e5222012-05-25 15:01:48 -04001960 # test_parser.TestMessageDefectDetectionBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001961 def test_first_line_is_continuation_header(self):
1962 eq = self.assertEqual
1963 m = ' Line 1\nLine 2\nLine 3'
R David Murrayc27e5222012-05-25 15:01:48 -04001964 msg = email.message_from_string(m)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001965 eq(msg.keys(), [])
1966 eq(msg.get_payload(), 'Line 2\nLine 3')
R David Murrayc27e5222012-05-25 15:01:48 -04001967 eq(len(msg.defects), 1)
1968 self.assertTrue(isinstance(msg.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001969 errors.FirstHeaderLineIsContinuationDefect))
R David Murrayc27e5222012-05-25 15:01:48 -04001970 eq(msg.defects[0].line, ' Line 1\n')
R David Murray3edd22a2011-04-18 13:59:37 -04001971
Ezio Melottib3aedd42010-11-20 19:04:17 +00001972
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001973# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00001974class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001975 def test_rfc2047_multiline(self):
1976 eq = self.assertEqual
1977 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
1978 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
1979 dh = decode_header(s)
1980 eq(dh, [
1981 (b'Re:', None),
1982 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
1983 (b'baz foo bar', None),
1984 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
1985 header = make_header(dh)
1986 eq(str(header),
1987 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00001988 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001989Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
1990 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001991
1992 def test_whitespace_eater_unicode(self):
1993 eq = self.assertEqual
1994 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
1995 dh = decode_header(s)
1996 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
1997 (b'Pirard <pirard@dom.ain>', None)])
1998 header = str(make_header(dh))
1999 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
2000
2001 def test_whitespace_eater_unicode_2(self):
2002 eq = self.assertEqual
2003 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
2004 dh = decode_header(s)
2005 eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
2006 (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
2007 hu = str(make_header(dh))
2008 eq(hu, 'The quick brown fox jumped over the lazy dog')
2009
2010 def test_rfc2047_missing_whitespace(self):
2011 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
2012 dh = decode_header(s)
2013 self.assertEqual(dh, [(s, None)])
2014
2015 def test_rfc2047_with_whitespace(self):
2016 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
2017 dh = decode_header(s)
2018 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
2019 (b'rg', None), (b'\xe5', 'iso-8859-1'),
2020 (b'sbord', None)])
2021
R. David Murrayc4e69cc2010-08-03 22:14:10 +00002022 def test_rfc2047_B_bad_padding(self):
2023 s = '=?iso-8859-1?B?%s?='
2024 data = [ # only test complete bytes
2025 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2026 ('dmk=', b'vi'), ('dmk', b'vi')
2027 ]
2028 for q, a in data:
2029 dh = decode_header(s % q)
2030 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002031
R. David Murray31e984c2010-10-01 15:40:20 +00002032 def test_rfc2047_Q_invalid_digits(self):
2033 # issue 10004.
2034 s = '=?iso-8659-1?Q?andr=e9=zz?='
2035 self.assertEqual(decode_header(s),
2036 [(b'andr\xe9=zz', 'iso-8659-1')])
2037
Ezio Melottib3aedd42010-11-20 19:04:17 +00002038
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002039# Test the MIMEMessage class
2040class TestMIMEMessage(TestEmailBase):
2041 def setUp(self):
2042 with openfile('msg_11.txt') as fp:
2043 self._text = fp.read()
2044
2045 def test_type_error(self):
2046 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2047
2048 def test_valid_argument(self):
2049 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002050 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002051 subject = 'A sub-message'
2052 m = Message()
2053 m['Subject'] = subject
2054 r = MIMEMessage(m)
2055 eq(r.get_content_type(), 'message/rfc822')
2056 payload = r.get_payload()
2057 unless(isinstance(payload, list))
2058 eq(len(payload), 1)
2059 subpart = payload[0]
2060 unless(subpart is m)
2061 eq(subpart['subject'], subject)
2062
2063 def test_bad_multipart(self):
2064 eq = self.assertEqual
2065 msg1 = Message()
2066 msg1['Subject'] = 'subpart 1'
2067 msg2 = Message()
2068 msg2['Subject'] = 'subpart 2'
2069 r = MIMEMessage(msg1)
2070 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2071
2072 def test_generate(self):
2073 # First craft the message to be encapsulated
2074 m = Message()
2075 m['Subject'] = 'An enclosed message'
2076 m.set_payload('Here is the body of the message.\n')
2077 r = MIMEMessage(m)
2078 r['Subject'] = 'The enclosing message'
2079 s = StringIO()
2080 g = Generator(s)
2081 g.flatten(r)
2082 self.assertEqual(s.getvalue(), """\
2083Content-Type: message/rfc822
2084MIME-Version: 1.0
2085Subject: The enclosing message
2086
2087Subject: An enclosed message
2088
2089Here is the body of the message.
2090""")
2091
2092 def test_parse_message_rfc822(self):
2093 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002094 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002095 msg = self._msgobj('msg_11.txt')
2096 eq(msg.get_content_type(), 'message/rfc822')
2097 payload = msg.get_payload()
2098 unless(isinstance(payload, list))
2099 eq(len(payload), 1)
2100 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002101 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002102 eq(submsg['subject'], 'An enclosed message')
2103 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2104
    def test_dsn(self):
        # Walk the three parts of a multipart/report message and check the
        # type and contents of each.
        eq = self.assertEqual
        unless = self.assertTrue
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        unless(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the indented lines in this literal must match the
        # data file byte for byte; the indentation width used here (two
        # spaces) is an assumption -- confirm against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        unless(isinstance(dsn1, Message))
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        # A parameter that is present without a value is reported as ''.
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        unless(isinstance(dsn2, Message))
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subsubpart = payload[0]
        unless(isinstance(subsubpart, Message))
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')
2162
2163 def test_epilogue(self):
2164 eq = self.ndiffAssertEqual
2165 with openfile('msg_21.txt') as fp:
2166 text = fp.read()
2167 msg = Message()
2168 msg['From'] = 'aperson@dom.ain'
2169 msg['To'] = 'bperson@dom.ain'
2170 msg['Subject'] = 'Test'
2171 msg.preamble = 'MIME message'
2172 msg.epilogue = 'End of MIME message\n'
2173 msg1 = MIMEText('One')
2174 msg2 = MIMEText('Two')
2175 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2176 msg.attach(msg1)
2177 msg.attach(msg2)
2178 sfp = StringIO()
2179 g = Generator(sfp)
2180 g.flatten(msg)
2181 eq(sfp.getvalue(), text)
2182
2183 def test_no_nl_preamble(self):
2184 eq = self.ndiffAssertEqual
2185 msg = Message()
2186 msg['From'] = 'aperson@dom.ain'
2187 msg['To'] = 'bperson@dom.ain'
2188 msg['Subject'] = 'Test'
2189 msg.preamble = 'MIME message'
2190 msg.epilogue = ''
2191 msg1 = MIMEText('One')
2192 msg2 = MIMEText('Two')
2193 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2194 msg.attach(msg1)
2195 msg.attach(msg2)
2196 eq(msg.as_string(), """\
2197From: aperson@dom.ain
2198To: bperson@dom.ain
2199Subject: Test
2200Content-Type: multipart/mixed; boundary="BOUNDARY"
2201
2202MIME message
2203--BOUNDARY
2204Content-Type: text/plain; charset="us-ascii"
2205MIME-Version: 1.0
2206Content-Transfer-Encoding: 7bit
2207
2208One
2209--BOUNDARY
2210Content-Type: text/plain; charset="us-ascii"
2211MIME-Version: 1.0
2212Content-Transfer-Encoding: 7bit
2213
2214Two
2215--BOUNDARY--
2216""")
2217
2218 def test_default_type(self):
2219 eq = self.assertEqual
2220 with openfile('msg_30.txt') as fp:
2221 msg = email.message_from_file(fp)
2222 container1 = msg.get_payload(0)
2223 eq(container1.get_default_type(), 'message/rfc822')
2224 eq(container1.get_content_type(), 'message/rfc822')
2225 container2 = msg.get_payload(1)
2226 eq(container2.get_default_type(), 'message/rfc822')
2227 eq(container2.get_content_type(), 'message/rfc822')
2228 container1a = container1.get_payload(0)
2229 eq(container1a.get_default_type(), 'text/plain')
2230 eq(container1a.get_content_type(), 'text/plain')
2231 container2a = container2.get_payload(0)
2232 eq(container2a.get_default_type(), 'text/plain')
2233 eq(container2a.get_content_type(), 'text/plain')
2234
2235 def test_default_type_with_explicit_container_type(self):
2236 eq = self.assertEqual
2237 with openfile('msg_28.txt') as fp:
2238 msg = email.message_from_file(fp)
2239 container1 = msg.get_payload(0)
2240 eq(container1.get_default_type(), 'message/rfc822')
2241 eq(container1.get_content_type(), 'message/rfc822')
2242 container2 = msg.get_payload(1)
2243 eq(container2.get_default_type(), 'message/rfc822')
2244 eq(container2.get_content_type(), 'message/rfc822')
2245 container1a = container1.get_payload(0)
2246 eq(container1a.get_default_type(), 'text/plain')
2247 eq(container1a.get_content_type(), 'text/plain')
2248 container2a = container2.get_payload(0)
2249 eq(container2a.get_default_type(), 'text/plain')
2250 eq(container2a.get_content_type(), 'text/plain')
2251
2252 def test_default_type_non_parsed(self):
2253 eq = self.assertEqual
2254 neq = self.ndiffAssertEqual
2255 # Set up container
2256 container = MIMEMultipart('digest', 'BOUNDARY')
2257 container.epilogue = ''
2258 # Set up subparts
2259 subpart1a = MIMEText('message 1\n')
2260 subpart2a = MIMEText('message 2\n')
2261 subpart1 = MIMEMessage(subpart1a)
2262 subpart2 = MIMEMessage(subpart2a)
2263 container.attach(subpart1)
2264 container.attach(subpart2)
2265 eq(subpart1.get_content_type(), 'message/rfc822')
2266 eq(subpart1.get_default_type(), 'message/rfc822')
2267 eq(subpart2.get_content_type(), 'message/rfc822')
2268 eq(subpart2.get_default_type(), 'message/rfc822')
2269 neq(container.as_string(0), '''\
2270Content-Type: multipart/digest; boundary="BOUNDARY"
2271MIME-Version: 1.0
2272
2273--BOUNDARY
2274Content-Type: message/rfc822
2275MIME-Version: 1.0
2276
2277Content-Type: text/plain; charset="us-ascii"
2278MIME-Version: 1.0
2279Content-Transfer-Encoding: 7bit
2280
2281message 1
2282
2283--BOUNDARY
2284Content-Type: message/rfc822
2285MIME-Version: 1.0
2286
2287Content-Type: text/plain; charset="us-ascii"
2288MIME-Version: 1.0
2289Content-Transfer-Encoding: 7bit
2290
2291message 2
2292
2293--BOUNDARY--
2294''')
2295 del subpart1['content-type']
2296 del subpart1['mime-version']
2297 del subpart2['content-type']
2298 del subpart2['mime-version']
2299 eq(subpart1.get_content_type(), 'message/rfc822')
2300 eq(subpart1.get_default_type(), 'message/rfc822')
2301 eq(subpart2.get_content_type(), 'message/rfc822')
2302 eq(subpart2.get_default_type(), 'message/rfc822')
2303 neq(container.as_string(0), '''\
2304Content-Type: multipart/digest; boundary="BOUNDARY"
2305MIME-Version: 1.0
2306
2307--BOUNDARY
2308
2309Content-Type: text/plain; charset="us-ascii"
2310MIME-Version: 1.0
2311Content-Transfer-Encoding: 7bit
2312
2313message 1
2314
2315--BOUNDARY
2316
2317Content-Type: text/plain; charset="us-ascii"
2318MIME-Version: 1.0
2319Content-Transfer-Encoding: 7bit
2320
2321message 2
2322
2323--BOUNDARY--
2324''')
2325
2326 def test_mime_attachments_in_constructor(self):
2327 eq = self.assertEqual
2328 text1 = MIMEText('')
2329 text2 = MIMEText('')
2330 msg = MIMEMultipart(_subparts=(text1, text2))
2331 eq(len(msg.get_payload()), 2)
2332 eq(msg.get_payload(0), text1)
2333 eq(msg.get_payload(1), text2)
2334
Christian Heimes587c2bf2008-01-19 16:21:02 +00002335 def test_default_multipart_constructor(self):
2336 msg = MIMEMultipart()
2337 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002338
Ezio Melottib3aedd42010-11-20 19:04:17 +00002339
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator expected in parsed payloads, preamble and epilogue.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return (parsed message, raw text) for the named test data file.
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the result to equal text.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        yadda = 'Yadda yadda yadda' + self.linesep
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), yadda)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), yadda)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one nested message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), yadda)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002502
2503
Ezio Melottib3aedd42010-11-20 19:04:17 +00002504
# Test various other bits of the package's functionality
class TestMiscellaneous(TestEmailBase):

    def test_message_from_string(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_file(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()
            # Rewind so the parser sees the same data that was just read.
            fp.seek(0)
            msg = email.message_from_file(fp)
        s = StringIO()
        # Don't wrap/continue long headers since we're trying to test
        # idempotency.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.assertEqual(text, s.getvalue())

    def test_message_from_string_with_class(self):
        with openfile('msg_01.txt') as fp:
            text = fp.read()

        # Create a subclass
        class MyMessage(Message):
            pass

        msg = email.message_from_string(text, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            text = fp.read()
        msg = email.message_from_string(text, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_message_from_file_with_class(self):
        # Create a subclass
        class MyMessage(Message):
            pass

        with openfile('msg_01.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        self.assertIsInstance(msg, MyMessage)
        # Try something more complicated
        with openfile('msg_02.txt') as fp:
            msg = email.message_from_file(fp, MyMessage)
        for subpart in msg.walk():
            self.assertIsInstance(subpart, MyMessage)

    def test_custom_message_does_not_require_arguments(self):
        # The custom class deliberately accepts no constructor arguments.
        class MyMessage(Message):
            def __init__(self):
                super().__init__()
        msg = self._str_msg("Subject: test\n\ntest", MyMessage)
        self.assertIsInstance(msg, MyMessage)

    def test__all__(self):
        module = __import__('email')
        self.assertEqual(sorted(module.__all__), [
            'base64mime', 'charset', 'encoders', 'errors', 'feedparser',
            'generator', 'header', 'iterators', 'message',
            'message_from_binary_file', 'message_from_bytes',
            'message_from_file', 'message_from_string', 'mime', 'parser',
            'quoprimime', 'utils',
            ])

    def test_formatdate(self):
        now = time.time()
        self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
                         time.gmtime(now)[:6])

    def test_formatdate_localtime(self):
        now = time.time()
        self.assertEqual(
            utils.parsedate(utils.formatdate(now, localtime=True))[:6],
            time.localtime(now)[:6])

    def test_formatdate_usegmt(self):
        now = time.time()
        self.assertEqual(
            utils.formatdate(now, localtime=False),
            time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
        self.assertEqual(
            utils.formatdate(now, localtime=False, usegmt=True),
            time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))

    def test_parsedate_none(self):
        self.assertEqual(utils.parsedate(''), None)

    def test_parsedate_compact(self):
        # The FWS after the comma is optional
        self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
                         utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))

    def test_parsedate_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
           (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_compact_no_dayofweek(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))

    def test_parsedate_no_space_before_positive_offset(self):
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
           (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))

    def test_parsedate_no_space_before_negative_offset(self):
        # Issue 1155362: we already handled '+' for this case.
        self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
           (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))

    def test_parsedate_accepts_time_with_dots(self):
        eq = self.assertEqual
        eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
           (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
        eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
           (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))

    def test_parsedate_acceptable_to_time_functions(self):
        eq = self.assertEqual
        timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup)), 2003)
        # parsedate_tz() appends a tenth (offset) item; only the first nine
        # are passed on to the time functions.
        timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
        t = int(time.mktime(timetup[:9]))
        eq(time.localtime(t)[:6], timetup[:6])
        eq(int(time.strftime('%Y', timetup[:9])), 2003)

    def test_parsedate_y2k(self):
        """Test for parsing a date with a two-digit year.

        Parsing a date with a two-digit year should return the correct
        four-digit year. RFC822 allows two-digit years, but RFC2822 (which
        obsoletes RFC822) requires four-digit years.

        """
        self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
        self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
                         utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))

    def test_parseaddr_empty(self):
        self.assertEqual(utils.parseaddr('<>'), ('', ''))
        self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')

    def test_noquote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly Person', 'person@dom.ain')),
            'A Silly Person <person@dom.ain>')

    def test_escape_dump(self):
        self.assertEqual(
            utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
            r'"A (Very) Silly Person" <person@dom.ain>')
        self.assertEqual(
            utils.parseaddr(r'"A \(Very\) Silly Person" <person@dom.ain>'),
            ('A (Very) Silly Person', 'person@dom.ain'))
        a = r'A \(Special\) Person'
        b = 'person@dom.ain'
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_escape_backslashes(self):
        # Raw strings throughout: '\B' and '\ ' are not valid escape
        # sequences in an ordinary string literal.
        a = r'Arthur \Backslash\ Foobar'
        b = 'person@dom.ain'
        self.assertEqual(
            utils.formataddr((a, b)),
            r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
        self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))

    def test_quotes_unicode_names(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
        self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
        self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
            latin1_quopri)

    def test_accepts_any_charset_like_object(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
        foobar = "FOOBAR"

        # Minimal stand-in exposing only header_encode().
        class CharsetMock:
            def header_encode(self, string):
                return foobar

        mock = CharsetMock()
        mock_expected = "%s <%s>" % (foobar, addr)
        self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
        self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
            utf8_base64)

    def test_invalid_charset_like_object_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        name = "H\u00e4ns W\u00fcrst"
        addr = 'person@dom.ain'
        # An object without a header_encode method:
        bad_charset = object()
        self.assertRaises(AttributeError, utils.formataddr, (name, addr),
            bad_charset)

    def test_unicode_address_raises_error(self):
        # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
        addr = 'pers\u00f6n@dom.in'
        self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
        self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))

    def test_name_with_dot(self):
        x = 'John X. Doe <jxd@example.com>'
        y = '"John X. Doe" <jxd@example.com>'
        a, b = ('John X. Doe', 'jxd@example.com')
        self.assertEqual(utils.parseaddr(x), (a, b))
        self.assertEqual(utils.parseaddr(y), (a, b))
        # formataddr() quotes the name if there's a dot in it
        self.assertEqual(utils.formataddr((a, b)), y)

    def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
        # issue 10005.  Note that in the third test the second pair of
        # backslashes is not actually a quoted pair because it is not inside a
        # comment or quoted string: the address being parsed has a quoted
        # string containing a quoted backslash, followed by 'example' and two
        # backslashes, followed by another quoted string containing a space and
        # the word 'example'.  parseaddr copies those two backslashes
        # literally.  Per rfc5322 this is not technically correct since a \ may
        # not appear in an address outside of a quoted string.  It is probably
        # a sensible Postel interpretation, though.
        eq = self.assertEqual
        eq(utils.parseaddr('""example" example"@example.com'),
          ('', '""example" example"@example.com'))
        eq(utils.parseaddr('"\\"example\\" example"@example.com'),
          ('', '"\\"example\\" example"@example.com'))
        eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
          ('', '"\\\\"example\\\\" example"@example.com'))

    def test_parseaddr_preserves_spaces_in_local_part(self):
        # issue 9286.  A normal RFC5322 local part should not contain any
        # folding white space, but legacy local parts can (they are a sequence
        # of atoms, not dotatoms).  On the other hand we strip whitespace from
        # before the @ and around dots, on the assumption that the whitespace
        # around the punctuation is a mistake in what would otherwise be
        # an RFC5322 local part.  Leading whitespace is, as usual, stripped
        # as well.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        # NOTE(review): as it appears here this assertion repeats the one
        # above; the inputs may originally have differed in their runs of
        # spaces -- confirm against the upstream file.
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr("merwok wok@xample.com"))
        self.assertEqual(('', "merwok wok@xample.com"),
            utils.parseaddr(" merwok wok @xample.com"))
        self.assertEqual(('', 'merwok"wok" wok@xample.com'),
            utils.parseaddr('merwok"wok" wok@xample.com'))
        self.assertEqual(('', 'merwok.wok.wok@xample.com'),
            utils.parseaddr('merwok. wok . wok@xample.com'))

    def test_formataddr_does_not_quote_parens_in_quoted_string(self):
        addr = ("'foo@example.com' (foo@example.com)",
                'foo@example.com')
        addrstr = ('"\'foo@example.com\' '
                            '(foo@example.com)" <foo@example.com>')
        self.assertEqual(utils.parseaddr(addrstr), addr)
        self.assertEqual(utils.formataddr(addr), addrstr)

    def test_multiline_from_comment(self):
        x = """\
Foo
\tBar <foo@example.com>"""
        self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))

    def test_quote_dump(self):
        self.assertEqual(
            utils.formataddr(('A Silly; Person', 'person@dom.ain')),
            r'"A Silly; Person" <person@dom.ain>')

    def test_charset_richcomparisons(self):
        eq = self.assertEqual
        ne = self.assertNotEqual
        cset1 = Charset()
        cset2 = Charset()
        matching = ('us-ascii', 'US-ASCII', 'Us-AsCiI')
        differing = ('usascii', 'USASCII', 'UsAsCiI')
        # Each comparison is made with the Charset on either side.
        for name in matching:
            eq(cset1, name)
        for name in matching:
            eq(name, cset1)
        for name in differing:
            ne(cset1, name)
        for name in differing:
            ne(name, cset1)
        eq(cset1, cset2)
        eq(cset2, cset1)

    def test_getaddresses(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
                               'Bud Person <bperson@dom.ain>']),
           [('Al Person', 'aperson@dom.ain'),
            ('Bud Person', 'bperson@dom.ain')])

    def test_getaddresses_nasty(self):
        eq = self.assertEqual
        eq(utils.getaddresses(['foo: ;']), [('', '')])
        eq(utils.getaddresses(
           ['[]*-- =~$']),
           [('', ''), ('', ''), ('', '*--')])
        eq(utils.getaddresses(
           ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
           [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])

    def test_getaddresses_embedded_comment(self):
        """Test proper handling of a nested comment"""
        eq = self.assertEqual
        addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
        eq(addrs[0][1], 'foo@bar.com')

    def test_utils_quote_unquote(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('content-disposition', 'attachment',
                       filename='foo\\wacky"name')
        eq(msg.get_filename(), 'foo\\wacky"name')

    def test_get_body_encoding_with_bogus_charset(self):
        charset = Charset('not a charset')
        self.assertEqual(charset.get_body_encoding(), 'base64')

    def test_get_body_encoding_with_uppercase_charset(self):
        eq = self.assertEqual
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset=UTF-8'
        eq(msg['content-type'], 'text/plain; charset=UTF-8')
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'utf-8')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), 'base64')
        msg.set_payload(b'hello world', charset=charset)
        eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
        eq(msg.get_payload(decode=True), b'hello world')
        eq(msg['content-transfer-encoding'], 'base64')
        # Try another one
        msg = Message()
        msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
        charsets = msg.get_charsets()
        eq(len(charsets), 1)
        eq(charsets[0], 'us-ascii')
        charset = Charset(charsets[0])
        eq(charset.get_body_encoding(), encoders.encode_7or8bit)
        msg.set_payload('hello world', charset=charset)
        eq(msg.get_payload(), 'hello world')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_charsets_case_insensitive(self):
        lc = Charset('us-ascii')
        uc = Charset('US-ASCII')
        self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())

    def test_partial_falls_inside_message_delivery_status(self):
        eq = self.ndiffAssertEqual
        # The Parser interface provides chunks of data to FeedParser in 8192
        # byte gulps.  SF bug #1076485 found one of those chunks inside
        # message/delivery-status header block, which triggered an
        # unreadline() of NeedMoreData.
        msg = self._msgobj('msg_43.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # _structure() indents each nesting level by four spaces; the
        # delivery-status part carries 26 text/plain blocks.
        eq(sfp.getvalue(),
           'multipart/report\n'
           '    text/plain\n'
           '    message/delivery-status\n'
           + '        text/plain\n' * 26 +
           '    text/rfc822-headers\n')

    def test_make_msgid_domain(self):
        self.assertEqual(
            email.utils.make_msgid(domain='testdomain-string')[-19:],
            '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002923
Ezio Melottib3aedd42010-11-20 19:04:17 +00002924
# Test the iterator/generators
class TestIterators(TestEmailBase):

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # (chunk pushed, number of complete lines readable right after it)
        imt = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            # Drain every line that is available so far; readline() hands
            # back NeedMoreData once nothing complete is left.
            ready = list(iter(bsf.readline, NeedMoreData))
            om.extend(ready)
            self.assertEqual(len(ready), n)
        self.assertEqual(len(om), nt)
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
3012
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003013
Ezio Melottib3aedd42010-11-20 19:04:17 +00003014
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003015class TestParsers(TestEmailBase):
R David Murrayb35c8502011-04-13 16:46:05 -04003016
def test_header_parser(self):
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt') as fp:
        msg = HeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    # Although the type is multipart/mixed, the body is expected to stay a
    # single string rather than a list of sub-parts.
    self.assertFalse(msg.is_multipart())
    self.assertIsInstance(msg.get_payload(), str)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003027
def test_bytes_header_parser(self):
    eq = self.assertEqual
    # Parse only the headers of a complex multipart MIME document
    with openfile('msg_02.txt', 'rb') as fp:
        msg = email.parser.BytesHeaderParser().parse(fp)
    eq(msg['from'], 'ppp-request@zzz.org')
    eq(msg['to'], 'ppp@zzz.org')
    eq(msg.get_content_type(), 'multipart/mixed')
    self.assertFalse(msg.is_multipart())
    # The payload is a str by default and bytes when decoding is requested.
    self.assertIsInstance(msg.get_payload(), str)
    self.assertIsInstance(msg.get_payload(decode=True), bytes)
3039
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003040 def test_whitespace_continuation(self):
3041 eq = self.assertEqual
3042 # This message contains a line after the Subject: header that has only
3043 # whitespace, but it is not empty!
3044 msg = email.message_from_string("""\
3045From: aperson@dom.ain
3046To: bperson@dom.ain
3047Subject: the next line has a space on it
3048\x20
3049Date: Mon, 8 Apr 2002 15:09:19 -0400
3050Message-ID: spam
3051
3052Here's the message body
3053""")
3054 eq(msg['subject'], 'the next line has a space on it\n ')
3055 eq(msg['message-id'], 'spam')
3056 eq(msg.get_payload(), "Here's the message body\n")
3057
3058 def test_whitespace_continuation_last_header(self):
3059 eq = self.assertEqual
3060 # Like the previous test, but the subject line is the last
3061 # header.
3062 msg = email.message_from_string("""\
3063From: aperson@dom.ain
3064To: bperson@dom.ain
3065Date: Mon, 8 Apr 2002 15:09:19 -0400
3066Message-ID: spam
3067Subject: the next line has a space on it
3068\x20
3069
3070Here's the message body
3071""")
3072 eq(msg['subject'], 'the next line has a space on it\n ')
3073 eq(msg['message-id'], 'spam')
3074 eq(msg.get_payload(), "Here's the message body\n")
3075
def test_crlf_separation(self):
    eq = self.assertEqual
    # newline='\n' stops the file layer from translating the CRLF line
    # endings, so the parser sees them as stored.
    with openfile('msg_26.txt', newline='\n') as fp:
        msg = Parser().parse(fp)
    eq(len(msg.get_payload()), 2)
    part1 = msg.get_payload(0)
    eq(part1.get_content_type(), 'text/plain')
    eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
    part2 = msg.get_payload(1)
    eq(part2.get_content_type(), 'application/riscos')
3086
def test_crlf_flatten(self):
    # Using newline='\n' preserves the crlfs in this input file.
    with openfile('msg_26.txt', newline='\n') as fp:
        text = fp.read()
    msg = email.message_from_string(text)
    out = StringIO()
    # Flattening with a CRLF separator must reproduce the input exactly.
    Generator(out).flatten(msg, linesep='\r\n')
    self.assertEqual(out.getvalue(), text)
3096
# Lift unittest's length limit on assertion-failure diffs.
maxDiff = None
3098
def test_multipart_digest_with_extra_mime_headers(self):
    eq = self.assertEqual
    neq = self.ndiffAssertEqual
    with openfile('msg_28.txt') as fp:
        msg = email.message_from_file(fp)
    # Structure is:
    # multipart/digest
    #   message/rfc822
    #     text/plain
    #   message/rfc822
    #     text/plain
    self.assertTrue(msg.is_multipart())
    eq(len(msg.get_payload()), 2)
    # Both message/rfc822 parts have the same shape and differ only in the
    # text of the single text/plain message they wrap.
    for index, body in enumerate(('message 1\n', 'message 2\n')):
        part = msg.get_payload(index)
        eq(part.get_content_type(), 'message/rfc822')
        self.assertTrue(part.is_multipart())
        eq(len(part.get_payload()), 1)
        inner = part.get_payload(0)
        self.assertFalse(inner.is_multipart())
        eq(inner.get_content_type(), 'text/plain')
        neq(inner.get_payload(), body)
3129
3130 def test_three_lines(self):
3131 # A bug report by Andrew McNamara
3132 lines = ['From: Andrew Person <aperson@dom.ain',
3133 'Subject: Test',
3134 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
3135 msg = email.message_from_string(NL.join(lines))
3136 self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')
3137
3138 def test_strip_line_feed_and_carriage_return_in_headers(self):
3139 eq = self.assertEqual
3140 # For [ 1002475 ] email message parser doesn't handle \r\n correctly
3141 value1 = 'text'
3142 value2 = 'more text'
3143 m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
3144 value1, value2)
3145 msg = email.message_from_string(m)
3146 eq(msg.get('Header'), value1)
3147 eq(msg.get('Next-Header'), value2)
3148
3149 def test_rfc2822_header_syntax(self):
3150 eq = self.assertEqual
3151 m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3152 msg = email.message_from_string(m)
3153 eq(len(msg), 3)
3154 eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
3155 eq(msg.get_payload(), 'body')
3156
3157 def test_rfc2822_space_not_allowed_in_header(self):
3158 eq = self.assertEqual
3159 m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
3160 msg = email.message_from_string(m)
3161 eq(len(msg.keys()), 0)
3162
3163 def test_rfc2822_one_character_header(self):
3164 eq = self.assertEqual
3165 m = 'A: first header\nB: second header\nCC: third header\n\nbody'
3166 msg = email.message_from_string(m)
3167 headers = msg.keys()
3168 headers.sort()
3169 eq(headers, ['A', 'B', 'CC'])
3170 eq(msg.get_payload(), 'body')
3171
R. David Murray45e0e142010-06-16 02:19:40 +00003172 def test_CRLFLF_at_end_of_part(self):
3173 # issue 5610: feedparser should not eat two chars from body part ending
3174 # with "\r\n\n".
3175 m = (
3176 "From: foo@bar.com\n"
3177 "To: baz\n"
3178 "Mime-Version: 1.0\n"
3179 "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
3180 "\n"
3181 "--BOUNDARY\n"
3182 "Content-Type: text/plain\n"
3183 "\n"
3184 "body ending with CRLF newline\r\n"
3185 "\n"
3186 "--BOUNDARY--\n"
3187 )
3188 msg = email.message_from_string(m)
3189 self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003190
Ezio Melottib3aedd42010-11-20 19:04:17 +00003191
R. David Murray96fd54e2010-10-08 15:55:28 +00003192class Test8BitBytesHandling(unittest.TestCase):
3193 # In Python3 all input is string, but that doesn't work if the actual input
3194 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3195 # decode byte streams using the surrogateescape error handler, and
3196 # reconvert to binary at appropriate places if we detect surrogates. This
3197 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3198 # but it does allow us to parse and preserve them, and to decode body
3199 # parts that use an 8bit CTE.
3200
3201 bodytest_msg = textwrap.dedent("""\
3202 From: foo@bar.com
3203 To: baz
3204 Mime-Version: 1.0
3205 Content-Type: text/plain; charset={charset}
3206 Content-Transfer-Encoding: {cte}
3207
3208 {bodyline}
3209 """)
3210
3211 def test_known_8bit_CTE(self):
3212 m = self.bodytest_msg.format(charset='utf-8',
3213 cte='8bit',
3214 bodyline='pöstal').encode('utf-8')
3215 msg = email.message_from_bytes(m)
3216 self.assertEqual(msg.get_payload(), "pöstal\n")
3217 self.assertEqual(msg.get_payload(decode=True),
3218 "pöstal\n".encode('utf-8'))
3219
3220 def test_unknown_8bit_CTE(self):
3221 m = self.bodytest_msg.format(charset='notavalidcharset',
3222 cte='8bit',
3223 bodyline='pöstal').encode('utf-8')
3224 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003225 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003226 self.assertEqual(msg.get_payload(decode=True),
3227 "pöstal\n".encode('utf-8'))
3228
3229 def test_8bit_in_quopri_body(self):
3230 # This is non-RFC compliant data...without 'decode' the library code
3231 # decodes the body using the charset from the headers, and because the
3232 # source byte really is utf-8 this works. This is likely to fail
3233 # against real dirty data (ie: produce mojibake), but the data is
3234 # invalid anyway so it is as good a guess as any. But this means that
3235 # this test just confirms the current behavior; that behavior is not
3236 # necessarily the best possible behavior. With 'decode' it is
3237 # returning the raw bytes, so that test should be of correct behavior,
3238 # or at least produce the same result that email4 did.
3239 m = self.bodytest_msg.format(charset='utf-8',
3240 cte='quoted-printable',
3241 bodyline='p=C3=B6stál').encode('utf-8')
3242 msg = email.message_from_bytes(m)
3243 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3244 self.assertEqual(msg.get_payload(decode=True),
3245 'pöstál\n'.encode('utf-8'))
3246
3247 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3248 # This is similar to the previous test, but proves that if the 8bit
3249 # byte is undecodeable in the specified charset, it gets replaced
3250 # by the unicode 'unknown' character. Again, this may or may not
3251 # be the ideal behavior. Note that if decode=False none of the
3252 # decoders will get involved, so this is the only test we need
3253 # for this behavior.
3254 m = self.bodytest_msg.format(charset='ascii',
3255 cte='quoted-printable',
3256 bodyline='p=C3=B6stál').encode('utf-8')
3257 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003258 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003259 self.assertEqual(msg.get_payload(decode=True),
3260 'pöstál\n'.encode('utf-8'))
3261
3262 def test_8bit_in_base64_body(self):
3263 # Sticking an 8bit byte in a base64 block makes it undecodable by
3264 # normal means, so the block is returned undecoded, but as bytes.
3265 m = self.bodytest_msg.format(charset='utf-8',
3266 cte='base64',
3267 bodyline='cMO2c3RhbAá=').encode('utf-8')
3268 msg = email.message_from_bytes(m)
3269 self.assertEqual(msg.get_payload(decode=True),
3270 'cMO2c3RhbAá=\n'.encode('utf-8'))
3271
3272 def test_8bit_in_uuencode_body(self):
3273 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3274 # normal means, so the block is returned undecoded, but as bytes.
3275 m = self.bodytest_msg.format(charset='utf-8',
3276 cte='uuencode',
3277 bodyline='<,.V<W1A; á ').encode('utf-8')
3278 msg = email.message_from_bytes(m)
3279 self.assertEqual(msg.get_payload(decode=True),
3280 '<,.V<W1A; á \n'.encode('utf-8'))
3281
3282
R. David Murray92532142011-01-07 23:25:30 +00003283 headertest_headers = (
3284 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3285 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3286 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3287 '\tJean de Baddie',
3288 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3289 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3290 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3291 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3292 )
3293 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3294 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003295
3296 def test_get_8bit_header(self):
3297 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003298 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3299 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003300
3301 def test_print_8bit_headers(self):
3302 msg = email.message_from_bytes(self.headertest_msg)
3303 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003304 textwrap.dedent("""\
3305 From: {}
3306 To: {}
3307 Subject: {}
3308 From: {}
3309
3310 Yes, they are flying.
3311 """).format(*[expected[1] for (_, expected) in
3312 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003313
3314 def test_values_with_8bit_headers(self):
3315 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003316 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003317 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003318 'b\uFFFD\uFFFDz',
3319 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3320 'coll\uFFFD\uFFFDgue, le pouf '
3321 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003322 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003323 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003324
3325 def test_items_with_8bit_headers(self):
3326 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003327 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003328 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003329 ('To', 'b\uFFFD\uFFFDz'),
3330 ('Subject', 'Maintenant je vous '
3331 'pr\uFFFD\uFFFDsente '
3332 'mon coll\uFFFD\uFFFDgue, le pouf '
3333 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3334 '\tJean de Baddie'),
3335 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003336
3337 def test_get_all_with_8bit_headers(self):
3338 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003339 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003340 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003341 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003342
R David Murraya2150232011-03-16 21:11:23 -04003343 def test_get_content_type_with_8bit(self):
3344 msg = email.message_from_bytes(textwrap.dedent("""\
3345 Content-Type: text/pl\xA7in; charset=utf-8
3346 """).encode('latin-1'))
3347 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3348 self.assertEqual(msg.get_content_maintype(), "text")
3349 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3350
3351 def test_get_params_with_8bit(self):
3352 msg = email.message_from_bytes(
3353 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3354 self.assertEqual(msg.get_params(header='x-header'),
3355 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3356 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3357 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3358 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3359
3360 def test_get_rfc2231_params_with_8bit(self):
3361 msg = email.message_from_bytes(textwrap.dedent("""\
3362 Content-Type: text/plain; charset=us-ascii;
3363 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3364 ).encode('latin-1'))
3365 self.assertEqual(msg.get_param('title'),
3366 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3367
3368 def test_set_rfc2231_params_with_8bit(self):
3369 msg = email.message_from_bytes(textwrap.dedent("""\
3370 Content-Type: text/plain; charset=us-ascii;
3371 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3372 ).encode('latin-1'))
3373 msg.set_param('title', 'test')
3374 self.assertEqual(msg.get_param('title'), 'test')
3375
3376 def test_del_rfc2231_params_with_8bit(self):
3377 msg = email.message_from_bytes(textwrap.dedent("""\
3378 Content-Type: text/plain; charset=us-ascii;
3379 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3380 ).encode('latin-1'))
3381 msg.del_param('title')
3382 self.assertEqual(msg.get_param('title'), None)
3383 self.assertEqual(msg.get_content_maintype(), 'text')
3384
3385 def test_get_payload_with_8bit_cte_header(self):
3386 msg = email.message_from_bytes(textwrap.dedent("""\
3387 Content-Transfer-Encoding: b\xa7se64
3388 Content-Type: text/plain; charset=latin-1
3389
3390 payload
3391 """).encode('latin-1'))
3392 self.assertEqual(msg.get_payload(), 'payload\n')
3393 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3394
R. David Murray96fd54e2010-10-08 15:55:28 +00003395 non_latin_bin_msg = textwrap.dedent("""\
3396 From: foo@bar.com
3397 To: báz
3398 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3399 \tJean de Baddie
3400 Mime-Version: 1.0
3401 Content-Type: text/plain; charset="utf-8"
3402 Content-Transfer-Encoding: 8bit
3403
3404 Да, они летят.
3405 """).encode('utf-8')
3406
3407 def test_bytes_generator(self):
3408 msg = email.message_from_bytes(self.non_latin_bin_msg)
3409 out = BytesIO()
3410 email.generator.BytesGenerator(out).flatten(msg)
3411 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3412
R. David Murray7372a072011-01-26 21:21:32 +00003413 def test_bytes_generator_handles_None_body(self):
3414 #Issue 11019
3415 msg = email.message.Message()
3416 out = BytesIO()
3417 email.generator.BytesGenerator(out).flatten(msg)
3418 self.assertEqual(out.getvalue(), b"\n")
3419
R. David Murray92532142011-01-07 23:25:30 +00003420 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003421 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003422 To: =?unknown-8bit?q?b=C3=A1z?=
3423 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3424 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3425 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003426 Mime-Version: 1.0
3427 Content-Type: text/plain; charset="utf-8"
3428 Content-Transfer-Encoding: base64
3429
3430 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3431 """)
3432
3433 def test_generator_handles_8bit(self):
3434 msg = email.message_from_bytes(self.non_latin_bin_msg)
3435 out = StringIO()
3436 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003437 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003438
3439 def test_bytes_generator_with_unix_from(self):
3440 # The unixfrom contains a current date, so we can't check it
3441 # literally. Just make sure the first word is 'From' and the
3442 # rest of the message matches the input.
3443 msg = email.message_from_bytes(self.non_latin_bin_msg)
3444 out = BytesIO()
3445 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3446 lines = out.getvalue().split(b'\n')
3447 self.assertEqual(lines[0].split()[0], b'From')
3448 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3449
R. David Murray92532142011-01-07 23:25:30 +00003450 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3451 non_latin_bin_msg_as7bit[2:4] = [
3452 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3453 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3454 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3455
R. David Murray96fd54e2010-10-08 15:55:28 +00003456 def test_message_from_binary_file(self):
3457 fn = 'test.msg'
3458 self.addCleanup(unlink, fn)
3459 with open(fn, 'wb') as testfile:
3460 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003461 with open(fn, 'rb') as testfile:
3462 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003463 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3464
3465 latin_bin_msg = textwrap.dedent("""\
3466 From: foo@bar.com
3467 To: Dinsdale
3468 Subject: Nudge nudge, wink, wink
3469 Mime-Version: 1.0
3470 Content-Type: text/plain; charset="latin-1"
3471 Content-Transfer-Encoding: 8bit
3472
3473 oh là là, know what I mean, know what I mean?
3474 """).encode('latin-1')
3475
3476 latin_bin_msg_as7bit = textwrap.dedent("""\
3477 From: foo@bar.com
3478 To: Dinsdale
3479 Subject: Nudge nudge, wink, wink
3480 Mime-Version: 1.0
3481 Content-Type: text/plain; charset="iso-8859-1"
3482 Content-Transfer-Encoding: quoted-printable
3483
3484 oh l=E0 l=E0, know what I mean, know what I mean?
3485 """)
3486
3487 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3488 m = email.message_from_bytes(self.latin_bin_msg)
3489 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3490
3491 def test_decoded_generator_emits_unicode_body(self):
3492 m = email.message_from_bytes(self.latin_bin_msg)
3493 out = StringIO()
3494 email.generator.DecodedGenerator(out).flatten(m)
3495 #DecodedHeader output contains an extra blank line compared
3496 #to the input message. RDM: not sure if this is a bug or not,
3497 #but it is not specific to the 8bit->7bit conversion.
3498 self.assertEqual(out.getvalue(),
3499 self.latin_bin_msg.decode('latin-1')+'\n')
3500
3501 def test_bytes_feedparser(self):
3502 bfp = email.feedparser.BytesFeedParser()
3503 for i in range(0, len(self.latin_bin_msg), 10):
3504 bfp.feed(self.latin_bin_msg[i:i+10])
3505 m = bfp.close()
3506 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3507
R. David Murray8451c4b2010-10-23 22:19:56 +00003508 def test_crlf_flatten(self):
3509 with openfile('msg_26.txt', 'rb') as fp:
3510 text = fp.read()
3511 msg = email.message_from_bytes(text)
3512 s = BytesIO()
3513 g = email.generator.BytesGenerator(s)
3514 g.flatten(msg, linesep='\r\n')
3515 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003516
3517 def test_8bit_multipart(self):
3518 # Issue 11605
3519 source = textwrap.dedent("""\
3520 Date: Fri, 18 Mar 2011 17:15:43 +0100
3521 To: foo@example.com
3522 From: foodwatch-Newsletter <bar@example.com>
3523 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3524 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3525 MIME-Version: 1.0
3526 Content-Type: multipart/alternative;
3527 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3528
3529 --b1_76a486bee62b0d200f33dc2ca08220ad
3530 Content-Type: text/plain; charset="utf-8"
3531 Content-Transfer-Encoding: 8bit
3532
3533 Guten Tag, ,
3534
3535 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3536 Nachrichten aus Japan.
3537
3538
3539 --b1_76a486bee62b0d200f33dc2ca08220ad
3540 Content-Type: text/html; charset="utf-8"
3541 Content-Transfer-Encoding: 8bit
3542
3543 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3544 "http://www.w3.org/TR/html4/loose.dtd">
3545 <html lang="de">
3546 <head>
3547 <title>foodwatch - Newsletter</title>
3548 </head>
3549 <body>
3550 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3551 die Nachrichten aus Japan.</p>
3552 </body>
3553 </html>
3554 --b1_76a486bee62b0d200f33dc2ca08220ad--
3555
3556 """).encode('utf-8')
3557 msg = email.message_from_bytes(source)
3558 s = BytesIO()
3559 g = email.generator.BytesGenerator(s)
3560 g.flatten(msg)
3561 self.assertEqual(s.getvalue(), source)
3562
R David Murray9fd170e2012-03-14 14:05:03 -04003563 def test_bytes_generator_b_encoding_linesep(self):
3564 # Issue 14062: b encoding was tacking on an extra \n.
3565 m = Message()
3566 # This has enough non-ascii that it should always end up b encoded.
3567 m['Subject'] = Header('žluťoučký kůň')
3568 s = BytesIO()
3569 g = email.generator.BytesGenerator(s)
3570 g.flatten(m, linesep='\r\n')
3571 self.assertEqual(
3572 s.getvalue(),
3573 b'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3574
3575 def test_generator_b_encoding_linesep(self):
3576 # Since this broke in ByteGenerator, test Generator for completeness.
3577 m = Message()
3578 # This has enough non-ascii that it should always end up b encoded.
3579 m['Subject'] = Header('žluťoučký kůň')
3580 s = StringIO()
3581 g = email.generator.Generator(s)
3582 g.flatten(m, linesep='\r\n')
3583 self.assertEqual(
3584 s.getvalue(),
3585 'Subject: =?utf-8?b?xb5sdcWlb3XEjWvDvSBrxa/FiA==?=\r\n\r\n')
3586
R. David Murray8451c4b2010-10-23 22:19:56 +00003587 maxDiff = None
3588
Ezio Melottib3aedd42010-11-20 19:04:17 +00003589
class BaseTestBytesGeneratorIdempotent:
    """Mixin with the bytes-based helpers for the idempotency tests.

    Users of the mixin must provide ``linesep``, ``blinesep`` and
    ``normalize_linesep_regex``, which the helpers below read from ``self``.
    """

    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for *filename* with line ends normalized."""
        with openfile(filename, 'rb') as stream:
            raw = stream.read()
        # Rewrite the file's line endings to this class's byte separator
        # before parsing.
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening *msg* to bytes gives back *data*."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003606
3607
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    """Idempotency tests using LF as the line separator."""

    # Converts every CRLF in the test data to blinesep.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3613
3614
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    """Idempotency tests using CRLF as the line separator."""

    # Converts every LF that is not already preceded by CR to blinesep.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3620
Ezio Melottib3aedd42010-11-20 19:04:17 +00003621
class TestBase64(unittest.TestCase):
    """Tests for the helpers in email.base64mime."""

    def test_len(self):
        """header_length() equals the length of the base64 encoding."""
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # One 4-character group per (possibly partial) 3-byte group of
            # input: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ... 13..14 -> 20.
            bsize = -(-size // 3) * 4
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        """decode() returns bytes, including for empty input."""
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """body_encode() output for plain, wrapped and custom-eol input."""
        eq = self.assertEqual
        # Empty input must encode to an empty result.  Only emptiness is
        # checked: every non-empty result below is a str, so pinning the
        # empty case to b'' would tie the test to an incidental type.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        """header_encode() wraps the base64 text in an RFC 2047 word."""
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003672
3673
Ezio Melottib3aedd42010-11-20 19:04:17 +00003674
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003675class TestQuopri(unittest.TestCase):
3676 def setUp(self):
3677 # Set of characters (as byte integers) that don't need to be encoded
3678 # in headers.
3679 self.hlit = list(chain(
3680 range(ord('a'), ord('z') + 1),
3681 range(ord('A'), ord('Z') + 1),
3682 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003683 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003684 # Set of characters (as byte integers) that do need to be encoded in
3685 # headers.
3686 self.hnon = [c for c in range(256) if c not in self.hlit]
3687 assert len(self.hlit) + len(self.hnon) == 256
3688 # Set of characters (as byte integers) that don't need to be encoded
3689 # in bodies.
3690 self.blit = list(range(ord(' '), ord('~') + 1))
3691 self.blit.append(ord('\t'))
3692 self.blit.remove(ord('='))
3693 # Set of characters (as byte integers) that do need to be encoded in
3694 # bodies.
3695 self.bnon = [c for c in range(256) if c not in self.blit]
3696 assert len(self.blit) + len(self.bnon) == 256
3697
Guido van Rossum9604e662007-08-30 03:46:43 +00003698 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003699 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003700 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003701 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003702 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003703 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003704 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003705
Guido van Rossum9604e662007-08-30 03:46:43 +00003706 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003707 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003708 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003709 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003710 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003711 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003712 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003713
3714 def test_header_quopri_len(self):
3715 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003716 eq(quoprimime.header_length(b'hello'), 5)
3717 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003718 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003719 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003720 # =?xxx?q?...?= means 10 extra characters
3721 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003722 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3723 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003724 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003725 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003726 # =?xxx?q?...?= means 10 extra characters
3727 10)
3728 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003729 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003730 'expected length 1 for %r' % chr(c))
3731 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003732 # Space is special; it's encoded to _
3733 if c == ord(' '):
3734 continue
3735 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003736 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003737 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003738
3739 def test_body_quopri_len(self):
3740 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003741 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003742 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003743 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003744 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003745
3746 def test_quote_unquote_idempotent(self):
3747 for x in range(256):
3748 c = chr(x)
3749 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3750
R David Murrayec1b5b82011-03-23 14:19:05 -04003751 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3752 if charset is None:
3753 encoded_header = quoprimime.header_encode(header)
3754 else:
3755 encoded_header = quoprimime.header_encode(header, charset)
3756 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003757
R David Murraycafd79d2011-03-23 15:25:55 -04003758 def test_header_encode_null(self):
3759 self._test_header_encode(b'', '')
3760
R David Murrayec1b5b82011-03-23 14:19:05 -04003761 def test_header_encode_one_word(self):
3762 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3763
3764 def test_header_encode_two_lines(self):
3765 self._test_header_encode(b'hello\nworld',
3766 '=?iso-8859-1?q?hello=0Aworld?=')
3767
3768 def test_header_encode_non_ascii(self):
3769 self._test_header_encode(b'hello\xc7there',
3770 '=?iso-8859-1?q?hello=C7there?=')
3771
3772 def test_header_encode_alt_charset(self):
3773 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3774 charset='iso-8859-2')
3775
3776 def _test_header_decode(self, encoded_header, expected_decoded_header):
3777 decoded_header = quoprimime.header_decode(encoded_header)
3778 self.assertEqual(decoded_header, expected_decoded_header)
3779
3780 def test_header_decode_null(self):
3781 self._test_header_decode('', '')
3782
3783 def test_header_decode_one_word(self):
3784 self._test_header_decode('hello', 'hello')
3785
3786 def test_header_decode_two_lines(self):
3787 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3788
3789 def test_header_decode_non_ascii(self):
3790 self._test_header_decode('hello=C7there', 'hello\xc7there')
3791
3792 def _test_decode(self, encoded, expected_decoded, eol=None):
3793 if eol is None:
3794 decoded = quoprimime.decode(encoded)
3795 else:
3796 decoded = quoprimime.decode(encoded, eol=eol)
3797 self.assertEqual(decoded, expected_decoded)
3798
3799 def test_decode_null_word(self):
3800 self._test_decode('', '')
3801
3802 def test_decode_null_line_null_word(self):
3803 self._test_decode('\r\n', '\n')
3804
3805 def test_decode_one_word(self):
3806 self._test_decode('hello', 'hello')
3807
3808 def test_decode_one_word_eol(self):
3809 self._test_decode('hello', 'hello', eol='X')
3810
3811 def test_decode_one_line(self):
3812 self._test_decode('hello\r\n', 'hello\n')
3813
3814 def test_decode_one_line_lf(self):
3815 self._test_decode('hello\n', 'hello\n')
3816
R David Murraycafd79d2011-03-23 15:25:55 -04003817 def test_decode_one_line_cr(self):
3818 self._test_decode('hello\r', 'hello\n')
3819
3820 def test_decode_one_line_nl(self):
3821 self._test_decode('hello\n', 'helloX', eol='X')
3822
3823 def test_decode_one_line_crnl(self):
3824 self._test_decode('hello\r\n', 'helloX', eol='X')
3825
R David Murrayec1b5b82011-03-23 14:19:05 -04003826 def test_decode_one_line_one_word(self):
3827 self._test_decode('hello\r\nworld', 'hello\nworld')
3828
3829 def test_decode_one_line_one_word_eol(self):
3830 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3831
3832 def test_decode_two_lines(self):
3833 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3834
R David Murraycafd79d2011-03-23 15:25:55 -04003835 def test_decode_two_lines_eol(self):
3836 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3837
R David Murrayec1b5b82011-03-23 14:19:05 -04003838 def test_decode_one_long_line(self):
3839 self._test_decode('Spam' * 250, 'Spam' * 250)
3840
3841 def test_decode_one_space(self):
3842 self._test_decode(' ', '')
3843
3844 def test_decode_multiple_spaces(self):
3845 self._test_decode(' ' * 5, '')
3846
3847 def test_decode_one_line_trailing_spaces(self):
3848 self._test_decode('hello \r\n', 'hello\n')
3849
3850 def test_decode_two_lines_trailing_spaces(self):
3851 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3852
3853 def test_decode_quoted_word(self):
3854 self._test_decode('=22quoted=20words=22', '"quoted words"')
3855
3856 def test_decode_uppercase_quoting(self):
3857 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3858
3859 def test_decode_lowercase_quoting(self):
3860 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3861
3862 def test_decode_soft_line_break(self):
3863 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3864
3865 def test_decode_false_quoting(self):
3866 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3867
def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
    # Helper: encode body with quoprimime.body_encode(), compare against
    # expected_encoded_body and, where the output can be split into lines
    # again, check that no line is longer than maxlinelen.
    #
    # Only explicitly supplied options are forwarded; for the others the
    # values below mirror what this helper assumes body_encode() defaults to.
    options = {}
    if maxlinelen is not None:
        options['maxlinelen'] = maxlinelen
    else:
        maxlinelen = 76
    if eol is not None:
        options['eol'] = eol
    else:
        eol = '\n'
    result = quoprimime.body_encode(body, **options)
    self.assertEqual(result, expected_encoded_body)
    if eol not in ('\n', '\r\n'):
        # splitlines() would not recognise this separator, so the line
        # length cannot be checked.
        return
    for piece in result.splitlines():
        self.assertLessEqual(len(piece), maxlinelen)
3887
3888 def test_encode_null(self):
3889 self._test_encode('', '')
3890
3891 def test_encode_null_lines(self):
3892 self._test_encode('\n\n', '\n\n')
3893
3894 def test_encode_one_line(self):
3895 self._test_encode('hello\n', 'hello\n')
3896
3897 def test_encode_one_line_crlf(self):
3898 self._test_encode('hello\r\n', 'hello\n')
3899
3900 def test_encode_one_line_eol(self):
3901 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3902
3903 def test_encode_one_space(self):
3904 self._test_encode(' ', '=20')
3905
3906 def test_encode_one_line_one_space(self):
3907 self._test_encode(' \n', '=20\n')
3908
# XXX: body_encode() expects strings, but uses ord(char) from these strings
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit only ord() values in body, or at least
# a comment about the expected input?
3913
3914 def test_encode_two_lines_one_space(self):
3915 self._test_encode(' \n \n', '=20\n=20\n')
3916
R David Murrayec1b5b82011-03-23 14:19:05 -04003917 def test_encode_one_word_trailing_spaces(self):
3918 self._test_encode('hello ', 'hello =20')
3919
3920 def test_encode_one_line_trailing_spaces(self):
3921 self._test_encode('hello \n', 'hello =20\n')
3922
3923 def test_encode_one_word_trailing_tab(self):
3924 self._test_encode('hello \t', 'hello =09')
3925
3926 def test_encode_one_line_trailing_tab(self):
3927 self._test_encode('hello \t\n', 'hello =09\n')
3928
3929 def test_encode_trailing_space_before_maxlinelen(self):
3930 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3931
R David Murrayb938c8c2011-03-24 12:19:26 -04003932 def test_encode_trailing_space_at_maxlinelen(self):
3933 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3934
R David Murrayec1b5b82011-03-23 14:19:05 -04003935 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003936 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3937
3938 def test_encode_whitespace_lines(self):
3939 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003940
3941 def test_encode_quoted_equals(self):
3942 self._test_encode('a = b', 'a =3D b')
3943
3944 def test_encode_one_long_string(self):
3945 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3946
3947 def test_encode_one_long_line(self):
3948 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3949
3950 def test_encode_one_very_long_line(self):
3951 self._test_encode('x' * 200 + '\n',
3952 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3953
3954 def test_encode_one_long_line(self):
3955 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3956
3957 def test_encode_shortest_maxlinelen(self):
3958 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003959
R David Murrayb938c8c2011-03-24 12:19:26 -04003960 def test_encode_maxlinelen_too_small(self):
3961 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3962
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003963 def test_encode(self):
3964 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003965 eq(quoprimime.body_encode(''), '')
3966 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003967 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003968 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003969 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003970 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003971xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3972 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3973x xxxx xxxx xxxx xxxx=20""")
3974 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003975 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3976 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003977xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3978 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3979x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003980 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003981one line
3982
3983two line"""), """\
3984one line
3985
3986two line""")
3987
3988
Ezio Melottib3aedd42010-11-20 19:04:17 +00003989
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003990# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # again (if present) so it does not leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Non-ASCII text cannot be header-encoded with us-ascii ...
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(eight_bit)
        # ... but with utf-8 it is expected as a base64 encoded word.
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8 = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8.body_encode(b'hello world'))
        # Try a charset with None body encoding
        plain = Charset('us-ascii')
        check('hello world', plain.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake = Charset('fake')
        check('hello world', fake.body_encode('hello world'))

    def test_unicode_charset_name(self):
        # A plain ASCII name round-trips through str(); a name containing a
        # non-ASCII character is expected to be rejected.
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4045
4046
Ezio Melottib3aedd42010-11-20 19:04:17 +00004047
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004048# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Tests for email.header.Header, decode_header() and make_header().

    def test_simple(self):
        # NOTE(review): the spacing expected between appended chunks depends
        # on how Header.encode() joins chunks -- confirm against the email
        # package version under test.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        # Every folded line of a long ASCII header must fit in maxlinelen.
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports both values when the check fails.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        # One header built from three chunks in three charsets: check the
        # encoded form, the decoded chunks, str() and a make_header() round
        # trip.
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        # The fold position depends on maxlinelen and on the room taken by
        # header_name; str() always gives back the unfolded text.
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        # A q-encoded header is split into encoded words that fit
        # maxlinelen, and decoding the result restores the original text.
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        # Same as test_quopri_splittable, for a charset whose expected
        # output is base64 ("b") encoded words.
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset object.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # For utf-8 the expected encoding is "q" for the mostly-ASCII text
        # and "b" for the all-non-ASCII text.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        # Undecodable bytes raise UnicodeError unless errors='replace' is
        # passed.
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        e = x.decode('ascii', 'surrogateescape')
        h = Header(e, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_make_header_handles_binary_unknown8bit(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        h2 = email.header.make_header(email.header.decode_header(h))
        self.assertEqual(str(h2),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h2), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # decode_header() yields (string, charset) pairs; append one in that
        # order.  Mutating the returned list must not affect the header.
        chunks.append(('test2', 'ascii'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        # The flattened header is "name: " followed by the value, so the
        # value's own leading space comes after the separator space.
        self.assertEqual(str(msg), "SomeHeader:  value with leading ws\n\n")
4400
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004401
Ezio Melottib3aedd42010-11-20 19:04:17 +00004402
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004403# Test RFC 2231 header parameters (en/de)coding
4404class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # The 'title' parameter of msg_29.txt is expected as a
    # (charset, language, value) triple, with and without unquoting.
    parsed = self._msgobj('msg_29.txt')
    unquoted = parsed.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    still_quoted = parsed.get_param('title', unquote=False)
    self.assertEqual(
        still_quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4412
def test_set_param(self):
    # set_param() with a charset (and optionally a language) is expected to
    # be read back as a triple and flattened in RFC 2231 extended form.
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    fresh = Message()
    fresh.set_param('title', title, charset='us-ascii')
    check(fresh.get_param('title'), ('us-ascii', '', title))
    fresh.set_param('title', title, charset='us-ascii', language='en')
    check(fresh.get_param('title'), ('us-ascii', 'en', title))
    loaded = self._msgobj('msg_01.txt')
    loaded.set_param('title', title, charset='us-ascii', language='en')
    flattened = loaded.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4449
R David Murraya2860e82011-04-16 09:20:30 -04004450 def test_set_param_requote(self):
4451 msg = Message()
4452 msg.set_param('title', 'foo')
4453 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4454 msg.set_param('title', 'bar', requote=False)
4455 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4456 # tspecial is still quoted.
4457 msg.set_param('title', "(bar)bell", requote=False)
4458 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4459
def test_del_param(self):
    # After adding two extended parameters and deleting 'foo' again, only
    # 'title' is expected in the flattened Content-Type header.
    check = self.ndiffAssertEqual
    loaded = self._msgobj('msg_01.txt')
    loaded.set_param('foo', 'bar', charset='us-ascii', language='en')
    loaded.set_param('title', 'This is even more ***fun*** isn\'t it!',
                     charset='us-ascii', language='en')
    loaded.del_param('foo', header='Content-Type')
    flattened = loaded.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4489
def test_rfc2231_get_content_charset(self):
    # get_content_charset() on msg_32.txt is expected to give 'us-ascii'.
    parsed = self._msgobj('msg_32.txt')
    self.assertEqual(parsed.get_content_charset(), 'us-ascii')
4494
R. David Murraydfd7eb02010-12-24 22:36:49 +00004495 def test_rfc2231_parse_rfc_quoting(self):
4496 m = textwrap.dedent('''\
4497 Content-Disposition: inline;
4498 \tfilename*0*=''This%20is%20even%20more%20;
4499 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4500 \tfilename*2="is it not.pdf"
4501
4502 ''')
4503 msg = email.message_from_string(m)
4504 self.assertEqual(msg.get_filename(),
4505 'This is even more ***fun*** is it not.pdf')
4506 self.assertEqual(m, msg.as_string())
4507
4508 def test_rfc2231_parse_extra_quoting(self):
4509 m = textwrap.dedent('''\
4510 Content-Disposition: inline;
4511 \tfilename*0*="''This%20is%20even%20more%20";
4512 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4513 \tfilename*2="is it not.pdf"
4514
4515 ''')
4516 msg = email.message_from_string(m)
4517 self.assertEqual(msg.get_filename(),
4518 'This is even more ***fun*** is it not.pdf')
4519 self.assertEqual(m, msg.as_string())
4520
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004521 def test_rfc2231_no_language_or_charset(self):
4522 m = '''\
4523Content-Transfer-Encoding: 8bit
4524Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4525Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4526
4527'''
4528 msg = email.message_from_string(m)
4529 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004530 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004531 self.assertEqual(
4532 param,
4533 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4534
4535 def test_rfc2231_no_language_or_charset_in_filename(self):
4536 m = '''\
4537Content-Disposition: inline;
4538\tfilename*0*="''This%20is%20even%20more%20";
4539\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4540\tfilename*2="is it not.pdf"
4541
4542'''
4543 msg = email.message_from_string(m)
4544 self.assertEqual(msg.get_filename(),
4545 'This is even more ***fun*** is it not.pdf')
4546
4547 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4548 m = '''\
4549Content-Disposition: inline;
4550\tfilename*0*="''This%20is%20even%20more%20";
4551\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4552\tfilename*2="is it not.pdf"
4553
4554'''
4555 msg = email.message_from_string(m)
4556 self.assertEqual(msg.get_filename(),
4557 'This is even more ***fun*** is it not.pdf')
4558
4559 def test_rfc2231_partly_encoded(self):
4560 m = '''\
4561Content-Disposition: inline;
4562\tfilename*0="''This%20is%20even%20more%20";
4563\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4564\tfilename*2="is it not.pdf"
4565
4566'''
4567 msg = email.message_from_string(m)
4568 self.assertEqual(
4569 msg.get_filename(),
4570 'This%20is%20even%20more%20***fun*** is it not.pdf')
4571
4572 def test_rfc2231_partly_nonencoded(self):
4573 m = '''\
4574Content-Disposition: inline;
4575\tfilename*0="This%20is%20even%20more%20";
4576\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4577\tfilename*2="is it not.pdf"
4578
4579'''
4580 msg = email.message_from_string(m)
4581 self.assertEqual(
4582 msg.get_filename(),
4583 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4584
4585 def test_rfc2231_no_language_or_charset_in_boundary(self):
4586 m = '''\
4587Content-Type: multipart/alternative;
4588\tboundary*0*="''This%20is%20even%20more%20";
4589\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4590\tboundary*2="is it not.pdf"
4591
4592'''
4593 msg = email.message_from_string(m)
4594 self.assertEqual(msg.get_boundary(),
4595 'This is even more ***fun*** is it not.pdf')
4596
4597 def test_rfc2231_no_language_or_charset_in_charset(self):
4598 # This is a nonsensical charset value, but tests the code anyway
4599 m = '''\
4600Content-Type: text/plain;
4601\tcharset*0*="This%20is%20even%20more%20";
4602\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4603\tcharset*2="is it not.pdf"
4604
4605'''
4606 msg = email.message_from_string(m)
4607 self.assertEqual(msg.get_content_charset(),
4608 'this is even more ***fun*** is it not.pdf')
4609
4610 def test_rfc2231_bad_encoding_in_filename(self):
4611 m = '''\
4612Content-Disposition: inline;
4613\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4614\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4615\tfilename*2="is it not.pdf"
4616
4617'''
4618 msg = email.message_from_string(m)
4619 self.assertEqual(msg.get_filename(),
4620 'This is even more ***fun*** is it not.pdf')
4621
4622 def test_rfc2231_bad_encoding_in_charset(self):
4623 m = """\
4624Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4625
4626"""
4627 msg = email.message_from_string(m)
4628 # This should return None because non-ascii characters in the charset
4629 # are not allowed.
4630 self.assertEqual(msg.get_content_charset(), None)
4631
4632 def test_rfc2231_bad_character_in_charset(self):
4633 m = """\
4634Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4635
4636"""
4637 msg = email.message_from_string(m)
4638 # This should return None because non-ascii characters in the charset
4639 # are not allowed.
4640 self.assertEqual(msg.get_content_charset(), None)
4641
4642 def test_rfc2231_bad_character_in_filename(self):
4643 m = '''\
4644Content-Disposition: inline;
4645\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4646\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4647\tfilename*2*="is it not.pdf%E2"
4648
4649'''
4650 msg = email.message_from_string(m)
4651 self.assertEqual(msg.get_filename(),
4652 'This is even more ***fun*** is it not.pdf\ufffd')
4653
4654 def test_rfc2231_unknown_encoding(self):
4655 m = """\
4656Content-Transfer-Encoding: 8bit
4657Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4658
4659"""
4660 msg = email.message_from_string(m)
4661 self.assertEqual(msg.get_filename(), 'myfile.txt')
4662
4663 def test_rfc2231_single_tick_in_filename_extended(self):
4664 eq = self.assertEqual
4665 m = """\
4666Content-Type: application/x-foo;
4667\tname*0*=\"Frank's\"; name*1*=\" Document\"
4668
4669"""
4670 msg = email.message_from_string(m)
4671 charset, language, s = msg.get_param('name')
4672 eq(charset, None)
4673 eq(language, None)
4674 eq(s, "Frank's Document")
4675
4676 def test_rfc2231_single_tick_in_filename(self):
4677 m = """\
4678Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4679
4680"""
4681 msg = email.message_from_string(m)
4682 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004683 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004684 self.assertEqual(param, "Frank's Document")
4685
4686 def test_rfc2231_tick_attack_extended(self):
4687 eq = self.assertEqual
4688 m = """\
4689Content-Type: application/x-foo;
4690\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4691
4692"""
4693 msg = email.message_from_string(m)
4694 charset, language, s = msg.get_param('name')
4695 eq(charset, 'us-ascii')
4696 eq(language, 'en-us')
4697 eq(s, "Frank's Document")
4698
4699 def test_rfc2231_tick_attack(self):
4700 m = """\
4701Content-Type: application/x-foo;
4702\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4703
4704"""
4705 msg = email.message_from_string(m)
4706 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004707 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004708 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4709
4710 def test_rfc2231_no_extended_values(self):
4711 eq = self.assertEqual
4712 m = """\
4713Content-Type: application/x-foo; name=\"Frank's Document\"
4714
4715"""
4716 msg = email.message_from_string(m)
4717 eq(msg.get_param('name'), "Frank's Document")
4718
4719 def test_rfc2231_encoded_then_unencoded_segments(self):
4720 eq = self.assertEqual
4721 m = """\
4722Content-Type: application/x-foo;
4723\tname*0*=\"us-ascii'en-us'My\";
4724\tname*1=\" Document\";
4725\tname*2*=\" For You\"
4726
4727"""
4728 msg = email.message_from_string(m)
4729 charset, language, s = msg.get_param('name')
4730 eq(charset, 'us-ascii')
4731 eq(language, 'en-us')
4732 eq(s, 'My Document For You')
4733
4734 def test_rfc2231_unencoded_then_encoded_segments(self):
4735 eq = self.assertEqual
4736 m = """\
4737Content-Type: application/x-foo;
4738\tname*0=\"us-ascii'en-us'My\";
4739\tname*1*=\" Document\";
4740\tname*2*=\" For You\"
4741
4742"""
4743 msg = email.message_from_string(m)
4744 charset, language, s = msg.get_param('name')
4745 eq(charset, 'us-ascii')
4746 eq(language, 'en-us')
4747 eq(s, 'My Document For You')
4748
4749
Ezio Melottib3aedd42010-11-20 19:04:17 +00004750
R. David Murraya8f480f2010-01-16 18:30:03 +00004751# Tests to ensure that signed parts of an email are completely preserved, as
4752# required by RFC1847 section 2.1. Note that these are incomplete, because the
4753# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    # Locates the first MIME part of a message: group 1 is the text that
    # follows the leading '--' on a delimiter line, group 2 is everything
    # between that line and the next line made of '--' plus the same text.
    # Compiled once here instead of on every comparison; the module
    # already imports re, so no method-local import is needed.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return a 2-tuple: the raw text read from the named test data
        # file, and the Message object parsed from that text.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the first MIME part found in text.  A missing part is
        # reported as a test failure naming which message was at fault,
        # rather than surfacing as AttributeError on None.group().
        match = self._first_part_re.search(text)
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to equal the original exactly.
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'result')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check as above, but going through Generator.flatten()
        # into an in-memory text buffer.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4786
4787
Ezio Melottib3aedd42010-11-20 19:04:17 +00004788
# Run every test in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()