blob: 8530e5e9a119c0daec53f289d36f84fd73e71213 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Small string constants shared by the tests below.
NL = '\n'          # line separator used when re-joining split lines
EMPTYSTRING = ''
SPACE = ' '        # separator used when joining header tokens
45
46
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    # Exercises the Message API: header access, charsets, payload decoding,
    # MIME parameters and the content-type helpers.  Tests that call
    # self._msgobj() or openfile() read fixture messages by file name.

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        # Keyword form of the original positional call (out, True, 0).
        gen = Generator(out, mangle_from_=True, maxheaderlen=0)
        gen.flatten(msg, unixfrom=False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Item assignment on a Message appends a header instead of
            # replacing it, and lookups return the first match.  Remove the
            # previous value so each alias is the one actually consulted;
            # deleting a header that is absent is a no-op.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope header; the
        # rest must match the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The semicolons here are part of the quoted value and must not be
        # treated as parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        # Same header text, spelled with a double-quoted literal.
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header name only the first occurrence changes.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The undecodable payload is expected back as raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
549
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    # Checks which Content-Transfer-Encoding gets chosen, and how an
    # encoded image body is line-wrapped.

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as gif:
            raw_image = gif.read()
        encoded_body = MIMEImage(raw_image).get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        longest = max(len(line) for line in encoded_body.split('\n'))
        self.assertLessEqual(longest, 76)

    def test_encode_empty_payload(self):
        empty = Message()
        empty.set_charset('us-ascii')
        self.assertEqual(empty['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        header = 'content-transfer-encoding'
        # 7bit data and the default us-ascii _charset
        plain = MIMEText('hello world')
        self.assertEqual(plain[header], '7bit')
        # Similar, but with 8bit data
        eight_bit = MIMEText('hello \xf8 world')
        self.assertEqual(eight_bit[header], '8bit')
        # And now with a different charset
        latin1 = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        self.assertEqual(latin1[header], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        japanese = MIMEText('文', _charset='euc-jp')
        self.assertEqual(japanese['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000587
Ezio Melottib3aedd42010-11-20 19:04:17 +0000588
# Test long header wrapping
590class TestLongHeaders(TestEmailBase):
591 def test_split_long_continuation(self):
592 eq = self.ndiffAssertEqual
593 msg = email.message_from_string("""\
594Subject: bug demonstration
595\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
596\tmore text
597
598test
599""")
600 sfp = StringIO()
601 g = Generator(sfp)
602 g.flatten(msg)
603 eq(sfp.getvalue(), """\
604Subject: bug demonstration
605\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
606\tmore text
607
608test
609""")
610
611 def test_another_long_almost_unsplittable_header(self):
612 eq = self.ndiffAssertEqual
613 hstr = """\
614bug demonstration
615\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
616\tmore text"""
617 h = Header(hstr, continuation_ws='\t')
618 eq(h.encode(), """\
619bug demonstration
620\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
621\tmore text""")
622 h = Header(hstr.replace('\t', ' '))
623 eq(h.encode(), """\
624bug demonstration
625 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
626 more text""")
627
628 def test_long_nonstring(self):
629 eq = self.ndiffAssertEqual
630 g = Charset("iso-8859-1")
631 cz = Charset("iso-8859-2")
632 utf8 = Charset("utf-8")
633 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
634 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
635 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
636 b'bef\xf6rdert. ')
637 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
638 b'd\xf9vtipu.. ')
639 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
640 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
641 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
642 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
643 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
644 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
645 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
646 '\u3044\u307e\u3059\u3002')
647 h = Header(g_head, g, header_name='Subject')
648 h.append(cz_head, cz)
649 h.append(utf8_head, utf8)
650 msg = Message()
651 msg['Subject'] = h
652 sfp = StringIO()
653 g = Generator(sfp)
654 g.flatten(msg)
655 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000656Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
657 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
658 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
659 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
660 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
661 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
662 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
663 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
664 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
665 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
666 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000667
668""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000669 eq(h.encode(maxlinelen=76), """\
670=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
671 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
672 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
673 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
674 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
675 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
676 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
677 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
678 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
679 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
680 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000681
682 def test_long_header_encode(self):
683 eq = self.ndiffAssertEqual
684 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
685 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
686 header_name='X-Foobar-Spoink-Defrobnit')
687 eq(h.encode(), '''\
688wasnipoop; giraffes="very-long-necked-animals";
689 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
690
691 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
692 eq = self.ndiffAssertEqual
693 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
694 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
695 header_name='X-Foobar-Spoink-Defrobnit',
696 continuation_ws='\t')
697 eq(h.encode(), '''\
698wasnipoop; giraffes="very-long-necked-animals";
699 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
700
701 def test_long_header_encode_with_tab_continuation(self):
702 eq = self.ndiffAssertEqual
703 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
704 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
705 header_name='X-Foobar-Spoink-Defrobnit',
706 continuation_ws='\t')
707 eq(h.encode(), '''\
708wasnipoop; giraffes="very-long-necked-animals";
709\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
710
R David Murray3a6152f2011-03-14 21:13:03 -0400711 def test_header_encode_with_different_output_charset(self):
712 h = Header('文', 'euc-jp')
713 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
714
715 def test_long_header_encode_with_different_output_charset(self):
716 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
717 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
718 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
719 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
720 res = """\
721=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
722 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
723 self.assertEqual(h.encode(), res)
724
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000725 def test_header_splitter(self):
726 eq = self.ndiffAssertEqual
727 msg = MIMEText('')
728 # It'd be great if we could use add_header() here, but that doesn't
729 # guarantee an order of the parameters.
730 msg['X-Foobar-Spoink-Defrobnit'] = (
731 'wasnipoop; giraffes="very-long-necked-animals"; '
732 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
733 sfp = StringIO()
734 g = Generator(sfp)
735 g.flatten(msg)
736 eq(sfp.getvalue(), '''\
737Content-Type: text/plain; charset="us-ascii"
738MIME-Version: 1.0
739Content-Transfer-Encoding: 7bit
740X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
741 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
742
743''')
744
def test_no_semis_header_splitter(self):
    """A long References header without semicolons folds at a space."""
    message = Message()
    message['From'] = 'test@dom.ain'
    message['References'] = SPACE.join(
        ['<%d@dom.ain>' % number for number in range(10)])
    message.set_payload('Test')
    buf = StringIO()
    Generator(buf).flatten(message)
    expected = """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test"""
    self.ndiffAssertEqual(buf.getvalue(), expected)
760
def test_no_split_long_header(self):
    """An 80-character run with no break points is emitted unsplit."""
    check = self.ndiffAssertEqual
    token = 'x' * 80
    # Header objects are really field *value* classes and know nothing
    # about field names, so the 'References:' text lands on its own line.
    with_name = Header('References: ' + token)
    check(with_name.encode(), 'References:\n ' + token)
    bare = Header(token)
    check(bare.encode(), token)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000772
def test_splitting_multiple_long_lines(self):
    """Each of three tab-continued long lines is folded independently."""
    # The same long line is used three times; lines two and three start
    # with a tab, and the input ends with a newline.
    line = ('from babylon.socal-raves.org (localhost [127.0.0.1]); '
            'by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; '
            'for <mailman-admin@babylon.socal-raves.org>; '
            'Sat, 2 Feb 2002 17:00:06 -0800 (PST)')
    hstr = '\n\t'.join([line] * 3) + '\n'
    header = Header(hstr, continuation_ws='\t')
    # Every input line is expected to break after each semicolon.
    folded = """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)"""
    expected = '\n\t'.join([folded] * 3)
    self.ndiffAssertEqual(header.encode(), expected)
794
def test_splitting_first_line_only_is_long(self):
    """Only the over-long first line is folded; later lines are kept."""
    received = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    header = Header(received, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    expected = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    self.ndiffAssertEqual(header.encode(), expected)
810
def test_long_8bit_header(self):
    """An iso-8859-1 Subject folds at 76 columns and not at all at 0."""
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    # First the bare header value ...
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    # ... then the same header carried by a message.
    message = Message()
    message['Subject'] = subject
    check(message.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    # maxheaderlen=0 is expected to keep the header on a single line.
    check(message.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
830
def test_long_8bit_header_no_charset(self):
    """A non-ASCII str header fails to flatten; a utf-8 Header folds."""
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    # Plain string value: as_string() is expected to raise.
    plain = Message()
    plain['Reply-To'] = header_string
    self.assertRaises(UnicodeEncodeError, plain.as_string)
    # Same text wrapped in a Header with an explicit charset.
    wrapped = Message()
    wrapped['Reply-To'] = Header(header_string, 'utf-8',
                                 header_name='Reply-To')
    expected = """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

"""
    self.ndiffAssertEqual(wrapped.as_string(maxheaderlen=78), expected)
847
def test_long_to_header(self):
    """A long To: address list folds at the comma-space separators."""
    # The first two addresses are joined by a bare comma (no space).
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    message = Message()
    message['To'] = recipients
    expected = '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

'''
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
864
def test_long_line_after_append(self):
    """Appending to a nearly full header starts a continuation line."""
    first = 'This is an example of string which has almost the limit of header length.'
    header = Header(first)
    header.append('Add another line.')
    expected = first + '\n Add another line.'
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
873
def test_shorter_line_with_append(self):
    """Two short chunks are joined with a space and stay on one line."""
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    expected = 'This is a shorter line. Add another sentence. (Surprise?)'
    self.ndiffAssertEqual(header.encode(), expected)
881
def test_long_field_name(self):
    """A long field name leaves a shortened first encoded word."""
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    text = ('Die Mieter treten hier ein werden mit einem Foerderband '
            'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
            'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
            'bef\xf6rdert. ')
    header = Header(text, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    expected = """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?="""
    self.ndiffAssertEqual(header.encode(maxlinelen=76), expected)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000896
def test_long_received_header(self):
    """Header-instance and plain-string Received values fold alike."""
    value = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
    message = Message()
    message['Received-1'] = Header(value, continuation_ws='\t')
    message['Received-2'] = value
    # This should be splitting on spaces not semicolons.
    folded = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
              'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;\n'
              ' Wed, 05 Mar 2003 18:10:18 -0700\n')
    expected = 'Received-1: ' + folded + 'Received-2: ' + folded + '\n'
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
912
def test_string_headerinst_eq(self):
    """A str value and the equivalent Header instance fold identically."""
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    message = Message()
    message['Received-1'] = Header(value, header_name='Received-1',
                                   continuation_ws='\t')
    message['Received-2'] = value
    # XXX This should be splitting on spaces not commas.
    folded = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
              'tu-muenchen.de> (David Bremner\'s message of "Thu,\n'
              ' 6 Mar 2003 13:58:21 +0100")\n')
    expected = 'Received-1: ' + folded + 'Received-2: ' + folded + '\n'
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
929
def test_long_unbreakable_lines_with_continuation(self):
    """Unbreakable two-line values are pushed below the field name."""
    face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
    message = Message()
    message['Face-1'] = face
    message['Face-2'] = Header(face, header_name='Face-2')
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    expected = """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

"""
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
949
def test_another_long_multiline_header(self):
    """A parsed long Received header is re-folded after its semicolon."""
    source = ('Received: from siimage.com '
              '([172.25.1.3]) by zima.siliconimage.com with '
              'Microsoft SMTPSVC(5.0.2195.4905); '
              'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(source)
    expected = '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
 Wed, 16 Oct 2002 07:41:11 -0700

'''
    self.ndiffAssertEqual(parsed.as_string(maxheaderlen=78), expected)
962
def test_long_lines_with_different_header(self):
    """The same value added as str and as Header folds the same way."""
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    message = Message()
    # Two 'List' headers are added: one plain string, one Header instance.
    message['List'] = value
    message['List'] = Header(value, header_name='List')
    folded = (
        'List: List-Unsubscribe: '
        '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,\n'
        '        <mailto:spamassassin-talk-request@lists.sourceforge.net'
        '?subject=unsubscribe>\n')
    expected = folded + folded + '\n'
    self.ndiffAssertEqual(message.as_string(maxheaderlen=78), expected)
979
R. David Murray6f0022d2011-01-07 21:57:25 +0000980 def test_long_rfc2047_header_with_embedded_fws(self):
981 h = Header(textwrap.dedent("""\
982 We're going to pretend this header is in a non-ascii character set
983 \tto see if line wrapping with encoded words and embedded
984 folding white space works"""),
985 charset='utf-8',
986 header_name='Test')
987 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
988 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
989 =?utf-8?q?cter_set?=
990 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
991 =?utf-8?q?_folding_white_space_works?=""")+'\n')
992
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000993
Ezio Melottib3aedd42010-11-20 19:04:17 +0000994
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000995# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    """Check Generator's handling of body lines that begin with 'From '."""

    def setUp(self):
        # One-header message whose body starts with a "From " line.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flattened(self, mangle):
        """Return self.msg flattened with the given mangle_from_ setting."""
        buf = StringIO()
        Generator(buf, mangle_from_=mangle).flatten(self.msg)
        return buf.getvalue()

    def test_mangled_from(self):
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flattened(True), expected)

    def test_dont_mangle_from(self):
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(self._flattened(False), expected)
1026
1027
Ezio Melottib3aedd42010-11-20 19:04:17 +00001028
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001029# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    """Exercise MIMEAudio built from the audiotest.au sample file."""

    def setUp(self):
        # Keep the raw bytes so the encoded payload can be compared to them.
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload must base64-decode back to the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A unique sentinel, so identity proves the failobj came back.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        same(self._au.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._au.get_param('foobar', missing), missing)
        same(self._au.get_param('attachment', missing,
                                header='foobar'), missing)
1067
1068
Ezio Melottib3aedd42010-11-20 19:04:17 +00001069
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001070# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    """Exercise MIMEImage built from the PyBanner048.gif sample file."""

    def setUp(self):
        # Keep the raw bytes so the encoded payload can be compared to them.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload must base64-decode back to the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        same = self.assertIs
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A unique sentinel, so identity proves the failobj came back.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        same(self._im.get_param('foo', failobj=missing,
                                header='content-disposition'), missing)
        # Try some missing stuff
        same(self._im.get_param('foobar', missing), missing)
        same(self._im.get_param('attachment', missing,
                                header='foobar'), missing)
1108
1109
Ezio Melottib3aedd42010-11-20 19:04:17 +00001110
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001111# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    """Check the headers and body MIMEApplication builds from raw bytes."""

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001126
1127
Ezio Melottib3aedd42010-11-20 19:04:17 +00001128
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001129# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    """Check MIMEText content type, payload and charset handling."""

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A unique sentinel, so identity proves the failobj came back.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(
            self._msg.get_param('charset', missing, header='foobar'),
            missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # assertIn shows the flattened text when the check fails.
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1180
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001181
Ezio Melottib3aedd42010-11-20 19:04:17 +00001182
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001183# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    """Test building, flattening and parsing of multipart/* messages."""

    def setUp(self):
        # Build a multipart/mixed container holding a text part and an
        # image part; the tests reach them via _msg, _txt and _im.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag; 0 means standard time.
        tzsecs = time.timezone if timetuple[-1] == 0 else time.altzone
        # time.timezone/altzone count seconds *west* of UTC, so a positive
        # value maps to a negative offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  Dividing the signed value by 36
        # doubled the sign east of UTC ("+-100") and was wrong for zones
        # that are not on a whole hour (5h30 came out as "0550").
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _new_outer(self):
        """Return an empty multipart/mixed with Subject, To and From set."""
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload must hold the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._new_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._new_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001555
1556
Ezio Melottib3aedd42010-11-20 19:04:17 +00001557
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001558# Test some badly formatted messages
1559class TestNonConformant(TestEmailBase):
def test_parse_missing_minor_type(self):
    """msg_14.txt must report text/plain as its content type."""
    parsed = self._msgobj('msg_14.txt')
    self.assertEqual(parsed.get_content_type(), 'text/plain')
    self.assertEqual(parsed.get_content_maintype(), 'text')
    self.assertEqual(parsed.get_content_subtype(), 'plain')
1566
1567 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001568 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001569 msg = self._msgobj('msg_15.txt')
1570 # XXX We can probably eventually do better
1571 inner = msg.get_payload(0)
1572 unless(hasattr(inner, 'defects'))
1573 self.assertEqual(len(inner.defects), 1)
1574 unless(isinstance(inner.defects[0],
1575 errors.StartBoundaryNotFoundDefect))
1576
1577 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001578 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001579 msg = self._msgobj('msg_25.txt')
1580 unless(isinstance(msg.get_payload(), str))
1581 self.assertEqual(len(msg.defects), 2)
1582 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1583 unless(isinstance(msg.defects[1],
1584 errors.MultipartInvariantViolationDefect))
1585
1586 def test_invalid_content_type(self):
1587 eq = self.assertEqual
1588 neq = self.ndiffAssertEqual
1589 msg = Message()
1590 # RFC 2045, $5.2 says invalid yields text/plain
1591 msg['Content-Type'] = 'text'
1592 eq(msg.get_content_maintype(), 'text')
1593 eq(msg.get_content_subtype(), 'plain')
1594 eq(msg.get_content_type(), 'text/plain')
1595 # Clear the old value and try something /really/ invalid
1596 del msg['content-type']
1597 msg['Content-Type'] = 'foo'
1598 eq(msg.get_content_maintype(), 'text')
1599 eq(msg.get_content_subtype(), 'plain')
1600 eq(msg.get_content_type(), 'text/plain')
1601 # Still, make sure that the message is idempotently generated
1602 s = StringIO()
1603 g = Generator(s)
1604 g.flatten(msg)
1605 neq(s.getvalue(), 'Content-Type: foo\n\n')
1606
1607 def test_no_start_boundary(self):
1608 eq = self.ndiffAssertEqual
1609 msg = self._msgobj('msg_31.txt')
1610 eq(msg.get_payload(), """\
1611--BOUNDARY
1612Content-Type: text/plain
1613
1614message 1
1615
1616--BOUNDARY
1617Content-Type: text/plain
1618
1619message 2
1620
1621--BOUNDARY--
1622""")
1623
1624 def test_no_separating_blank_line(self):
1625 eq = self.ndiffAssertEqual
1626 msg = self._msgobj('msg_35.txt')
1627 eq(msg.as_string(), """\
1628From: aperson@dom.ain
1629To: bperson@dom.ain
1630Subject: here's something interesting
1631
1632counter to RFC 2822, there's no separating newline here
1633""")
1634
1635 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001636 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001637 msg = self._msgobj('msg_41.txt')
1638 unless(hasattr(msg, 'defects'))
1639 self.assertEqual(len(msg.defects), 2)
1640 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1641 unless(isinstance(msg.defects[1],
1642 errors.MultipartInvariantViolationDefect))
1643
1644 def test_missing_start_boundary(self):
1645 outer = self._msgobj('msg_42.txt')
1646 # The message structure is:
1647 #
1648 # multipart/mixed
1649 # text/plain
1650 # message/rfc822
1651 # multipart/mixed [*]
1652 #
1653 # [*] This message is missing its start boundary
1654 bad = outer.get_payload(1).get_payload(0)
1655 self.assertEqual(len(bad.defects), 1)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001656 self.assertTrue(isinstance(bad.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001657 errors.StartBoundaryNotFoundDefect))
1658
1659 def test_first_line_is_continuation_header(self):
1660 eq = self.assertEqual
1661 m = ' Line 1\nLine 2\nLine 3'
1662 msg = email.message_from_string(m)
1663 eq(msg.keys(), [])
1664 eq(msg.get_payload(), 'Line 2\nLine 3')
1665 eq(len(msg.defects), 1)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001666 self.assertTrue(isinstance(msg.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001667 errors.FirstHeaderLineIsContinuationDefect))
1668 eq(msg.defects[0].line, ' Line 1\n')
1669
1670
Ezio Melottib3aedd42010-11-20 19:04:17 +00001671
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001672# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    """RFC 2047 encoded-word decoding and re-encoding of header values."""

    def test_rfc2047_multiline(self):
        """Encoded words spread over a folded header decode and re-encode."""
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        """An encoded word followed by plain text splits into two chunks."""
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        text = str(make_header(chunks))
        self.assertEqual(text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        """Alternating plain and base64 encoded words decode in order."""
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None), (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        text = str(make_header(chunks))
        self.assertEqual(text, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        """Encoded words not delimited by whitespace are returned undecoded."""
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        """Whitespace-delimited encoded words are decoded chunk by chunk."""
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        expected = [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                    (b'rg', None), (b'\xe5', 'iso-8859-1'),
                    (b'sbord', None)]
        self.assertEqual(decode_header(raw), expected)

    def test_rfc2047_B_bad_padding(self):
        """Base64 encoded words decode with missing or partial padding."""
        template = '=?iso-8859-1?B?%s?='
        # only test complete bytes
        cases = (
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        )
        for encoded, decoded in cases:
            chunks = decode_header(template % encoded)
            self.assertEqual(chunks, [(decoded, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        """A '=' not followed by hex digits is kept literally (issue 10004)."""
        # issue 10004.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        expected = [(b'andr\xe9=zz', 'iso-8659-1')]
        self.assertEqual(decode_header(raw), expected)
1736
Ezio Melottib3aedd42010-11-20 19:04:17 +00001737
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001738# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* and multipart containers."""

    def setUp(self):
        """Cache the text of msg_11.txt on the instance as ``_text``."""
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def _check_digest_default_types(self, filename):
        """Check default and content types of the two containers in *filename*.

        Both top-level subparts must report message/rfc822 and the first
        payload of each must report text/plain, for the default type as well
        as for the content type.
        """
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        for index in (0, 1):
            container = msg.get_payload(index)
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_type_error(self):
        """MIMEMessage rejects a plain string argument with TypeError."""
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        """Wrapping a Message stores that same object as the only payload."""
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        """Attaching a second message to a MIMEMessage raises an error."""
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        """Flattening a MIMEMessage emits outer headers, then the inner message."""
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        """msg_11.txt parses to a message/rfc822 with one Message payload."""
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        """msg_16.txt parses into the three parts of a multipart/report."""
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace of the indented lines in this
        # literal must match msg_16.txt byte-for-byte -- confirm against the
        # fixture before relying on it.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
 Date: Sun, 23 Sep 2001 20:10:55 -0700
 From: "Ian T. Henry" <henryi@oxy.edu>
 To: SoCal Raves <scr@socal-raves.org>
 Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

 Recipient address: jangel1@cougar.noc.ucla.edu
 Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information. It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        """A hand-built multipart with preamble/epilogue matches msg_21.txt."""
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        """A preamble without a trailing newline is followed by one on output."""
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        """Containers in msg_30.txt default to message/rfc822."""
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        """Containers in msg_28.txt report message/rfc822."""
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        """Subparts of a built digest stay message/rfc822 without headers."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Drop the explicit headers; the reported types must not change.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        """Parts passed via ``_subparts`` become the payload, in order."""
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        """A MIMEMultipart built with no arguments is already multipart."""
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002037
Ezio Melottib3aedd42010-11-20 19:04:17 +00002038
# A general test of parser->model->generator idempotency.  In other words:
# read a message in, parse it into a message object tree, then, without
# touching the tree, regenerate the plain text.  The original text and the
# regenerated text should be identical.  Note that we ignore the Unix-From
# line, since it may contain a changed date.
class TestIdempotent(TestEmailBase):
    """Parse fixture messages and check that regenerating them is lossless."""

    # Line separator the fixture-content expectations below are built with.
    linesep = '\n'

    def _msgobj(self, filename):
        """Return ``(message, text)`` for the fixture file *filename*.

        *text* is the raw file content and *message* is the result of
        parsing that content with email.message_from_string().
        """
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        """Assert that flattening *msg* reproduces *text* exactly.

        Headers are not re-wrapped (``maxheaderlen=0``); *unixfrom* is
        passed through to Generator.flatten().
        """
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        """msg_01.txt: check type, charset, preamble/epilogue and round trip."""
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        """msg_03.txt: text/plain is reported with no parameters at all."""
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # The only round trip that asks the generator for the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        """msg_05.txt: check Content-Type parameters and the three subparts."""
        eq = self.assertEqual
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        """msg_06.txt: message/rfc822 wrapping a single text/plain message."""
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002201
2202
Ezio Melottib3aedd42010-11-20 19:04:17 +00002203
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002204# Test various other bits of the package's functionality
2205class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    """Parsing msg_01.txt from a string and flattening it gives the same text."""
    with openfile('msg_01.txt') as fp:
        source = fp.read()
    parsed = email.message_from_string(source)
    out = StringIO()
    # maxheaderlen=0: long headers must not be wrapped or continued, because
    # the output is compared with the input verbatim.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(source, out.getvalue())
2216
def test_message_from_file(self):
    """Parsing msg_01.txt from a file object and flattening it is lossless."""
    with openfile('msg_01.txt') as fp:
        source = fp.read()
        # Rewind so the parser sees the file from its start again.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    out = StringIO()
    # maxheaderlen=0: long headers must not be wrapped or continued, because
    # the output is compared with the input verbatim.
    Generator(out, maxheaderlen=0).flatten(parsed)
    self.assertEqual(source, out.getvalue())
2228
def test_message_from_string_with_class(self):
    """message_from_string() builds every part with the supplied class."""
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    # Every part reached by walk() must use the custom class as well.
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2246
def test_message_from_file_with_class(self):
    """message_from_file() builds every part with the supplied class."""
    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # Every part reached by walk() must use the custom class as well.
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2261
def test__all__(self):
    """The email package exports exactly the expected public names."""
    module = __import__('email')
    # Compare in sorted order so the declaration order of __all__ does not
    # matter; sorted() also avoids shadowing the builtin ``all``.
    self.assertEqual(sorted(module.__all__), [
        'base64mime', 'charset', 'encoders', 'errors', 'generator',
        'header', 'iterators', 'message', 'message_from_binary_file',
        'message_from_bytes', 'message_from_file',
        'message_from_string', 'mime', 'parser',
        'quoprimime', 'utils',
        ])
2274
def test_formatdate(self):
    """formatdate() output parses back to the same UTC date and time."""
    stamp = time.time()
    parsed = utils.parsedate(utils.formatdate(stamp))
    self.assertEqual(parsed[:6], time.gmtime(stamp)[:6])
2279
def test_formatdate_localtime(self):
    """formatdate(localtime=True) parses back to the local date and time."""
    stamp = time.time()
    formatted = utils.formatdate(stamp, localtime=True)
    parsed = utils.parsedate(formatted)
    self.assertEqual(parsed[:6], time.localtime(stamp)[:6])
2285
def test_formatdate_usegmt(self):
    """The zone is written as -0000 by default and as GMT with usegmt=True."""
    stamp = time.time()
    utc = time.gmtime(stamp)
    numeric_zone = time.strftime('%a, %d %b %Y %H:%M:%S -0000', utc)
    self.assertEqual(utils.formatdate(stamp, localtime=False), numeric_zone)
    gmt_zone = time.strftime('%a, %d %b %Y %H:%M:%S GMT', utc)
    self.assertEqual(
        utils.formatdate(stamp, localtime=False, usegmt=True), gmt_zone)
2294
def test_parsedate_none(self):
    """parsedate() returns None for an empty string."""
    result = utils.parsedate('')
    self.assertEqual(result, None)
2297
def test_parsedate_compact(self):
    """A date with no space after the comma parses like the spaced form."""
    # The FWS after the comma is optional
    compact = utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800')
    spaced = utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800')
    self.assertEqual(compact, spaced)
2302
def test_parsedate_no_dayofweek(self):
    """parsedate_tz() accepts a date that has no day-of-week prefix."""
    expected = (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800)
    parsed = utils.parsedate_tz('25 Feb 2003 13:47:26 -0800')
    self.assertEqual(parsed, expected)
2307
def test_parsedate_compact_no_dayofweek(self):
    """parsedate_tz() accepts a single-digit day with no day-of-week."""
    expected = (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)
    parsed = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
    self.assertEqual(parsed, expected)
2312
def test_parsedate_no_space_before_positive_offset(self):
    """A '+' zone offset glued to the time is still parsed as the offset."""
    parsed = utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800')
    expected = (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800)
    self.assertEqual(parsed, expected)
2316
def test_parsedate_no_space_before_negative_offset(self):
    """A '-' zone offset glued to the time is still parsed as the offset."""
    # Issue 1155362: we already handled '+' for this case.
    parsed = utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800')
    expected = (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800)
    self.assertEqual(parsed, expected)
2321
2322
def test_parsedate_accepts_time_with_dots(self):
    """Times written with '.' separators parse, with or without seconds."""
    cases = (
        ('5 Feb 2003 13.47.26 -0800',
         (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800)),
        ('5 Feb 2003 13.47 -0800',
         (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800)),
    )
    for text, expected in cases:
        self.assertEqual(utils.parsedate_tz(text), expected)
2329
def test_parsedate_acceptable_to_time_functions(self):
    """Tuples from parsedate()/parsedate_tz() work with mktime and strftime."""
    date_text = '5 Feb 2003 13:47:26 -0800'

    # parsedate() result is passed to the time functions unchanged.
    plain = utils.parsedate(date_text)
    seconds = int(time.mktime(plain))
    self.assertEqual(time.localtime(seconds)[:6], plain[:6])
    self.assertEqual(int(time.strftime('%Y', plain)), 2003)

    # parsedate_tz() appends a zone offset; only the first nine items are
    # handed to the time functions.
    with_zone = utils.parsedate_tz(date_text)
    seconds = int(time.mktime(with_zone[:9]))
    self.assertEqual(time.localtime(seconds)[:6], with_zone[:6])
    self.assertEqual(int(time.strftime('%Y', with_zone[:9])), 2003)
2340
def test_parsedate_y2k(self):
    """Test for parsing a date with a two-digit year.

    Parsing a date with a two-digit year should return the correct
    four-digit year. RFC822 allows two-digit years, but RFC2822 (which
    obsoletes RFC822) requires four-digit years.

    """
    pairs = (
        ('25 Feb 03 13:47:26 -0800', '25 Feb 2003 13:47:26 -0800'),
        ('25 Feb 71 13:47:26 -0800', '25 Feb 1971 13:47:26 -0800'),
    )
    for short_year, full_year in pairs:
        self.assertEqual(utils.parsedate_tz(short_year),
                         utils.parsedate_tz(full_year))
2353
def test_parseaddr_empty(self):
    """'<>' parses to an empty (name, address) pair that formats to ''."""
    parsed = utils.parseaddr('<>')
    self.assertEqual(parsed, ('', ''))
    self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2357
def test_noquote_dump(self):
    """A name without special characters is formatted without quotes."""
    pair = ('A Silly Person', 'person@dom.ain')
    self.assertEqual(utils.formataddr(pair),
                     'A Silly Person <person@dom.ain>')
2362
def test_escape_dump(self):
    """Parentheses in a name are escaped, and the result round-trips."""
    formatted = utils.formataddr(('A (Very) Silly Person', 'person@dom.ain'))
    self.assertEqual(formatted,
                     r'"A \(Very\) Silly Person" <person@dom.ain>')
    name = r'A \(Special\) Person'
    address = 'person@dom.ain'
    round_tripped = utils.parseaddr(utils.formataddr((name, address)))
    self.assertEqual(round_tripped, (name, address))
2370
def test_escape_backslashes(self):
    """Backslashes in a name are doubled, and the result round-trips."""
    # Raw string: '\B' and '\ ' are not valid escape sequences, so the
    # non-raw spelling only worked by accident and triggers a warning.
    self.assertEqual(
        utils.formataddr((r'Arthur \Backslash\ Foobar', 'person@dom.ain')),
        r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
    a = r'Arthur \Backslash\ Foobar'
    b = 'person@dom.ain'
    self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2378
def test_quotes_unicode_names(self):
    """A non-ASCII name is RFC 2047 encoded, utf-8 unless a charset is given."""
    # issue 1690608.  email.utils.formataddr() should be rfc2047 aware.
    pair = ("H\u00e4ns W\u00fcrst", 'person@dom.ain')
    self.assertEqual(utils.formataddr(pair),
                     "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>")
    self.assertEqual(utils.formataddr(pair, 'iso-8859-1'),
                     "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>")
2388
2389 def test_accepts_any_charset_like_object(self):
2390 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2391 name = "H\u00e4ns W\u00fcrst"
2392 addr = 'person@dom.ain'
2393 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2394 foobar = "FOOBAR"
2395 class CharsetMock:
2396 def header_encode(self, string):
2397 return foobar
2398 mock = CharsetMock()
2399 mock_expected = "%s <%s>" % (foobar, addr)
2400 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2401 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2402 utf8_base64)
2403
2404 def test_invalid_charset_like_object_raises_error(self):
2405 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2406 name = "H\u00e4ns W\u00fcrst"
2407 addr = 'person@dom.ain'
2408 # A object without a header_encode method:
2409 bad_charset = object()
2410 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2411 bad_charset)
2412
2413 def test_unicode_address_raises_error(self):
2414 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2415 addr = 'pers\u00f6n@dom.in'
2416 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2417 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2418
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002419 def test_name_with_dot(self):
2420 x = 'John X. Doe <jxd@example.com>'
2421 y = '"John X. Doe" <jxd@example.com>'
2422 a, b = ('John X. Doe', 'jxd@example.com')
2423 self.assertEqual(utils.parseaddr(x), (a, b))
2424 self.assertEqual(utils.parseaddr(y), (a, b))
2425 # formataddr() quotes the name if there's a dot in it
2426 self.assertEqual(utils.formataddr((a, b)), y)
2427
R. David Murray5397e862010-10-02 15:58:26 +00002428 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2429 # issue 10005. Note that in the third test the second pair of
2430 # backslashes is not actually a quoted pair because it is not inside a
2431 # comment or quoted string: the address being parsed has a quoted
2432 # string containing a quoted backslash, followed by 'example' and two
2433 # backslashes, followed by another quoted string containing a space and
2434 # the word 'example'. parseaddr copies those two backslashes
2435 # literally. Per rfc5322 this is not technically correct since a \ may
2436 # not appear in an address outside of a quoted string. It is probably
2437 # a sensible Postel interpretation, though.
2438 eq = self.assertEqual
2439 eq(utils.parseaddr('""example" example"@example.com'),
2440 ('', '""example" example"@example.com'))
2441 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2442 ('', '"\\"example\\" example"@example.com'))
2443 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2444 ('', '"\\\\"example\\\\" example"@example.com'))
2445
def test_parseaddr_preserves_spaces_in_local_part(self):
    """Check how parseaddr() treats whitespace inside an address local part."""
    # issue 9286.  A normal RFC5322 local part should not contain any
    # folding white space, but legacy local parts can (they are a sequence
    # of atoms, not dotatoms).  On the other hand we strip whitespace from
    # before the @ and around dots, on the assumption that the whitespace
    # around the punctuation is a mistake in what would otherwise be
    # an RFC5322 local part.  Leading whitespace is, as usual, stripped as
    # well.
    # NOTE(review): the first two assertions read identically here; the
    # literals may originally have differed in the number of spaces --
    # confirm against the upstream file before relying on them.
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr("merwok wok@xample.com"))
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr("merwok wok@xample.com"))
    self.assertEqual(('', "merwok wok@xample.com"),
        utils.parseaddr(" merwok wok @xample.com"))
    self.assertEqual(('', 'merwok"wok" wok@xample.com'),
        utils.parseaddr('merwok"wok" wok@xample.com'))
    self.assertEqual(('', 'merwok.wok.wok@xample.com'),
        utils.parseaddr('merwok. wok . wok@xample.com'))
2463
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002464 def test_multiline_from_comment(self):
2465 x = """\
2466Foo
2467\tBar <foo@example.com>"""
2468 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2469
2470 def test_quote_dump(self):
2471 self.assertEqual(
2472 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2473 r'"A Silly; Person" <person@dom.ain>')
2474
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002475 def test_charset_richcomparisons(self):
2476 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002477 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002478 cset1 = Charset()
2479 cset2 = Charset()
2480 eq(cset1, 'us-ascii')
2481 eq(cset1, 'US-ASCII')
2482 eq(cset1, 'Us-AsCiI')
2483 eq('us-ascii', cset1)
2484 eq('US-ASCII', cset1)
2485 eq('Us-AsCiI', cset1)
2486 ne(cset1, 'usascii')
2487 ne(cset1, 'USASCII')
2488 ne(cset1, 'UsAsCiI')
2489 ne('usascii', cset1)
2490 ne('USASCII', cset1)
2491 ne('UsAsCiI', cset1)
2492 eq(cset1, cset2)
2493 eq(cset2, cset1)
2494
2495 def test_getaddresses(self):
2496 eq = self.assertEqual
2497 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2498 'Bud Person <bperson@dom.ain>']),
2499 [('Al Person', 'aperson@dom.ain'),
2500 ('Bud Person', 'bperson@dom.ain')])
2501
def test_getaddresses_nasty(self):
    """Pin getaddresses() output for degenerate header values."""
    eq = self.assertEqual
    # An empty group is expected to yield a single empty pair.
    eq(utils.getaddresses(['foo: ;']), [('', '')])
    # Punctuation-only junk: the expectation below records three pairs.
    # NOTE(review): newer email package versions parse strictly by default
    # and may return a different list for this input -- confirm the
    # expected value against the targeted Python version.
    eq(utils.getaddresses(
       ['[]*-- =~$']),
       [('', ''), ('', ''), ('', '*--')])
    # An empty group followed by a normal quoted-name address.
    eq(utils.getaddresses(
       ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
       [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2511
2512 def test_getaddresses_embedded_comment(self):
2513 """Test proper handling of a nested comment"""
2514 eq = self.assertEqual
2515 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2516 eq(addrs[0][1], 'foo@bar.com')
2517
2518 def test_utils_quote_unquote(self):
2519 eq = self.assertEqual
2520 msg = Message()
2521 msg.add_header('content-disposition', 'attachment',
2522 filename='foo\\wacky"name')
2523 eq(msg.get_filename(), 'foo\\wacky"name')
2524
2525 def test_get_body_encoding_with_bogus_charset(self):
2526 charset = Charset('not a charset')
2527 self.assertEqual(charset.get_body_encoding(), 'base64')
2528
2529 def test_get_body_encoding_with_uppercase_charset(self):
2530 eq = self.assertEqual
2531 msg = Message()
2532 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2533 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2534 charsets = msg.get_charsets()
2535 eq(len(charsets), 1)
2536 eq(charsets[0], 'utf-8')
2537 charset = Charset(charsets[0])
2538 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002539 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002540 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2541 eq(msg.get_payload(decode=True), b'hello world')
2542 eq(msg['content-transfer-encoding'], 'base64')
2543 # Try another one
2544 msg = Message()
2545 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2546 charsets = msg.get_charsets()
2547 eq(len(charsets), 1)
2548 eq(charsets[0], 'us-ascii')
2549 charset = Charset(charsets[0])
2550 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2551 msg.set_payload('hello world', charset=charset)
2552 eq(msg.get_payload(), 'hello world')
2553 eq(msg['content-transfer-encoding'], '7bit')
2554
2555 def test_charsets_case_insensitive(self):
2556 lc = Charset('us-ascii')
2557 uc = Charset('US-ASCII')
2558 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2559
2560 def test_partial_falls_inside_message_delivery_status(self):
2561 eq = self.ndiffAssertEqual
2562 # The Parser interface provides chunks of data to FeedParser in 8192
2563 # byte gulps. SF bug #1076485 found one of those chunks inside
2564 # message/delivery-status header block, which triggered an
2565 # unreadline() of NeedMoreData.
2566 msg = self._msgobj('msg_43.txt')
2567 sfp = StringIO()
2568 iterators._structure(msg, sfp)
2569 eq(sfp.getvalue(), """\
2570multipart/report
2571 text/plain
2572 message/delivery-status
2573 text/plain
2574 text/plain
2575 text/plain
2576 text/plain
2577 text/plain
2578 text/plain
2579 text/plain
2580 text/plain
2581 text/plain
2582 text/plain
2583 text/plain
2584 text/plain
2585 text/plain
2586 text/plain
2587 text/plain
2588 text/plain
2589 text/plain
2590 text/plain
2591 text/plain
2592 text/plain
2593 text/plain
2594 text/plain
2595 text/plain
2596 text/plain
2597 text/plain
2598 text/plain
2599 text/rfc822-headers
2600""")
2601
R. David Murraya0b44b52010-12-02 21:47:19 +00002602 def test_make_msgid_domain(self):
2603 self.assertEqual(
2604 email.utils.make_msgid(domain='testdomain-string')[-19:],
2605 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002606
Ezio Melottib3aedd42010-11-20 19:04:17 +00002607
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002608# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for email.iterators, plus one FeedParser buffering regression."""

    def test_body_line_iterator(self):
        """body_line_iterator() yields the body lines of a message."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        it = iterators.body_line_iterator(msg)
        lines = list(it)
        eq(len(lines), 43)
        # msg_19.txt holds the expected concatenation of those lines.
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        """typed_subpart_iterator(msg, 'text') yields the two text parts."""
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        it = iterators.typed_subpart_iterator(msg, 'text')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 2)
        eq(EMPTYSTRING.join(lines), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        """typed_subpart_iterator(msg, 'text', 'plain') yields one part here."""
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        it = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        lines = []
        subparts = 0
        for subpart in it:
            subparts += 1
            lines.append(subpart.get_payload())
        eq(subparts, 1)
        eq(EMPTYSTRING.join(lines), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (text pushed, number of complete lines that should
        # become readable right after that push).
        imt = [
            ("a\r \n",  2),
            ("b",       0),
            ("c\n",     1),
            ("",        0),
            ("d\r\n",   1),
            ("e\r",     0),
            ("\nf",     1),
            ("\r\n",    1),
          ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is currently complete; readline()
            # returns the NeedMoreData sentinel once nothing is left.
            for ol in iter(bsf.readline, NeedMoreData):
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the check fails.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: output text equals input text.
        self.assertEqual(''.join([il for il, n in imt]), ''.join(om))
2695
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002696
Ezio Melottib3aedd42010-11-20 19:04:17 +00002697
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_*() helpers."""

    def test_header_parser(self):
        """HeaderParser parses the headers and keeps the body as one string."""
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        """A whitespace-only line after a header continues that header."""
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        """Same as above, with the continued header being the last one."""
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        """A CRLF-terminated multipart message parses into its two parts."""
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        """Flattening with linesep='\\r\\n' reproduces the CRLF input text."""
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    # Show complete diffs when an assertion in this class fails.
    maxDiff = None

    def test_multipart_digest_with_extra_mime_headers(self):
        """A multipart/digest parses into two nested message/rfc822 parts."""
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        """A header-only input without trailing newline keeps its last header."""
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        """Header values parsed from CRLF input carry no trailing CR or LF."""
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        """Unusual but printable header names are accepted as headers."""
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        """A first line with a space before any colon yields no headers."""
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        """Single-character header names are parsed like any other."""
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() accepts any iterable, so this does not depend on keys()
        # returning a list that supports in-place sort().
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        """A part body ending in CRLF keeps it when followed by a bare LF."""
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
          )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002860
Ezio Melottib3aedd42010-11-20 19:04:17 +00002861
R. David Murray96fd54e2010-10-08 15:55:28 +00002862class Test8BitBytesHandling(unittest.TestCase):
2863 # In Python3 all input is string, but that doesn't work if the actual input
2864 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2865 # decode byte streams using the surrogateescape error handler, and
2866 # reconvert to binary at appropriate places if we detect surrogates. This
2867 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2868 # but it does allow us to parse and preserve them, and to decode body
2869 # parts that use an 8bit CTE.
2870
# Template for a single-part text message.  The body tests fill in
# {charset}, {cte} and {bodyline} with str.format() and then encode the
# result to bytes.
bodytest_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: baz
    Mime-Version: 1.0
    Content-Type: text/plain; charset={charset}
    Content-Transfer-Encoding: {cte}

    {bodyline}
    """)
2880
2881 def test_known_8bit_CTE(self):
2882 m = self.bodytest_msg.format(charset='utf-8',
2883 cte='8bit',
2884 bodyline='pöstal').encode('utf-8')
2885 msg = email.message_from_bytes(m)
2886 self.assertEqual(msg.get_payload(), "pöstal\n")
2887 self.assertEqual(msg.get_payload(decode=True),
2888 "pöstal\n".encode('utf-8'))
2889
2890 def test_unknown_8bit_CTE(self):
2891 m = self.bodytest_msg.format(charset='notavalidcharset',
2892 cte='8bit',
2893 bodyline='pöstal').encode('utf-8')
2894 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002895 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00002896 self.assertEqual(msg.get_payload(decode=True),
2897 "pöstal\n".encode('utf-8'))
2898
2899 def test_8bit_in_quopri_body(self):
2900 # This is non-RFC compliant data...without 'decode' the library code
2901 # decodes the body using the charset from the headers, and because the
2902 # source byte really is utf-8 this works. This is likely to fail
2903 # against real dirty data (ie: produce mojibake), but the data is
2904 # invalid anyway so it is as good a guess as any. But this means that
2905 # this test just confirms the current behavior; that behavior is not
2906 # necessarily the best possible behavior. With 'decode' it is
2907 # returning the raw bytes, so that test should be of correct behavior,
2908 # or at least produce the same result that email4 did.
2909 m = self.bodytest_msg.format(charset='utf-8',
2910 cte='quoted-printable',
2911 bodyline='p=C3=B6stál').encode('utf-8')
2912 msg = email.message_from_bytes(m)
2913 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2914 self.assertEqual(msg.get_payload(decode=True),
2915 'pöstál\n'.encode('utf-8'))
2916
2917 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2918 # This is similar to the previous test, but proves that if the 8bit
2919 # byte is undecodeable in the specified charset, it gets replaced
2920 # by the unicode 'unknown' character. Again, this may or may not
2921 # be the ideal behavior. Note that if decode=False none of the
2922 # decoders will get involved, so this is the only test we need
2923 # for this behavior.
2924 m = self.bodytest_msg.format(charset='ascii',
2925 cte='quoted-printable',
2926 bodyline='p=C3=B6stál').encode('utf-8')
2927 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00002928 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00002929 self.assertEqual(msg.get_payload(decode=True),
2930 'pöstál\n'.encode('utf-8'))
2931
def test_8bit_in_base64_body(self):
    """Pin get_payload(decode=True) for a base64 body holding an 8bit byte."""
    # Sticking an 8bit byte in a base64 block makes it undecodable by
    # normal means, so the block is returned undecoded, but as bytes.
    # NOTE(review): newer email package versions may instead skip the
    # stray byte, decode the rest and record a defect -- confirm the
    # expected value against the targeted Python version.
    m = self.bodytest_msg.format(charset='utf-8',
                                 cte='base64',
                                 bodyline='cMO2c3RhbAá=').encode('utf-8')
    msg = email.message_from_bytes(m)
    self.assertEqual(msg.get_payload(decode=True),
                     'cMO2c3RhbAá=\n'.encode('utf-8'))
2941
2942 def test_8bit_in_uuencode_body(self):
2943 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2944 # normal means, so the block is returned undecoded, but as bytes.
2945 m = self.bodytest_msg.format(charset='utf-8',
2946 cte='uuencode',
2947 bodyline='<,.V<W1A; á ').encode('utf-8')
2948 msg = email.message_from_bytes(m)
2949 self.assertEqual(msg.get_payload(decode=True),
2950 '<,.V<W1A; á \n'.encode('utf-8'))
2951
2952
# Each entry pairs a raw header line, as it appears in the input message,
# with (header name, expected encoded value).  test_print_8bit_headers
# formats the expected values into the text it compares str(msg) against.
headertest_headers = (
    ('From: foo@bar.com', ('From', 'foo@bar.com')),
    ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
    ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
        '\tJean de Baddie',
        ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
            'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
            ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
    ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
    )
# The raw header lines joined into one utf-8 encoded message.  Note that
# no blank line separates the last header from the trailing text line.
headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
    '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00002965
2966 def test_get_8bit_header(self):
2967 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002968 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
2969 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00002970
2971 def test_print_8bit_headers(self):
2972 msg = email.message_from_bytes(self.headertest_msg)
2973 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00002974 textwrap.dedent("""\
2975 From: {}
2976 To: {}
2977 Subject: {}
2978 From: {}
2979
2980 Yes, they are flying.
2981 """).format(*[expected[1] for (_, expected) in
2982 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00002983
2984 def test_values_with_8bit_headers(self):
2985 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002986 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002987 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00002988 'b\uFFFD\uFFFDz',
2989 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
2990 'coll\uFFFD\uFFFDgue, le pouf '
2991 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00002992 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00002993 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00002994
2995 def test_items_with_8bit_headers(self):
2996 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00002997 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00002998 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00002999 ('To', 'b\uFFFD\uFFFDz'),
3000 ('Subject', 'Maintenant je vous '
3001 'pr\uFFFD\uFFFDsente '
3002 'mon coll\uFFFD\uFFFDgue, le pouf '
3003 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3004 '\tJean de Baddie'),
3005 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003006
3007 def test_get_all_with_8bit_headers(self):
3008 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003009 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003010 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003011 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003012
R David Murraya2150232011-03-16 21:11:23 -04003013 def test_get_content_type_with_8bit(self):
3014 msg = email.message_from_bytes(textwrap.dedent("""\
3015 Content-Type: text/pl\xA7in; charset=utf-8
3016 """).encode('latin-1'))
3017 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3018 self.assertEqual(msg.get_content_maintype(), "text")
3019 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3020
3021 def test_get_params_with_8bit(self):
3022 msg = email.message_from_bytes(
3023 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3024 self.assertEqual(msg.get_params(header='x-header'),
3025 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3026 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3027 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3028 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3029
3030 def test_get_rfc2231_params_with_8bit(self):
3031 msg = email.message_from_bytes(textwrap.dedent("""\
3032 Content-Type: text/plain; charset=us-ascii;
3033 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3034 ).encode('latin-1'))
3035 self.assertEqual(msg.get_param('title'),
3036 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3037
3038 def test_set_rfc2231_params_with_8bit(self):
3039 msg = email.message_from_bytes(textwrap.dedent("""\
3040 Content-Type: text/plain; charset=us-ascii;
3041 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3042 ).encode('latin-1'))
3043 msg.set_param('title', 'test')
3044 self.assertEqual(msg.get_param('title'), 'test')
3045
3046 def test_del_rfc2231_params_with_8bit(self):
3047 msg = email.message_from_bytes(textwrap.dedent("""\
3048 Content-Type: text/plain; charset=us-ascii;
3049 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3050 ).encode('latin-1'))
3051 msg.del_param('title')
3052 self.assertEqual(msg.get_param('title'), None)
3053 self.assertEqual(msg.get_content_maintype(), 'text')
3054
3055 def test_get_payload_with_8bit_cte_header(self):
3056 msg = email.message_from_bytes(textwrap.dedent("""\
3057 Content-Transfer-Encoding: b\xa7se64
3058 Content-Type: text/plain; charset=latin-1
3059
3060 payload
3061 """).encode('latin-1'))
3062 self.assertEqual(msg.get_payload(), 'payload\n')
3063 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3064
# A utf-8 encoded message, as bytes, with raw 8bit characters in the To
# and Subject headers and a Cyrillic body sent with an 8bit CTE.
non_latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: báz
    Subject: Maintenant je vous présente mon collègue, le pouf célèbre
    \tJean de Baddie
    Mime-Version: 1.0
    Content-Type: text/plain; charset="utf-8"
    Content-Transfer-Encoding: 8bit

    Да, они летят.
    """).encode('utf-8')
3076
3077 def test_bytes_generator(self):
3078 msg = email.message_from_bytes(self.non_latin_bin_msg)
3079 out = BytesIO()
3080 email.generator.BytesGenerator(out).flatten(msg)
3081 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3082
R. David Murray7372a072011-01-26 21:21:32 +00003083 def test_bytes_generator_handles_None_body(self):
3084 #Issue 11019
3085 msg = email.message.Message()
3086 out = BytesIO()
3087 email.generator.BytesGenerator(out).flatten(msg)
3088 self.assertEqual(out.getvalue(), b"\n")
3089
# The text test_generator_handles_8bit expects when the message parsed
# from non_latin_bin_msg is flattened with the string Generator.
# NOTE(review): the single leading space on the two Subject continuation
# lines is reconstructed -- confirm against the generator's output.
non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
    From: foo@bar.com
    To: =?unknown-8bit?q?b=C3=A1z?=
    Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
     =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
     =?unknown-8bit?q?_Jean_de_Baddie?=
    Mime-Version: 1.0
    Content-Type: text/plain; charset="utf-8"
    Content-Transfer-Encoding: base64

    0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
    """)
3102
3103 def test_generator_handles_8bit(self):
3104 msg = email.message_from_bytes(self.non_latin_bin_msg)
3105 out = StringIO()
3106 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003107 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003108
3109 def test_bytes_generator_with_unix_from(self):
3110 # The unixfrom contains a current date, so we can't check it
3111 # literally. Just make sure the first word is 'From' and the
3112 # rest of the message matches the input.
3113 msg = email.message_from_bytes(self.non_latin_bin_msg)
3114 out = BytesIO()
3115 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3116 lines = out.getvalue().split(b'\n')
3117 self.assertEqual(lines[0].split()[0], b'From')
3118 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3119
# Variant of non_latin_bin_msg_as7bit_wrapped in which list items 2 and 3
# (the Subject line and its first continuation line) are replaced by one
# long Subject line; the remaining lines are kept as they are.
# test_message_from_binary_file compares str(msg) against this text.
non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
non_latin_bin_msg_as7bit[2:4] = [
    'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
    'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3125
def test_message_from_binary_file(self):
    """BytesParser.parse() reads a message from a file opened in binary mode."""
    path = 'test.msg'
    # Register removal first so the file goes away even if a step fails.
    self.addCleanup(unlink, path)
    with open(path, 'wb') as writer:
        writer.write(self.non_latin_bin_msg)
    with open(path, 'rb') as reader:
        parsed = email.parser.BytesParser().parse(reader)
    self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)
3134
# A latin-1 encoded message, as bytes, whose body is sent with an 8bit CTE.
latin_bin_msg = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="latin-1"
    Content-Transfer-Encoding: 8bit

    oh là là, know what I mean, know what I mean?
    """).encode('latin-1')

# The text the tests expect from str() of the message parsed from
# latin_bin_msg: charset spelled iso-8859-1, body in quoted-printable.
latin_bin_msg_as7bit = textwrap.dedent("""\
    From: foo@bar.com
    To: Dinsdale
    Subject: Nudge nudge, wink, wink
    Mime-Version: 1.0
    Content-Type: text/plain; charset="iso-8859-1"
    Content-Transfer-Encoding: quoted-printable

    oh l=E0 l=E0, know what I mean, know what I mean?
    """)
3156
3157 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3158 m = email.message_from_bytes(self.latin_bin_msg)
3159 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3160
3161 def test_decoded_generator_emits_unicode_body(self):
3162 m = email.message_from_bytes(self.latin_bin_msg)
3163 out = StringIO()
3164 email.generator.DecodedGenerator(out).flatten(m)
3165 #DecodedHeader output contains an extra blank line compared
3166 #to the input message. RDM: not sure if this is a bug or not,
3167 #but it is not specific to the 8bit->7bit conversion.
3168 self.assertEqual(out.getvalue(),
3169 self.latin_bin_msg.decode('latin-1')+'\n')
3170
3171 def test_bytes_feedparser(self):
3172 bfp = email.feedparser.BytesFeedParser()
3173 for i in range(0, len(self.latin_bin_msg), 10):
3174 bfp.feed(self.latin_bin_msg[i:i+10])
3175 m = bfp.close()
3176 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3177
R. David Murray8451c4b2010-10-23 22:19:56 +00003178 def test_crlf_flatten(self):
3179 with openfile('msg_26.txt', 'rb') as fp:
3180 text = fp.read()
3181 msg = email.message_from_bytes(text)
3182 s = BytesIO()
3183 g = email.generator.BytesGenerator(s)
3184 g.flatten(msg, linesep='\r\n')
3185 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003186
3187 def test_8bit_multipart(self):
3188 # Issue 11605
3189 source = textwrap.dedent("""\
3190 Date: Fri, 18 Mar 2011 17:15:43 +0100
3191 To: foo@example.com
3192 From: foodwatch-Newsletter <bar@example.com>
3193 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3194 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3195 MIME-Version: 1.0
3196 Content-Type: multipart/alternative;
3197 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3198
3199 --b1_76a486bee62b0d200f33dc2ca08220ad
3200 Content-Type: text/plain; charset="utf-8"
3201 Content-Transfer-Encoding: 8bit
3202
3203 Guten Tag, ,
3204
3205 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3206 Nachrichten aus Japan.
3207
3208
3209 --b1_76a486bee62b0d200f33dc2ca08220ad
3210 Content-Type: text/html; charset="utf-8"
3211 Content-Transfer-Encoding: 8bit
3212
3213 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3214 "http://www.w3.org/TR/html4/loose.dtd">
3215 <html lang="de">
3216 <head>
3217 <title>foodwatch - Newsletter</title>
3218 </head>
3219 <body>
3220 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3221 die Nachrichten aus Japan.</p>
3222 </body>
3223 </html>
3224 --b1_76a486bee62b0d200f33dc2ca08220ad--
3225
3226 """).encode('utf-8')
3227 msg = email.message_from_bytes(source)
3228 s = BytesIO()
3229 g = email.generator.BytesGenerator(s)
3230 g.flatten(msg)
3231 self.assertEqual(s.getvalue(), source)
3232
R. David Murray8451c4b2010-10-23 22:19:56 +00003233 maxDiff = None
3234
Ezio Melottib3aedd42010-11-20 19:04:17 +00003235
class BaseTestBytesGeneratorIdempotent:
    # Mixin supplying the bytes-based _msgobj/_idempotent helpers.
    # Concrete subclasses must define linesep (str), blinesep (bytes) and
    # normalize_linesep_regex (a compiled bytes pattern).

    maxDiff = None

    def _msgobj(self, filename):
        # Read the test file as bytes, rewrite whatever the subclass regex
        # matches to self.blinesep, and return (parsed message, bytes).
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled and the subclass line
        # separator; the output must equal the original bytes.
        sink = BytesIO()
        gen = email.generator.BytesGenerator(sink, maxheaderlen=0)
        gen.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertEqual(data, sink.getvalue())
R. David Murray96fd54e2010-10-08 15:55:28 +00003252
3253
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # LF variant: every CRLF in the input data is rewritten to a bare LF
    # and the generator is asked to emit LF.
    normalize_linesep_regex = re.compile(br'\r\n')
    linesep = '\n'
    blinesep = b'\n'
3259
3260
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # CRLF variant: every LF not already preceded by CR is rewritten to
    # CRLF and the generator is asked to emit CRLF.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    linesep = '\r\n'
    blinesep = b'\r\n'
3266
Ezio Melottib3aedd42010-11-20 19:04:17 +00003267
class TestBase64(unittest.TestCase):
    # Tests for the email.base64mime helper functions.

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters costs four
        # characters of base64 output.
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        check(base64mime.body_encode(b''), b'')
        check(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        check(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # One full 40-character output line of the 'xxxx ' * 20 input.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        # Test the maxlinelen arg
        check(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
              (full_line + '\n') * 3 + last_line + '\n')
        # Test the eol argument
        check(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40,
                                     eol='\r\n'),
              (full_line + '\r\n') * 3 + last_line + '\r\n')

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003318
3319
Ezio Melottib3aedd42010-11-20 19:04:17 +00003320
class TestQuopri(unittest.TestCase):
    # Tests for the email.quoprimime helper functions.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # A real assertion rather than a bare assert, so the sanity check
        # still runs under python -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def _test_header_encode(self, header, expected_encoded_header, charset=None):
        # Encode header (with header_encode's default charset when charset
        # is None) and compare against the expected encoded word.
        if charset is None:
            encoded_header = quoprimime.header_encode(header)
        else:
            encoded_header = quoprimime.header_encode(header, charset)
        self.assertEqual(encoded_header, expected_encoded_header)

    def test_header_encode_null(self):
        self._test_header_encode(b'', '')

    def test_header_encode_one_word(self):
        self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')

    def test_header_encode_two_lines(self):
        self._test_header_encode(b'hello\nworld',
                                 '=?iso-8859-1?q?hello=0Aworld?=')

    def test_header_encode_non_ascii(self):
        self._test_header_encode(b'hello\xc7there',
                                 '=?iso-8859-1?q?hello=C7there?=')

    def test_header_encode_alt_charset(self):
        self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
                                 charset='iso-8859-2')

    def _test_header_decode(self, encoded_header, expected_decoded_header):
        # Decode an encoded header payload and compare with the expectation.
        decoded_header = quoprimime.header_decode(encoded_header)
        self.assertEqual(decoded_header, expected_decoded_header)

    def test_header_decode_null(self):
        self._test_header_decode('', '')

    def test_header_decode_one_word(self):
        self._test_header_decode('hello', 'hello')

    def test_header_decode_two_lines(self):
        self._test_header_decode('hello=0Aworld', 'hello\nworld')

    def test_header_decode_non_ascii(self):
        self._test_header_decode('hello=C7there', 'hello\xc7there')

    def _test_decode(self, encoded, expected_decoded, eol=None):
        # Decode a body, passing eol only when the caller supplied one so
        # that decode()'s own default is exercised otherwise.
        if eol is None:
            decoded = quoprimime.decode(encoded)
        else:
            decoded = quoprimime.decode(encoded, eol=eol)
        self.assertEqual(decoded, expected_decoded)

    def test_decode_null_word(self):
        self._test_decode('', '')

    def test_decode_null_line_null_word(self):
        self._test_decode('\r\n', '\n')

    def test_decode_one_word(self):
        self._test_decode('hello', 'hello')

    def test_decode_one_word_eol(self):
        self._test_decode('hello', 'hello', eol='X')

    def test_decode_one_line(self):
        self._test_decode('hello\r\n', 'hello\n')

    def test_decode_one_line_lf(self):
        self._test_decode('hello\n', 'hello\n')

    def test_decode_one_line_cr(self):
        self._test_decode('hello\r', 'hello\n')

    def test_decode_one_line_nl(self):
        self._test_decode('hello\n', 'helloX', eol='X')

    def test_decode_one_line_crnl(self):
        self._test_decode('hello\r\n', 'helloX', eol='X')

    def test_decode_one_line_one_word(self):
        self._test_decode('hello\r\nworld', 'hello\nworld')

    def test_decode_one_line_one_word_eol(self):
        self._test_decode('hello\r\nworld', 'helloXworld', eol='X')

    def test_decode_two_lines(self):
        self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')

    def test_decode_two_lines_eol(self):
        self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')

    def test_decode_one_long_line(self):
        self._test_decode('Spam' * 250, 'Spam' * 250)

    def test_decode_one_space(self):
        self._test_decode(' ', '')

    def test_decode_multiple_spaces(self):
        self._test_decode(' ' * 5, '')

    def test_decode_one_line_trailing_spaces(self):
        self._test_decode('hello \r\n', 'hello\n')

    def test_decode_two_lines_trailing_spaces(self):
        self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')

    def test_decode_quoted_word(self):
        self._test_decode('=22quoted=20words=22', '"quoted words"')

    def test_decode_uppercase_quoting(self):
        self._test_decode('ab=CD=EF', 'ab\xcd\xef')

    def test_decode_lowercase_quoting(self):
        self._test_decode('ab=cd=ef', 'ab\xcd\xef')

    def test_decode_soft_line_break(self):
        self._test_decode('soft line=\r\nbreak', 'soft linebreak')

    def test_decode_false_quoting(self):
        self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')

    def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
        # Encode body and compare with the expectation.  maxlinelen and eol
        # are forwarded only when given, so body_encode's defaults are
        # exercised otherwise; the local values are still needed for the
        # line-length check below.
        kwargs = {}
        if maxlinelen is None:
            # Use body_encode's default.
            maxlinelen = 76
        else:
            kwargs['maxlinelen'] = maxlinelen
        if eol is None:
            # Use body_encode's default.
            eol = '\n'
        else:
            kwargs['eol'] = eol
        encoded_body = quoprimime.body_encode(body, **kwargs)
        self.assertEqual(encoded_body, expected_encoded_body)
        if eol == '\n' or eol == '\r\n':
            # We know how to split the result back into lines, so maxlinelen
            # can be checked.
            for line in encoded_body.splitlines():
                self.assertLessEqual(len(line), maxlinelen)

    def test_encode_null(self):
        self._test_encode('', '')

    def test_encode_null_lines(self):
        self._test_encode('\n\n', '\n\n')

    def test_encode_one_line(self):
        self._test_encode('hello\n', 'hello\n')

    def test_encode_one_line_crlf(self):
        self._test_encode('hello\r\n', 'hello\n')

    def test_encode_one_line_eol(self):
        self._test_encode('hello\n', 'hello\r\n', eol='\r\n')

    def test_encode_one_space(self):
        self._test_encode(' ', '=20')

    def test_encode_one_line_one_space(self):
        self._test_encode(' \n', '=20\n')

    # XXX: body_encode() expects strings, but uses ord(char) from these
    # strings to index into a 256-entry list.  For code points above 255,
    # this will fail.  Should there be a check for 8-bit only ord() values
    # in body, or at least a comment about the expected input?

    def test_encode_two_lines_one_space(self):
        self._test_encode(' \n \n', '=20\n=20\n')

    def test_encode_one_word_trailing_spaces(self):
        # Only the last whitespace character of a line is escaped, so two
        # trailing spaces become one literal space followed by =20.
        self._test_encode('hello  ', 'hello =20')

    def test_encode_one_line_trailing_spaces(self):
        # As above, with a line terminator after the trailing spaces.
        self._test_encode('hello  \n', 'hello =20\n')

    def test_encode_one_word_trailing_tab(self):
        self._test_encode('hello \t', 'hello =09')

    def test_encode_one_line_trailing_tab(self):
        self._test_encode('hello \t\n', 'hello =09\n')

    def test_encode_trailing_space_before_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)

    def test_encode_trailing_space_at_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)

    def test_encode_trailing_space_beyond_maxlinelen(self):
        self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)

    def test_encode_whitespace_lines(self):
        self._test_encode(' \n' * 5, '=20\n' * 5)

    def test_encode_quoted_equals(self):
        self._test_encode('a = b', 'a =3D b')

    def test_encode_one_long_string(self):
        self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)

    def test_encode_one_long_line(self):
        self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')

    def test_encode_one_very_long_line(self):
        self._test_encode('x' * 200 + '\n',
                2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')

    def test_encode_shortest_maxlinelen(self):
        self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)

    def test_encode_maxlinelen_too_small(self):
        self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF in the input comes out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
3633
3634
Ezio Melottib3aedd42010-11-20 19:04:17 +00003635
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003636# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # Drop the 'fake' charset that test_body_encode registers, if any.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(eight_bit)
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        check('hello w=F6rld',
              Charset('iso-8859-1').body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        check('aGVsbG8gd29ybGQ=\n',
              Charset('utf-8').body_encode(b'hello world'))
        # Try a charset with None body encoding
        check('hello world', Charset('us-ascii').body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        c = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME
##         try:
##             eq('\x1b$B5FCO;~IW\x1b(B',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
##             eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
##                c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
##         except LookupError:
##             # We probably don't have the Japanese codecs installed
##             pass
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        c = Charset('fake')
        check('hello world', c.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        # A non-ASCII charset name is rejected at construction time.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
3691
3692
Ezio Melottib3aedd42010-11-20 19:04:17 +00003693
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003694# Test multilingual MIME headers.
3695class TestHeader(TestEmailBase):
3696 def test_simple(self):
3697 eq = self.ndiffAssertEqual
3698 h = Header('Hello World!')
3699 eq(h.encode(), 'Hello World!')
3700 h.append(' Goodbye World!')
3701 eq(h.encode(), 'Hello World! Goodbye World!')
3702
3703 def test_simple_surprise(self):
3704 eq = self.ndiffAssertEqual
3705 h = Header('Hello World!')
3706 eq(h.encode(), 'Hello World!')
3707 h.append('Goodbye World!')
3708 eq(h.encode(), 'Hello World! Goodbye World!')
3709
3710 def test_header_needs_no_decoding(self):
3711 h = 'no decoding needed'
3712 self.assertEqual(decode_header(h), [(h, None)])
3713
3714 def test_long(self):
3715 h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
3716 maxlinelen=76)
3717 for l in h.encode(splitchars=' ').split('\n '):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003718 self.assertTrue(len(l) <= 76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003719
3720 def test_multilingual(self):
3721 eq = self.ndiffAssertEqual
3722 g = Charset("iso-8859-1")
3723 cz = Charset("iso-8859-2")
3724 utf8 = Charset("utf-8")
3725 g_head = (b'Die Mieter treten hier ein werden mit einem '
3726 b'Foerderband komfortabel den Korridor entlang, '
3727 b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
3728 b'gegen die rotierenden Klingen bef\xf6rdert. ')
3729 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
3730 b'd\xf9vtipu.. ')
3731 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
3732 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
3733 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
3734 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
3735 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
3736 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
3737 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
3738 '\u3044\u307e\u3059\u3002')
3739 h = Header(g_head, g)
3740 h.append(cz_head, cz)
3741 h.append(utf8_head, utf8)
Guido van Rossum9604e662007-08-30 03:46:43 +00003742 enc = h.encode(maxlinelen=76)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003743 eq(enc, """\
Guido van Rossum9604e662007-08-30 03:46:43 +00003744=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
3745 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
3746 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
3747 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003748 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
3749 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
3750 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
3751 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
Guido van Rossum9604e662007-08-30 03:46:43 +00003752 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
3753 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
3754 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
3755 decoded = decode_header(enc)
3756 eq(len(decoded), 3)
3757 eq(decoded[0], (g_head, 'iso-8859-1'))
3758 eq(decoded[1], (cz_head, 'iso-8859-2'))
3759 eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003760 ustr = str(h)
Guido van Rossum9604e662007-08-30 03:46:43 +00003761 eq(ustr,
3762 (b'Die Mieter treten hier ein werden mit einem Foerderband '
3763 b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
3764 b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
3765 b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
3766 b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
3767 b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
3768 b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
3769 b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
3770 b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
3771 b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
3772 b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
3773 b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
3774 b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
3775 b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
3776 b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
3777 b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
3778 ).decode('utf-8'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003779 # Test make_header()
3780 newh = make_header(decode_header(enc))
Guido van Rossum9604e662007-08-30 03:46:43 +00003781 eq(newh, h)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003782
3783 def test_empty_header_encode(self):
3784 h = Header()
3785 self.assertEqual(h.encode(), '')
Barry Warsaw8b3d6592007-08-30 02:10:49 +00003786
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003787 def test_header_ctor_default_args(self):
3788 eq = self.ndiffAssertEqual
3789 h = Header()
3790 eq(h, '')
3791 h.append('foo', Charset('iso-8859-1'))
Guido van Rossum9604e662007-08-30 03:46:43 +00003792 eq(h, 'foo')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003793
3794 def test_explicit_maxlinelen(self):
3795 eq = self.ndiffAssertEqual
3796 hstr = ('A very long line that must get split to something other '
3797 'than at the 76th character boundary to test the non-default '
3798 'behavior')
3799 h = Header(hstr)
3800 eq(h.encode(), '''\
3801A very long line that must get split to something other than at the 76th
3802 character boundary to test the non-default behavior''')
3803 eq(str(h), hstr)
3804 h = Header(hstr, header_name='Subject')
3805 eq(h.encode(), '''\
3806A very long line that must get split to something other than at the
3807 76th character boundary to test the non-default behavior''')
3808 eq(str(h), hstr)
3809 h = Header(hstr, maxlinelen=1024, header_name='Subject')
3810 eq(h.encode(), hstr)
3811 eq(str(h), hstr)
3812
Guido van Rossum9604e662007-08-30 03:46:43 +00003813 def test_quopri_splittable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003814 eq = self.ndiffAssertEqual
3815 h = Header(charset='iso-8859-1', maxlinelen=20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003816 x = 'xxxx ' * 20
3817 h.append(x)
3818 s = h.encode()
3819 eq(s, """\
3820=?iso-8859-1?q?xxx?=
3821 =?iso-8859-1?q?x_?=
3822 =?iso-8859-1?q?xx?=
3823 =?iso-8859-1?q?xx?=
3824 =?iso-8859-1?q?_x?=
3825 =?iso-8859-1?q?xx?=
3826 =?iso-8859-1?q?x_?=
3827 =?iso-8859-1?q?xx?=
3828 =?iso-8859-1?q?xx?=
3829 =?iso-8859-1?q?_x?=
3830 =?iso-8859-1?q?xx?=
3831 =?iso-8859-1?q?x_?=
3832 =?iso-8859-1?q?xx?=
3833 =?iso-8859-1?q?xx?=
3834 =?iso-8859-1?q?_x?=
3835 =?iso-8859-1?q?xx?=
3836 =?iso-8859-1?q?x_?=
3837 =?iso-8859-1?q?xx?=
3838 =?iso-8859-1?q?xx?=
3839 =?iso-8859-1?q?_x?=
3840 =?iso-8859-1?q?xx?=
3841 =?iso-8859-1?q?x_?=
3842 =?iso-8859-1?q?xx?=
3843 =?iso-8859-1?q?xx?=
3844 =?iso-8859-1?q?_x?=
3845 =?iso-8859-1?q?xx?=
3846 =?iso-8859-1?q?x_?=
3847 =?iso-8859-1?q?xx?=
3848 =?iso-8859-1?q?xx?=
3849 =?iso-8859-1?q?_x?=
3850 =?iso-8859-1?q?xx?=
3851 =?iso-8859-1?q?x_?=
3852 =?iso-8859-1?q?xx?=
3853 =?iso-8859-1?q?xx?=
3854 =?iso-8859-1?q?_x?=
3855 =?iso-8859-1?q?xx?=
3856 =?iso-8859-1?q?x_?=
3857 =?iso-8859-1?q?xx?=
3858 =?iso-8859-1?q?xx?=
3859 =?iso-8859-1?q?_x?=
3860 =?iso-8859-1?q?xx?=
3861 =?iso-8859-1?q?x_?=
3862 =?iso-8859-1?q?xx?=
3863 =?iso-8859-1?q?xx?=
3864 =?iso-8859-1?q?_x?=
3865 =?iso-8859-1?q?xx?=
3866 =?iso-8859-1?q?x_?=
3867 =?iso-8859-1?q?xx?=
3868 =?iso-8859-1?q?xx?=
3869 =?iso-8859-1?q?_?=""")
3870 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003871 h = Header(charset='iso-8859-1', maxlinelen=40)
3872 h.append('xxxx ' * 20)
Guido van Rossum9604e662007-08-30 03:46:43 +00003873 s = h.encode()
3874 eq(s, """\
3875=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
3876 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
3877 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
3878 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
3879 =?iso-8859-1?q?_xxxx_xxxx_?=""")
3880 eq(x, str(make_header(decode_header(s))))
3881
3882 def test_base64_splittable(self):
3883 eq = self.ndiffAssertEqual
3884 h = Header(charset='koi8-r', maxlinelen=20)
3885 x = 'xxxx ' * 20
3886 h.append(x)
3887 s = h.encode()
3888 eq(s, """\
3889=?koi8-r?b?eHh4?=
3890 =?koi8-r?b?eCB4?=
3891 =?koi8-r?b?eHh4?=
3892 =?koi8-r?b?IHh4?=
3893 =?koi8-r?b?eHgg?=
3894 =?koi8-r?b?eHh4?=
3895 =?koi8-r?b?eCB4?=
3896 =?koi8-r?b?eHh4?=
3897 =?koi8-r?b?IHh4?=
3898 =?koi8-r?b?eHgg?=
3899 =?koi8-r?b?eHh4?=
3900 =?koi8-r?b?eCB4?=
3901 =?koi8-r?b?eHh4?=
3902 =?koi8-r?b?IHh4?=
3903 =?koi8-r?b?eHgg?=
3904 =?koi8-r?b?eHh4?=
3905 =?koi8-r?b?eCB4?=
3906 =?koi8-r?b?eHh4?=
3907 =?koi8-r?b?IHh4?=
3908 =?koi8-r?b?eHgg?=
3909 =?koi8-r?b?eHh4?=
3910 =?koi8-r?b?eCB4?=
3911 =?koi8-r?b?eHh4?=
3912 =?koi8-r?b?IHh4?=
3913 =?koi8-r?b?eHgg?=
3914 =?koi8-r?b?eHh4?=
3915 =?koi8-r?b?eCB4?=
3916 =?koi8-r?b?eHh4?=
3917 =?koi8-r?b?IHh4?=
3918 =?koi8-r?b?eHgg?=
3919 =?koi8-r?b?eHh4?=
3920 =?koi8-r?b?eCB4?=
3921 =?koi8-r?b?eHh4?=
3922 =?koi8-r?b?IA==?=""")
3923 eq(x, str(make_header(decode_header(s))))
3924 h = Header(charset='koi8-r', maxlinelen=40)
3925 h.append(x)
3926 s = h.encode()
3927 eq(s, """\
3928=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
3929 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
3930 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
3931 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
3932 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
3933 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
3934 eq(x, str(make_header(decode_header(s))))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003935
3936 def test_us_ascii_header(self):
3937 eq = self.assertEqual
3938 s = 'hello'
3939 x = decode_header(s)
3940 eq(x, [('hello', None)])
3941 h = make_header(x)
3942 eq(s, h.encode())
3943
3944 def test_string_charset(self):
3945 eq = self.assertEqual
3946 h = Header()
3947 h.append('hello', 'iso-8859-1')
Guido van Rossum9604e662007-08-30 03:46:43 +00003948 eq(h, 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003949
3950## def test_unicode_error(self):
3951## raises = self.assertRaises
3952## raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
3953## raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
3954## h = Header()
3955## raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
3956## raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
3957## raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')
3958
3959 def test_utf8_shortest(self):
3960 eq = self.assertEqual
3961 h = Header('p\xf6stal', 'utf-8')
3962 eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
3963 h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
3964 eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
3965
3966 def test_bad_8bit_header(self):
3967 raises = self.assertRaises
3968 eq = self.assertEqual
3969 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3970 raises(UnicodeError, Header, x)
3971 h = Header()
3972 raises(UnicodeError, h.append, x)
3973 e = x.decode('utf-8', 'replace')
3974 eq(str(Header(x, errors='replace')), e)
3975 h.append(x, errors='replace')
3976 eq(str(h), e)
3977
R David Murray041015c2011-03-25 15:10:55 -04003978 def test_escaped_8bit_header(self):
3979 x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
3980 x = x.decode('ascii', 'surrogateescape')
3981 h = Header(x, charset=email.charset.UNKNOWN8BIT)
3982 self.assertEqual(str(h),
3983 'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
3984 self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])
3985
3986 def test_modify_returned_list_does_not_change_header(self):
3987 h = Header('test')
3988 chunks = email.header.decode_header(h)
3989 chunks.append(('ascii', 'test2'))
3990 self.assertEqual(str(h), 'test')
3991
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003992 def test_encoded_adjacent_nonencoded(self):
3993 eq = self.assertEqual
3994 h = Header()
3995 h.append('hello', 'iso-8859-1')
3996 h.append('world')
3997 s = h.encode()
3998 eq(s, '=?iso-8859-1?q?hello?= world')
3999 h = make_header(decode_header(s))
4000 eq(h.encode(), s)
4001
4002 def test_whitespace_eater(self):
4003 eq = self.assertEqual
4004 s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
4005 parts = decode_header(s)
4006 eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
4007 hdr = make_header(parts)
4008 eq(hdr.encode(),
4009 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')
4010
4011 def test_broken_base64_header(self):
4012 raises = self.assertRaises
R. David Murrayc4e69cc2010-08-03 22:14:10 +00004013 s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004014 raises(errors.HeaderParseError, decode_header, s)
4015
R. David Murray477efb32011-01-05 01:39:32 +00004016 def test_shift_jis_charset(self):
4017 h = Header('文', charset='shift_jis')
4018 self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')
4019
R David Murrayde912762011-03-16 18:26:23 -04004020 def test_flatten_header_with_no_value(self):
4021 # Issue 11401 (regression from email 4.x) Note that the space after
4022 # the header doesn't reflect the input, but this is also the way
4023 # email 4.x behaved. At some point it would be nice to fix that.
4024 msg = email.message_from_string("EmptyHeader:")
4025 self.assertEqual(str(msg), "EmptyHeader: \n\n")
4026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004027
Ezio Melottib3aedd42010-11-20 19:04:17 +00004028
# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    # RFC 2231 parameter handling: get_param() is expected to return a
    # (charset, language, value) 3-tuple for extended ("name*=" /
    # "name*N*=") parameters and a plain string for ordinary or merely
    # continued ("name*N=") parameters.

    def test_get_param(self):
        # msg_29.txt carries an extended 'title' parameter; check both the
        # unquoted and the raw (still quoted) value.
        eq = self.assertEqual
        msg = self._msgobj('msg_29.txt')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        eq(msg.get_param('title', unquote=False),
           ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        # set_param() with a charset (and optionally a language) must be
        # readable back through get_param() and must serialize as an
        # RFC 2231 percent-encoded parameter.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii')
        eq(msg.get_param('title'),
           ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.get_param('title'),
           ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        # Deleting one parameter must leave the other (RFC 2231 encoded)
        # parameter in the serialized Content-Type header.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_32.txt')
        eq(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_parse_rfc_quoting(self):
        # Encoded segments written without surrounding quotes must parse
        # and must round-trip unchanged through as_string().
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*=''This%20is%20even%20more%20;
            \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_parse_extra_quoting(self):
        # Same as above, but the encoded segments are wrapped in quotes.
        m = textwrap.dedent('''\
            Content-Disposition: inline;
            \tfilename*0*="''This%20is%20even%20more%20";
            \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
            \tfilename*2="is it not.pdf"

            ''')
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')
        self.assertEqual(m, msg.as_string())

    def test_rfc2231_no_language_or_charset(self):
        # Continuations without the extended marker yield a plain string.
        m = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(m)
        param = msg.get_param('NAME')
        # assertNotIsInstance reports the offending value on failure.
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(
            param,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # NOTE(review): this body is identical to
        # test_rfc2231_no_language_or_charset_in_filename; confirm whether
        # a different input was intended.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Segment 0 lacks the extended marker, so its percent escapes are
        # expected to be left as-is while segment 1 is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment carries the extended marker: nothing is decoded.
        m = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        m = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        m = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unrecognised charset label ('bogus') must not prevent the
        # filename from being returned.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        m = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(m)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # A byte that cannot be decoded with the declared charset is
        # expected to show up as U+FFFD in the filename.
        m = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        m = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(m)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in an extended value must not be taken for the
        # charset'language'value delimiters.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, None)
        eq(language, None)
        eq(s, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        m = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # Only the first two apostrophes delimit charset and language; the
        # third one belongs to the value.
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # Without the extended marker the apostrophes are ordinary text.
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(m)
        param = msg.get_param('name')
        self.assertNotIsInstance(param, tuple)
        self.assertEqual(param, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(m)
        eq(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        eq = self.assertEqual
        m = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(m)
        charset, language, s = msg.get_param('name')
        eq(charset, 'us-ascii')
        eq(language, 'en-us')
        eq(s, 'My Document For You')
4364
4365
Ezio Melottib3aedd42010-11-20 19:04:17 +00004366
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):
    # Each test parses msg_45.txt, regenerates it one way or another, and
    # checks that the first MIME part is unchanged.

    def _msg_and_obj(self, filename):
        # Return (raw text of the test data file, Message parsed from it).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message: the text between
        # the first "--boundary" line and the next line that consists of
        # exactly the same "--boundary".
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Fail with a readable message instead of an AttributeError on
        # None when either text contains no delimited part at all.
        self.assertIsNotNone(inmatch, 'no MIME part found in original text')
        self.assertIsNotNone(outmatch, 'no MIME part found in generated text')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Header re-wrapping at a shorter width must not touch the part.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, driving a Generator directly.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4402
4403
Ezio Melottib3aedd42010-11-20 19:04:17 +00004404
# Run this module's tests when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()