blob: 1657afc817121b9b84949a40f3afc71c0c3fd305 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R David Murray28346b82011-03-31 11:40:20 -040039from test.support import run_unittest, unlink
R David Murraya256bac2011-03-31 12:20:23 -040040from test.test_email import openfile, TestEmailBase
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Shared string constants used by the tests in this module.
NL = '\n'
EMPTYSTRING = ''
SPACE = ' '
45
46
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for the public API of email.message.Message.

    Covers header access, charset and payload handling, Content-Type
    parameter manipulation, content-type introspection and add_header().
    Tests that call self._msgobj() or openfile() read the named sample
    message file through helpers imported from test.test_email.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied default.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_make_boundary(self):
        msg = MIMEMultipart('form-data')
        # Note that when the boundary gets created is an implementation
        # detail and might change.
        self.assertEqual(msg.items()[0][1], 'multipart/form-data')
        # Trigger creation of boundary
        msg.as_string()
        self.assertEqual(msg.items()[0][1][:33],
                         'multipart/form-data; boundary="==')
        # XXX: there ought to be tests of the uniqueness of the boundary, too.

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile('msg_46.txt') as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        # Flattening must reproduce the input text exactly.
        self.assertEqual(out.getvalue(), msgdata)

    def test_byte_message_rfc822_only(self):
        # Make sure new bytes header parser also passes this.
        with openfile('msg_46.txt', 'rb') as fp:
            msgdata = fp.read()
        parser = email.parser.BytesHeaderParser()
        msg = parser.parsebytes(msgdata)
        out = BytesIO()
        gen = email.generator.BytesGenerator(out)
        gen.flatten(msg)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field and lookups return the
            # first match, so remove the previous value first; otherwise only
            # 'x-uuencode' would ever be exercised.  Deleting a header that
            # is not present is a no-op.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_get_payload_n_raises_on_non_multipart(self):
        msg = Message()
        self.assertRaises(TypeError, msg.get_payload, 1)

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the plain rendering.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header, so the default lookup finds nothing.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The quoted value contains semicolons that must not be treated
        # as parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Header membership is case-insensitive but not a prefix match.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_del_param_on_nonexistent_header(self):
        # No assertion: the call simply must not raise.
        msg = Message()
        msg.del_param('filename', 'content-disposition')

    def test_del_nonexistent_param(self):
        msg = Message()
        msg.add_header('Content-Type', 'text/plain', charset='utf-8')
        existing_header = msg['Content-Type']
        msg.del_param('foobar', header='Content-Type')
        # Deleting a parameter that is not there must leave the header
        # exactly as it was.
        self.assertEqual(msg['Content-Type'], existing_header)
        self.assertEqual(msg['Content-Type'], 'text/plain; charset="utf-8"')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a slash is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A malformed Content-Type falls back to the text/plain default.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With duplicate headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # Undecodable base64 is returned as the raw payload bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_broken_unicode_payload(self):
        # This test improves coverage but is not a compliance test.
        # The behavior in this situation is currently undefined by the API.
        x = 'this is a br\xf6ken thing to do'
        msg = Message()
        msg['content-type'] = 'text/plain'
        msg['content-transfer-encoding'] = '8bit'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    def test_questionable_bytes_payload(self):
        # This test improves coverage but is not a compliance test,
        # since it involves poking inside the black box.
        x = 'this is a quéstionable thing to do'.encode('utf-8')
        msg = Message()
        msg['content-type'] = 'text/plain; charset="utf-8"'
        msg['content-transfer-encoding'] = '8bit'
        msg._payload = x
        self.assertEqual(msg.get_payload(decode=True), x)

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*=utf-8\'\'Fu%C3%9Fballer.ppt',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*=iso-8859-1\'\'Fu%DFballer.ppt',
            msg['Content-Disposition'])

    def test_ascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="windows [filename].ppt")
        self.assertEqual(
            'attachment; filename="windows [filename].ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_with_tspecial(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer [filename].ppt")
        self.assertEqual(
            "attachment; filename*=utf-8''Fu%C3%9Fballer%20%5Bfilename%5D.ppt",
            msg['Content-Disposition'])

    def test_add_header_with_name_only_param(self):
        # A None value produces a bare parameter name, with the underscore
        # in the keyword turned into a dash.
        msg = Message()
        msg.add_header('Content-Disposition', 'inline', foo_bar=None)
        self.assertEqual("inline; foo-bar", msg['Content-Disposition'])

    def test_add_header_with_no_value(self):
        msg = Message()
        msg.add_header('X-Status', None)
        self.assertEqual('', msg['X-Status'])

    # Issue 5871: reject an attempt to embed a header inside a header value
    # (header injection attack).
    def test_embeded_header_via_Header_rejected(self):
        msg = Message()
        msg['Dummy'] = Header('dummy\nX-Injected-Header: test')
        self.assertRaises(errors.HeaderParseError, msg.as_string)

    def test_embeded_header_via_string_rejected(self):
        msg = Message()
        msg['Dummy'] = 'dummy\nX-Injected-Header: test'
        self.assertRaises(errors.HeaderParseError, msg.as_string)
606
# Test the email.encoders module
class TestEncoders(unittest.TestCase):
    """Tests for how Content-Transfer-Encoding is chosen and applied."""

    def test_EncodersEncode_base64(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            bindata = fp.read()
        mimed = email.mime.image.MIMEImage(bindata)
        base64ed = mimed.get_payload()
        # the transfer-encoded body lines should all be <=76 characters
        lines = base64ed.split('\n')
        self.assertLessEqual(max(len(x) for x in lines), 76)

    def test_encode_empty_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg['content-transfer-encoding'], '7bit')

    def test_default_cte(self):
        eq = self.assertEqual
        # 7bit data and the default us-ascii _charset
        msg = MIMEText('hello world')
        eq(msg['content-transfer-encoding'], '7bit')
        # Similar, but with 8bit data
        msg = MIMEText('hello \xf8 world')
        eq(msg['content-transfer-encoding'], '8bit')
        # And now with a different charset
        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')

    def test_encode7or8bit(self):
        # Make sure a charset whose input character set is 8bit but
        # whose output character set is 7bit gets a transfer-encoding
        # of 7bit.
        eq = self.assertEqual
        msg = MIMEText('文', _charset='euc-jp')
        eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000644
Ezio Melottib3aedd42010-11-20 19:04:17 +0000645
# Test long header wrapping
647class TestLongHeaders(TestEmailBase):
R David Murray01581ee2011-04-18 10:04:34 -0400648
649 maxDiff = None
650
def test_split_long_continuation(self):
    # A header continuation line that is far too long but has no place to
    # split must survive a parse/flatten round trip unchanged.
    eq = self.ndiffAssertEqual
    msg = email.message_from_string("""\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
    sfp = StringIO()
    g = Generator(sfp)
    g.flatten(msg)
    eq(sfp.getvalue(), """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
""")
670
def test_another_long_almost_unsplittable_header(self):
    # Header.encode() must keep the existing folding of a value whose
    # middle line cannot be split, for both tab and space continuations.
    eq = self.ndiffAssertEqual
    hstr = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    h = Header(hstr, continuation_ws='\t')
    eq(h.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    # Same value with the tabs turned into single spaces.
    h = Header(hstr.replace('\t', ' '))
    eq(h.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
687
def test_long_nonstring(self):
    # Build one Subject header from three chunks in three different
    # charsets and check how it is folded, both when flattened as part of
    # a message and when encoded directly with maxlinelen=76.
    eq = self.ndiffAssertEqual
    g = Charset("iso-8859-1")
    cz = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
              b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              b'bef\xf6rdert. ')
    cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
               b'd\xf9vtipu.. ')
    utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    h = Header(g_head, g, header_name='Subject')
    h.append(cz_head, cz)
    h.append(utf8_head, utf8)
    msg = Message()
    msg['Subject'] = h
    sfp = StringIO()
    # Separate name for the generator so the iso-8859-1 Charset bound to
    # ``g`` above is not silently rebound.
    gen = Generator(sfp)
    gen.flatten(msg)
    eq(sfp.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    eq(h.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000741
def test_long_header_encode(self):
    # A long parameter list is folded after a ';', with a single space
    # starting the continuation line.
    eq = self.ndiffAssertEqual
    h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
               'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
               header_name='X-Foobar-Spoink-Defrobnit')
    eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
750
def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    # continuation_ws='\t' is only a hint: the value is folded at an
    # existing space, so the continuation line still starts with a space.
    eq = self.ndiffAssertEqual
    h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
               'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
               header_name='X-Foobar-Spoink-Defrobnit',
               continuation_ws='\t')
    eq(h.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
760
761 def test_long_header_encode_with_tab_continuation(self):
762 eq = self.ndiffAssertEqual
763 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
764 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
765 header_name='X-Foobar-Spoink-Defrobnit',
766 continuation_ws='\t')
767 eq(h.encode(), '''\
768wasnipoop; giraffes="very-long-necked-animals";
769\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
770
R David Murray3a6152f2011-03-14 21:13:03 -0400771 def test_header_encode_with_different_output_charset(self):
772 h = Header('文', 'euc-jp')
773 self.assertEqual(h.encode(), "=?iso-2022-jp?b?GyRCSjgbKEI=?=")
774
775 def test_long_header_encode_with_different_output_charset(self):
776 h = Header(b'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4'
777 b'\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4'
778 b'\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4'
779 b'\xa4\xa4\xde\xa4\xb9'.decode('euc-jp'), 'euc-jp')
780 res = """\
781=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYSE8JWskTztKMnE8VCROPjUbKEI=?=
782 =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?="""
783 self.assertEqual(h.encode(), res)
784
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000785 def test_header_splitter(self):
786 eq = self.ndiffAssertEqual
787 msg = MIMEText('')
788 # It'd be great if we could use add_header() here, but that doesn't
789 # guarantee an order of the parameters.
790 msg['X-Foobar-Spoink-Defrobnit'] = (
791 'wasnipoop; giraffes="very-long-necked-animals"; '
792 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
793 sfp = StringIO()
794 g = Generator(sfp)
795 g.flatten(msg)
796 eq(sfp.getvalue(), '''\
797Content-Type: text/plain; charset="us-ascii"
798MIME-Version: 1.0
799Content-Transfer-Encoding: 7bit
800X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
801 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
802
803''')
804
805 def test_no_semis_header_splitter(self):
806 eq = self.ndiffAssertEqual
807 msg = Message()
808 msg['From'] = 'test@dom.ain'
809 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
810 msg.set_payload('Test')
811 sfp = StringIO()
812 g = Generator(sfp)
813 g.flatten(msg)
814 eq(sfp.getvalue(), """\
815From: test@dom.ain
816References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
817 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
818
819Test""")
820
R David Murray7da4db12011-04-07 20:37:17 -0400821 def test_last_split_chunk_does_not_fit(self):
822 eq = self.ndiffAssertEqual
823 h = Header('Subject: the first part of this is short, but_the_second'
824 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
825 '_all_by_itself')
826 eq(h.encode(), """\
827Subject: the first part of this is short,
828 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
829
830 def test_splittable_leading_char_followed_by_overlong_unsplitable(self):
831 eq = self.ndiffAssertEqual
832 h = Header(', but_the_second'
833 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
834 '_all_by_itself')
835 eq(h.encode(), """\
836,
837 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
838
839 def test_multiple_splittable_leading_char_followed_by_overlong_unsplitable(self):
840 eq = self.ndiffAssertEqual
841 h = Header(', , but_the_second'
842 '_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line'
843 '_all_by_itself')
844 eq(h.encode(), """\
845, ,
846 but_the_second_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself""")
847
848 def test_trailing_splitable_on_overlong_unsplitable(self):
849 eq = self.ndiffAssertEqual
850 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
851 'be_on_a_line_all_by_itself;')
852 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_should_"
853 "be_on_a_line_all_by_itself;")
854
855 def test_trailing_splitable_on_overlong_unsplitable_with_leading_splitable(self):
856 eq = self.ndiffAssertEqual
857 h = Header('; '
858 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
R David Murray01581ee2011-04-18 10:04:34 -0400859 'be_on_a_line_all_by_itself; ')
R David Murray7da4db12011-04-07 20:37:17 -0400860 eq(h.encode(), """\
861;
R David Murray01581ee2011-04-18 10:04:34 -0400862 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
R David Murray7da4db12011-04-07 20:37:17 -0400863
    def test_long_header_with_multiple_sequential_split_chars(self):
        # Folding a long value must not lose any of its text; the expected
        # result below is the full value folded onto two lines.
        # NOTE(review): the test name and the value itself talk about two
        # whitespaces in a row, but only single spaces are visible in the
        # literals here -- whitespace runs may have been collapsed in this
        # copy of the file; confirm against the canonical source.
        eq = self.ndiffAssertEqual
        h = Header('This is a long line that has two whitespaces in a row. '
            'This used to cause truncation of the header when folded')
        eq(h.encode(), """\
This is a long line that has two whitespaces in a row. This used to cause
 truncation of the header when folded""")
871
R David Murray01581ee2011-04-18 10:04:34 -0400872 def test_splitter_split_on_punctuation_only_if_fws(self):
873 eq = self.ndiffAssertEqual
874 h = Header('thisverylongheaderhas;semicolons;and,commas,but'
875 'they;arenotlegal;fold,points')
876 eq(h.encode(), "thisverylongheaderhas;semicolons;and,commas,butthey;"
877 "arenotlegal;fold,points")
878
879 def test_leading_splittable_in_the_middle_just_before_overlong_last_part(self):
880 eq = self.ndiffAssertEqual
881 h = Header('this is a test where we need to have more than one line '
882 'before; our final line that is just too big to fit;; '
883 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
884 'be_on_a_line_all_by_itself;')
885 eq(h.encode(), """\
886this is a test where we need to have more than one line before;
887 our final line that is just too big to fit;;
888 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself;""")
889
890 def test_overlong_last_part_followed_by_split_point(self):
891 eq = self.ndiffAssertEqual
892 h = Header('this_part_does_not_fit_within_maxlinelen_and_thus_should_'
893 'be_on_a_line_all_by_itself ')
894 eq(h.encode(), "this_part_does_not_fit_within_maxlinelen_and_thus_"
895 "should_be_on_a_line_all_by_itself ")
896
897 def test_multiline_with_overlong_parts_separated_by_two_split_points(self):
898 eq = self.ndiffAssertEqual
899 h = Header('this_is_a__test_where_we_need_to_have_more_than_one_line_'
900 'before_our_final_line_; ; '
901 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
902 'be_on_a_line_all_by_itself; ')
903 eq(h.encode(), """\
904this_is_a__test_where_we_need_to_have_more_than_one_line_before_our_final_line_;
905 ;
906 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
907
908 def test_multiline_with_overlong_last_part_followed_by_split_point(self):
909 eq = self.ndiffAssertEqual
910 h = Header('this is a test where we need to have more than one line '
911 'before our final line; ; '
912 'this_part_does_not_fit_within_maxlinelen_and_thus_should_'
913 'be_on_a_line_all_by_itself; ')
914 eq(h.encode(), """\
915this is a test where we need to have more than one line before our final line;
916 ;
917 this_part_does_not_fit_within_maxlinelen_and_thus_should_be_on_a_line_all_by_itself; """)
918
    def test_long_header_with_whitespace_runs(self):
        # Flatten a message whose References value contains runs of
        # whitespace and compare the complete serialised text.
        # NOTE(review): the expected text ends in two spaces (\x20\x20) while
        # the joined value as written here ends in a single space, so the
        # literals look inconsistent -- whitespace runs may have been
        # collapsed in this copy of the file; confirm against the canonical
        # source before relying on these literals.
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'test@dom.ain'
        msg['References'] = SPACE.join(['<foo@dom.ain> '] * 10)
        msg.set_payload('Test')
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), """\
From: test@dom.ain
References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
 <foo@dom.ain> <foo@dom.ain>\x20\x20

Test""")
935
936 def test_long_run_with_semi_header_splitter(self):
937 eq = self.ndiffAssertEqual
938 msg = Message()
939 msg['From'] = 'test@dom.ain'
940 msg['References'] = SPACE.join(['<foo@dom.ain>'] * 10) + '; abc'
941 msg.set_payload('Test')
942 sfp = StringIO()
943 g = Generator(sfp)
944 g.flatten(msg)
945 eq(sfp.getvalue(), """\
946From: test@dom.ain
947References: <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
948 <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain> <foo@dom.ain>
949 <foo@dom.ain>; abc
950
951Test""")
952
953 def test_splitter_split_on_punctuation_only_if_fws(self):
954 eq = self.ndiffAssertEqual
955 msg = Message()
956 msg['From'] = 'test@dom.ain'
957 msg['References'] = ('thisverylongheaderhas;semicolons;and,commas,but'
958 'they;arenotlegal;fold,points')
959 msg.set_payload('Test')
960 sfp = StringIO()
961 g = Generator(sfp)
962 g.flatten(msg)
963 # XXX the space after the header should not be there.
964 eq(sfp.getvalue(), """\
965From: test@dom.ain
966References:\x20
967 thisverylongheaderhas;semicolons;and,commas,butthey;arenotlegal;fold,points
968
969Test""")
970
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000971 def test_no_split_long_header(self):
972 eq = self.ndiffAssertEqual
973 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000974 h = Header(hstr)
975 # These come on two lines because Headers are really field value
976 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000977 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000978References:
979 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
980 h = Header('x' * 80)
981 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000982
983 def test_splitting_multiple_long_lines(self):
984 eq = self.ndiffAssertEqual
985 hstr = """\
986from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
987\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
988\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
989"""
990 h = Header(hstr, continuation_ws='\t')
991 eq(h.encode(), """\
992from babylon.socal-raves.org (localhost [127.0.0.1]);
993 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
994 for <mailman-admin@babylon.socal-raves.org>;
995 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
996\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
997 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
998 for <mailman-admin@babylon.socal-raves.org>;
999 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
1000\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
1001 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
1002 for <mailman-admin@babylon.socal-raves.org>;
1003 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
1004
1005 def test_splitting_first_line_only_is_long(self):
1006 eq = self.ndiffAssertEqual
1007 hstr = """\
1008from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
1009\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1010\tid 17k4h5-00034i-00
1011\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
1012 h = Header(hstr, maxlinelen=78, header_name='Received',
1013 continuation_ws='\t')
1014 eq(h.encode(), """\
1015from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
1016 helo=cthulhu.gerg.ca)
1017\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
1018\tid 17k4h5-00034i-00
1019\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
1020
1021 def test_long_8bit_header(self):
1022 eq = self.ndiffAssertEqual
1023 msg = Message()
1024 h = Header('Britische Regierung gibt', 'iso-8859-1',
1025 header_name='Subject')
1026 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +00001027 eq(h.encode(maxlinelen=76), """\
1028=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1029 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001030 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +00001031 eq(msg.as_string(maxheaderlen=76), """\
1032Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
1033 =?iso-8859-1?q?hore-Windkraftprojekte?=
1034
1035""")
1036 eq(msg.as_string(maxheaderlen=0), """\
1037Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001038
1039""")
1040
1041 def test_long_8bit_header_no_charset(self):
1042 eq = self.ndiffAssertEqual
1043 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +00001044 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
1045 'f\xfcr Offshore-Windkraftprojekte '
1046 '<a-very-long-address@example.com>')
1047 msg['Reply-To'] = header_string
1048 self.assertRaises(UnicodeEncodeError, msg.as_string)
1049 msg = Message()
1050 msg['Reply-To'] = Header(header_string, 'utf-8',
1051 header_name='Reply-To')
1052 eq(msg.as_string(maxheaderlen=78), """\
1053Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
1054 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001055
1056""")
1057
1058 def test_long_to_header(self):
1059 eq = self.ndiffAssertEqual
1060 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
R David Murray01581ee2011-04-18 10:04:34 -04001061 '<someone@eecs.umich.edu>, '
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001062 '"Someone Test #B" <someone@umich.edu>, '
1063 '"Someone Test #C" <someone@eecs.umich.edu>, '
1064 '"Someone Test #D" <someone@eecs.umich.edu>')
1065 msg = Message()
1066 msg['To'] = to
1067 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +00001068To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001069 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +00001070 "Someone Test #C" <someone@eecs.umich.edu>,
1071 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001072
1073''')
1074
1075 def test_long_line_after_append(self):
1076 eq = self.ndiffAssertEqual
1077 s = 'This is an example of string which has almost the limit of header length.'
1078 h = Header(s)
1079 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +00001080 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001081This is an example of string which has almost the limit of header length.
1082 Add another line.""")
1083
1084 def test_shorter_line_with_append(self):
1085 eq = self.ndiffAssertEqual
1086 s = 'This is a shorter line.'
1087 h = Header(s)
1088 h.append('Add another sentence. (Surprise?)')
1089 eq(h.encode(),
1090 'This is a shorter line. Add another sentence. (Surprise?)')
1091
1092 def test_long_field_name(self):
1093 eq = self.ndiffAssertEqual
1094 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +00001095 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
1096 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
1097 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
1098 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001099 h = Header(gs, 'iso-8859-1', header_name=fn)
1100 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +00001101 eq(h.encode(maxlinelen=76), """\
1102=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
1103 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
1104 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
1105 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001106
1107 def test_long_received_header(self):
1108 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
1109 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
1110 'Wed, 05 Mar 2003 18:10:18 -0700')
1111 msg = Message()
1112 msg['Received-1'] = Header(h, continuation_ws='\t')
1113 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +00001114 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001115 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001116Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1117 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001118 Wed, 05 Mar 2003 18:10:18 -0700
R David Murray01581ee2011-04-18 10:04:34 -04001119Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by
1120 hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
Barry Warsawbef9d212007-08-31 10:55:37 +00001121 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001122
1123""")
1124
1125 def test_string_headerinst_eq(self):
1126 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
1127 'tu-muenchen.de> (David Bremner\'s message of '
1128 '"Thu, 6 Mar 2003 13:58:21 +0100")')
1129 msg = Message()
1130 msg['Received-1'] = Header(h, header_name='Received-1',
1131 continuation_ws='\t')
1132 msg['Received-2'] = h
R David Murray01581ee2011-04-18 10:04:34 -04001133 # XXX The space after the ':' should not be there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001134 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001135Received-1:\x20
1136 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1137 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
1138Received-2:\x20
1139 <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David
1140 Bremner's message of \"Thu, 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001141
1142""")
1143
1144 def test_long_unbreakable_lines_with_continuation(self):
1145 eq = self.ndiffAssertEqual
1146 msg = Message()
1147 t = """\
1148iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1149 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
1150 msg['Face-1'] = t
1151 msg['Face-2'] = Header(t, header_name='Face-2')
R David Murray01581ee2011-04-18 10:04:34 -04001152 msg['Face-3'] = ' ' + t
Barry Warsawbef9d212007-08-31 10:55:37 +00001153 # XXX This splitting is all wrong. It the first value line should be
R David Murray01581ee2011-04-18 10:04:34 -04001154 # snug against the field name or the space after the header not there.
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001155 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +00001156Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +00001157 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001158 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +00001159Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +00001160 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001161 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
R David Murray01581ee2011-04-18 10:04:34 -04001162Face-3:\x20
1163 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
1164 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001165
1166""")
1167
1168 def test_another_long_multiline_header(self):
1169 eq = self.ndiffAssertEqual
1170 m = ('Received: from siimage.com '
1171 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +00001172 'Microsoft SMTPSVC(5.0.2195.4905); '
1173 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001174 msg = email.message_from_string(m)
1175 eq(msg.as_string(maxheaderlen=78), '''\
R David Murray01581ee2011-04-18 10:04:34 -04001176Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with
1177 Microsoft SMTPSVC(5.0.2195.4905); Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001178
1179''')
1180
1181 def test_long_lines_with_different_header(self):
1182 eq = self.ndiffAssertEqual
1183 h = ('List-Unsubscribe: '
1184 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
1185 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
1186 '?subject=unsubscribe>')
1187 msg = Message()
1188 msg['List'] = h
1189 msg['List'] = Header(h, header_name='List')
1190 eq(msg.as_string(maxheaderlen=78), """\
R David Murray01581ee2011-04-18 10:04:34 -04001191List: List-Unsubscribe:
1192 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001193 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
R David Murray01581ee2011-04-18 10:04:34 -04001194List: List-Unsubscribe:
1195 <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +00001196 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001197
1198""")
1199
R. David Murray6f0022d2011-01-07 21:57:25 +00001200 def test_long_rfc2047_header_with_embedded_fws(self):
1201 h = Header(textwrap.dedent("""\
1202 We're going to pretend this header is in a non-ascii character set
1203 \tto see if line wrapping with encoded words and embedded
1204 folding white space works"""),
1205 charset='utf-8',
1206 header_name='Test')
1207 self.assertEqual(h.encode()+'\n', textwrap.dedent("""\
1208 =?utf-8?q?We=27re_going_to_pretend_this_header_is_in_a_non-ascii_chara?=
1209 =?utf-8?q?cter_set?=
1210 =?utf-8?q?_to_see_if_line_wrapping_with_encoded_words_and_embedded?=
1211 =?utf-8?q?_folding_white_space_works?=""")+'\n')
1212
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001213
Ezio Melottib3aedd42010-11-20 19:04:17 +00001214
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001215# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    # Checks the Generator's mangle_from_ switch against a body whose first
    # line starts with "From ".

    def setUp(self):
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def test_mangled_from(self):
        # With mangling on, the body line gains a leading '>'.
        out = StringIO()
        Generator(out, mangle_from_=True).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)

    def test_dont_mangle_from(self):
        # With mangling off, the body is written through untouched.
        out = StringIO()
        Generator(out, mangle_from_=False).flatten(self.msg)
        expected = """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
"""
        self.assertEqual(out.getvalue(), expected)
1246
1247
Ezio Melottib3aedd42010-11-20 19:04:17 +00001248
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001249# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    # Exercises MIMEAudio built from the audiotest.au fixture.

    def setUp(self):
        with openfile('audiotest.au', 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is used as the failobj sentinel so identity can be
        # checked; assertIs reports both operands when it fails.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1287
1288
Ezio Melottib3aedd42010-11-20 19:04:17 +00001289
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001290# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    # Exercises MIMEImage built from the PyBanner048.gif fixture.

    def setUp(self):
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is used as the failobj sentinel so identity can be
        # checked; assertIs reports both operands when it fails.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1328
1329
Ezio Melottib3aedd42010-11-20 19:04:17 +00001330
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001331# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    # Exercises MIMEApplication with a short run of high-bit bytes.

    def test_headers(self):
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # whitespace in the cte encoded block is RFC-irrelevant.
        encoded = part.get_payload().strip()
        self.assertEqual(encoded, '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001346
1347
Ezio Melottib3aedd42010-11-20 19:04:17 +00001348
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001349# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    # Exercises MIMEText construction with and without an explicit charset.

    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as the failobj sentinel so identity can be
        # checked; assertIs reports both operands when it fails.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
        "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1400
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001401
Ezio Melottib3aedd42010-11-20 19:04:17 +00001402
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001403# Test complicated multipart/* messages
1404class TestMultipart(TestEmailBase):
1405 def setUp(self):
1406 with openfile('PyBanner048.gif', 'rb') as fp:
1407 data = fp.read()
1408 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1409 image = MIMEImage(data, name='dingusfish.gif')
1410 image.add_header('content-disposition', 'attachment',
1411 filename='dingusfish.gif')
1412 intro = MIMEText('''\
1413Hi there,
1414
1415This is the dingus fish.
1416''')
1417 container.attach(intro)
1418 container.attach(image)
1419 container['From'] = 'Barry <barry@digicool.com>'
1420 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1421 container['Subject'] = 'Here is your dingus fish'
1422
1423 now = 987809702.54848599
1424 timetuple = time.localtime(now)
1425 if timetuple[-1] == 0:
1426 tzsecs = time.timezone
1427 else:
1428 tzsecs = time.altzone
1429 if tzsecs > 0:
1430 sign = '-'
1431 else:
1432 sign = '+'
1433 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1434 container['Date'] = time.strftime(
1435 '%a, %d %b %Y %H:%M:%S',
1436 time.localtime(now)) + tzoffset
1437 self._msg = container
1438 self._im = image
1439 self._txt = intro
1440
1441 def test_hierarchy(self):
1442 # convenience
1443 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001444 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001445 raises = self.assertRaises
1446 # tests
1447 m = self._msg
1448 unless(m.is_multipart())
1449 eq(m.get_content_type(), 'multipart/mixed')
1450 eq(len(m.get_payload()), 2)
1451 raises(IndexError, m.get_payload, 2)
1452 m0 = m.get_payload(0)
1453 m1 = m.get_payload(1)
1454 unless(m0 is self._txt)
1455 unless(m1 is self._im)
1456 eq(m.get_payload(), [m0, m1])
1457 unless(not m0.is_multipart())
1458 unless(not m1.is_multipart())
1459
1460 def test_empty_multipart_idempotent(self):
1461 text = """\
1462Content-Type: multipart/mixed; boundary="BOUNDARY"
1463MIME-Version: 1.0
1464Subject: A subject
1465To: aperson@dom.ain
1466From: bperson@dom.ain
1467
1468
1469--BOUNDARY
1470
1471
1472--BOUNDARY--
1473"""
1474 msg = Parser().parsestr(text)
1475 self.ndiffAssertEqual(text, msg.as_string())
1476
1477 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1478 outer = MIMEBase('multipart', 'mixed')
1479 outer['Subject'] = 'A subject'
1480 outer['To'] = 'aperson@dom.ain'
1481 outer['From'] = 'bperson@dom.ain'
1482 outer.set_boundary('BOUNDARY')
1483 self.ndiffAssertEqual(outer.as_string(), '''\
1484Content-Type: multipart/mixed; boundary="BOUNDARY"
1485MIME-Version: 1.0
1486Subject: A subject
1487To: aperson@dom.ain
1488From: bperson@dom.ain
1489
1490--BOUNDARY
1491
1492--BOUNDARY--''')
1493
1494 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1495 outer = MIMEBase('multipart', 'mixed')
1496 outer['Subject'] = 'A subject'
1497 outer['To'] = 'aperson@dom.ain'
1498 outer['From'] = 'bperson@dom.ain'
1499 outer.preamble = ''
1500 outer.epilogue = ''
1501 outer.set_boundary('BOUNDARY')
1502 self.ndiffAssertEqual(outer.as_string(), '''\
1503Content-Type: multipart/mixed; boundary="BOUNDARY"
1504MIME-Version: 1.0
1505Subject: A subject
1506To: aperson@dom.ain
1507From: bperson@dom.ain
1508
1509
1510--BOUNDARY
1511
1512--BOUNDARY--
1513''')
1514
1515 def test_one_part_in_a_multipart(self):
1516 eq = self.ndiffAssertEqual
1517 outer = MIMEBase('multipart', 'mixed')
1518 outer['Subject'] = 'A subject'
1519 outer['To'] = 'aperson@dom.ain'
1520 outer['From'] = 'bperson@dom.ain'
1521 outer.set_boundary('BOUNDARY')
1522 msg = MIMEText('hello world')
1523 outer.attach(msg)
1524 eq(outer.as_string(), '''\
1525Content-Type: multipart/mixed; boundary="BOUNDARY"
1526MIME-Version: 1.0
1527Subject: A subject
1528To: aperson@dom.ain
1529From: bperson@dom.ain
1530
1531--BOUNDARY
1532Content-Type: text/plain; charset="us-ascii"
1533MIME-Version: 1.0
1534Content-Transfer-Encoding: 7bit
1535
1536hello world
1537--BOUNDARY--''')
1538
1539 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1540 eq = self.ndiffAssertEqual
1541 outer = MIMEBase('multipart', 'mixed')
1542 outer['Subject'] = 'A subject'
1543 outer['To'] = 'aperson@dom.ain'
1544 outer['From'] = 'bperson@dom.ain'
1545 outer.preamble = ''
1546 msg = MIMEText('hello world')
1547 outer.attach(msg)
1548 outer.set_boundary('BOUNDARY')
1549 eq(outer.as_string(), '''\
1550Content-Type: multipart/mixed; boundary="BOUNDARY"
1551MIME-Version: 1.0
1552Subject: A subject
1553To: aperson@dom.ain
1554From: bperson@dom.ain
1555
1556
1557--BOUNDARY
1558Content-Type: text/plain; charset="us-ascii"
1559MIME-Version: 1.0
1560Content-Transfer-Encoding: 7bit
1561
1562hello world
1563--BOUNDARY--''')
1564
1565
1566 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1567 eq = self.ndiffAssertEqual
1568 outer = MIMEBase('multipart', 'mixed')
1569 outer['Subject'] = 'A subject'
1570 outer['To'] = 'aperson@dom.ain'
1571 outer['From'] = 'bperson@dom.ain'
1572 outer.preamble = None
1573 msg = MIMEText('hello world')
1574 outer.attach(msg)
1575 outer.set_boundary('BOUNDARY')
1576 eq(outer.as_string(), '''\
1577Content-Type: multipart/mixed; boundary="BOUNDARY"
1578MIME-Version: 1.0
1579Subject: A subject
1580To: aperson@dom.ain
1581From: bperson@dom.ain
1582
1583--BOUNDARY
1584Content-Type: text/plain; charset="us-ascii"
1585MIME-Version: 1.0
1586Content-Transfer-Encoding: 7bit
1587
1588hello world
1589--BOUNDARY--''')
1590
1591
1592 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1593 eq = self.ndiffAssertEqual
1594 outer = MIMEBase('multipart', 'mixed')
1595 outer['Subject'] = 'A subject'
1596 outer['To'] = 'aperson@dom.ain'
1597 outer['From'] = 'bperson@dom.ain'
1598 outer.epilogue = None
1599 msg = MIMEText('hello world')
1600 outer.attach(msg)
1601 outer.set_boundary('BOUNDARY')
1602 eq(outer.as_string(), '''\
1603Content-Type: multipart/mixed; boundary="BOUNDARY"
1604MIME-Version: 1.0
1605Subject: A subject
1606To: aperson@dom.ain
1607From: bperson@dom.ain
1608
1609--BOUNDARY
1610Content-Type: text/plain; charset="us-ascii"
1611MIME-Version: 1.0
1612Content-Transfer-Encoding: 7bit
1613
1614hello world
1615--BOUNDARY--''')
1616
1617
1618 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1619 eq = self.ndiffAssertEqual
1620 outer = MIMEBase('multipart', 'mixed')
1621 outer['Subject'] = 'A subject'
1622 outer['To'] = 'aperson@dom.ain'
1623 outer['From'] = 'bperson@dom.ain'
1624 outer.epilogue = ''
1625 msg = MIMEText('hello world')
1626 outer.attach(msg)
1627 outer.set_boundary('BOUNDARY')
1628 eq(outer.as_string(), '''\
1629Content-Type: multipart/mixed; boundary="BOUNDARY"
1630MIME-Version: 1.0
1631Subject: A subject
1632To: aperson@dom.ain
1633From: bperson@dom.ain
1634
1635--BOUNDARY
1636Content-Type: text/plain; charset="us-ascii"
1637MIME-Version: 1.0
1638Content-Transfer-Encoding: 7bit
1639
1640hello world
1641--BOUNDARY--
1642''')
1643
1644
1645 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1646 eq = self.ndiffAssertEqual
1647 outer = MIMEBase('multipart', 'mixed')
1648 outer['Subject'] = 'A subject'
1649 outer['To'] = 'aperson@dom.ain'
1650 outer['From'] = 'bperson@dom.ain'
1651 outer.epilogue = '\n'
1652 msg = MIMEText('hello world')
1653 outer.attach(msg)
1654 outer.set_boundary('BOUNDARY')
1655 eq(outer.as_string(), '''\
1656Content-Type: multipart/mixed; boundary="BOUNDARY"
1657MIME-Version: 1.0
1658Subject: A subject
1659To: aperson@dom.ain
1660From: bperson@dom.ain
1661
1662--BOUNDARY
1663Content-Type: text/plain; charset="us-ascii"
1664MIME-Version: 1.0
1665Content-Transfer-Encoding: 7bit
1666
1667hello world
1668--BOUNDARY--
1669
1670''')
1671
1672 def test_message_external_body(self):
1673 eq = self.assertEqual
1674 msg = self._msgobj('msg_36.txt')
1675 eq(len(msg.get_payload()), 2)
1676 msg1 = msg.get_payload(1)
1677 eq(msg1.get_content_type(), 'multipart/alternative')
1678 eq(len(msg1.get_payload()), 2)
1679 for subpart in msg1.get_payload():
1680 eq(subpart.get_content_type(), 'message/external-body')
1681 eq(len(subpart.get_payload()), 1)
1682 subsubpart = subpart.get_payload(0)
1683 eq(subsubpart.get_content_type(), 'text/plain')
1684
1685 def test_double_boundary(self):
1686 # msg_37.txt is a multipart that contains two dash-boundary's in a
1687 # row. Our interpretation of RFC 2046 calls for ignoring the second
1688 # and subsequent boundaries.
1689 msg = self._msgobj('msg_37.txt')
1690 self.assertEqual(len(msg.get_payload()), 3)
1691
1692 def test_nested_inner_contains_outer_boundary(self):
1693 eq = self.ndiffAssertEqual
1694 # msg_38.txt has an inner part that contains outer boundaries. My
1695 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
1696 # these are illegal and should be interpreted as unterminated inner
1697 # parts.
1698 msg = self._msgobj('msg_38.txt')
1699 sfp = StringIO()
1700 iterators._structure(msg, sfp)
1701 eq(sfp.getvalue(), """\
1702multipart/mixed
1703 multipart/mixed
1704 multipart/alternative
1705 text/plain
1706 text/plain
1707 text/plain
1708 text/plain
1709""")
1710
1711 def test_nested_with_same_boundary(self):
1712 eq = self.ndiffAssertEqual
1713 # msg 39.txt is similarly evil in that it's got inner parts that use
1714 # the same boundary as outer parts. Again, I believe the way this is
1715 # parsed is closest to the spirit of RFC 2046
1716 msg = self._msgobj('msg_39.txt')
1717 sfp = StringIO()
1718 iterators._structure(msg, sfp)
1719 eq(sfp.getvalue(), """\
1720multipart/mixed
1721 multipart/mixed
1722 multipart/alternative
1723 application/octet-stream
1724 application/octet-stream
1725 text/plain
1726""")
1727
1728 def test_boundary_in_non_multipart(self):
1729 msg = self._msgobj('msg_40.txt')
1730 self.assertEqual(msg.as_string(), '''\
1731MIME-Version: 1.0
1732Content-Type: text/html; boundary="--961284236552522269"
1733
1734----961284236552522269
1735Content-Type: text/html;
1736Content-Transfer-Encoding: 7Bit
1737
1738<html></html>
1739
1740----961284236552522269--
1741''')
1742
1743 def test_boundary_with_leading_space(self):
1744 eq = self.assertEqual
1745 msg = email.message_from_string('''\
1746MIME-Version: 1.0
1747Content-Type: multipart/mixed; boundary=" XXXX"
1748
1749-- XXXX
1750Content-Type: text/plain
1751
1752
1753-- XXXX
1754Content-Type: text/plain
1755
1756-- XXXX--
1757''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001758 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001759 eq(msg.get_boundary(), ' XXXX')
1760 eq(len(msg.get_payload()), 2)
1761
1762 def test_boundary_without_trailing_newline(self):
1763 m = Parser().parsestr("""\
1764Content-Type: multipart/mixed; boundary="===============0012394164=="
1765MIME-Version: 1.0
1766
1767--===============0012394164==
1768Content-Type: image/file1.jpg
1769MIME-Version: 1.0
1770Content-Transfer-Encoding: base64
1771
1772YXNkZg==
1773--===============0012394164==--""")
Ezio Melottib3aedd42010-11-20 19:04:17 +00001774 self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001775
1776
Ezio Melottib3aedd42010-11-20 19:04:17 +00001777
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001778# Test some badly formatted messages
R David Murray3edd22a2011-04-18 13:59:37 -04001779class TestNonConformantBase:
1780
1781 def _msgobj(self, filename):
1782 with openfile(filename) as fp:
1783 return email.message_from_file(fp, policy=self.policy)
1784
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001785 def test_parse_missing_minor_type(self):
1786 eq = self.assertEqual
1787 msg = self._msgobj('msg_14.txt')
1788 eq(msg.get_content_type(), 'text/plain')
1789 eq(msg.get_content_maintype(), 'text')
1790 eq(msg.get_content_subtype(), 'plain')
1791
1792 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001793 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001794 msg = self._msgobj('msg_15.txt')
1795 # XXX We can probably eventually do better
1796 inner = msg.get_payload(0)
1797 unless(hasattr(inner, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001798 self.assertEqual(len(self.get_defects(inner)), 1)
1799 unless(isinstance(self.get_defects(inner)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001800 errors.StartBoundaryNotFoundDefect))
1801
1802 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001803 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001804 msg = self._msgobj('msg_25.txt')
1805 unless(isinstance(msg.get_payload(), str))
R David Murray3edd22a2011-04-18 13:59:37 -04001806 self.assertEqual(len(self.get_defects(msg)), 2)
1807 unless(isinstance(self.get_defects(msg)[0],
1808 errors.NoBoundaryInMultipartDefect))
1809 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001810 errors.MultipartInvariantViolationDefect))
1811
1812 def test_invalid_content_type(self):
1813 eq = self.assertEqual
1814 neq = self.ndiffAssertEqual
1815 msg = Message()
1816 # RFC 2045, $5.2 says invalid yields text/plain
1817 msg['Content-Type'] = 'text'
1818 eq(msg.get_content_maintype(), 'text')
1819 eq(msg.get_content_subtype(), 'plain')
1820 eq(msg.get_content_type(), 'text/plain')
1821 # Clear the old value and try something /really/ invalid
1822 del msg['content-type']
1823 msg['Content-Type'] = 'foo'
1824 eq(msg.get_content_maintype(), 'text')
1825 eq(msg.get_content_subtype(), 'plain')
1826 eq(msg.get_content_type(), 'text/plain')
1827 # Still, make sure that the message is idempotently generated
1828 s = StringIO()
1829 g = Generator(s)
1830 g.flatten(msg)
1831 neq(s.getvalue(), 'Content-Type: foo\n\n')
1832
1833 def test_no_start_boundary(self):
1834 eq = self.ndiffAssertEqual
1835 msg = self._msgobj('msg_31.txt')
1836 eq(msg.get_payload(), """\
1837--BOUNDARY
1838Content-Type: text/plain
1839
1840message 1
1841
1842--BOUNDARY
1843Content-Type: text/plain
1844
1845message 2
1846
1847--BOUNDARY--
1848""")
1849
1850 def test_no_separating_blank_line(self):
1851 eq = self.ndiffAssertEqual
1852 msg = self._msgobj('msg_35.txt')
1853 eq(msg.as_string(), """\
1854From: aperson@dom.ain
1855To: bperson@dom.ain
1856Subject: here's something interesting
1857
1858counter to RFC 2822, there's no separating newline here
1859""")
1860
1861 def test_lying_multipart(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001862 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001863 msg = self._msgobj('msg_41.txt')
1864 unless(hasattr(msg, 'defects'))
R David Murray3edd22a2011-04-18 13:59:37 -04001865 self.assertEqual(len(self.get_defects(msg)), 2)
1866 unless(isinstance(self.get_defects(msg)[0],
1867 errors.NoBoundaryInMultipartDefect))
1868 unless(isinstance(self.get_defects(msg)[1],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001869 errors.MultipartInvariantViolationDefect))
1870
1871 def test_missing_start_boundary(self):
1872 outer = self._msgobj('msg_42.txt')
1873 # The message structure is:
1874 #
1875 # multipart/mixed
1876 # text/plain
1877 # message/rfc822
1878 # multipart/mixed [*]
1879 #
1880 # [*] This message is missing its start boundary
1881 bad = outer.get_payload(1).get_payload(0)
R David Murray3edd22a2011-04-18 13:59:37 -04001882 self.assertEqual(len(self.get_defects(bad)), 1)
1883 self.assertTrue(isinstance(self.get_defects(bad)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001884 errors.StartBoundaryNotFoundDefect))
1885
1886 def test_first_line_is_continuation_header(self):
1887 eq = self.assertEqual
1888 m = ' Line 1\nLine 2\nLine 3'
R David Murray3edd22a2011-04-18 13:59:37 -04001889 msg = email.message_from_string(m, policy=self.policy)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001890 eq(msg.keys(), [])
1891 eq(msg.get_payload(), 'Line 2\nLine 3')
R David Murray3edd22a2011-04-18 13:59:37 -04001892 eq(len(self.get_defects(msg)), 1)
1893 self.assertTrue(isinstance(self.get_defects(msg)[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001894 errors.FirstHeaderLineIsContinuationDefect))
R David Murray3edd22a2011-04-18 13:59:37 -04001895 eq(self.get_defects(msg)[0].line, ' Line 1\n')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001896
1897
class TestNonConformant(TestNonConformantBase, TestEmailBase):
    """Run the non-conformance tests with ``email.policy.default``."""

    policy = email.policy.default

    def get_defects(self, obj):
        """Return the defects recorded on *obj* itself."""
        return obj.defects
1904
1905
class TestNonConformantCapture(TestNonConformantBase, TestEmailBase):
    """Run the non-conformance tests with a policy that collects defects."""

    class CapturePolicy(email.policy.Policy):
        """Policy whose register_defect() appends to ``captured``."""

        # Replaced by a fresh list for every policy built in setUp().
        captured = None

        def register_defect(self, obj, defect):
            self.captured.append(defect)

    def setUp(self):
        """Create a policy with an empty capture list for each test."""
        self.policy = self.CapturePolicy(captured=[])

    def get_defects(self, obj):
        """Return every defect captured by the policy, ignoring *obj*."""
        return self.policy.captured
1918
1919
class TestRaisingDefects(TestEmailBase):
    """Check that parsing with ``email.policy.strict`` raises the defects."""

    def _msgobj(self, filename):
        """Parse the test data file *filename* under the strict policy."""
        with openfile(filename) as source:
            parsed = email.message_from_file(source,
                                             policy=email.policy.strict)
        return parsed

    def _check_parse_raises(self, defect_class, filename):
        """Assert that parsing *filename* raises *defect_class*."""
        with self.assertRaises(defect_class):
            self._msgobj(filename)

    def test_same_boundary_inner_outer(self):
        self._check_parse_raises(errors.StartBoundaryNotFoundDefect,
                                 'msg_15.txt')

    def test_multipart_no_boundary(self):
        self._check_parse_raises(errors.NoBoundaryInMultipartDefect,
                                 'msg_25.txt')

    def test_lying_multipart(self):
        self._check_parse_raises(errors.NoBoundaryInMultipartDefect,
                                 'msg_41.txt')

    def test_missing_start_boundary(self):
        self._check_parse_raises(errors.StartBoundaryNotFoundDefect,
                                 'msg_42.txt')

    def test_first_line_is_continuation_header(self):
        text = ' Line 1\nLine 2\nLine 3'
        with self.assertRaises(errors.FirstHeaderLineIsContinuationDefect):
            email.message_from_string(text, policy=email.policy.strict)
1947
Ezio Melottib3aedd42010-11-20 19:04:17 +00001948
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001949# Test RFC 2047 header encoding and decoding
Guido van Rossum9604e662007-08-30 03:46:43 +00001950class TestRFC2047(TestEmailBase):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001951 def test_rfc2047_multiline(self):
1952 eq = self.assertEqual
1953 s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
1954 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
1955 dh = decode_header(s)
1956 eq(dh, [
1957 (b'Re:', None),
1958 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
1959 (b'baz foo bar', None),
1960 (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
1961 header = make_header(dh)
1962 eq(str(header),
1963 'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
Barry Warsaw00b34222007-08-31 02:35:00 +00001964 self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Guido van Rossum9604e662007-08-30 03:46:43 +00001965Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
1966 =?mac-iceland?q?=9Arg=8Cs?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001967
1968 def test_whitespace_eater_unicode(self):
1969 eq = self.assertEqual
1970 s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
1971 dh = decode_header(s)
1972 eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
1973 (b'Pirard <pirard@dom.ain>', None)])
1974 header = str(make_header(dh))
1975 eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')
1976
1977 def test_whitespace_eater_unicode_2(self):
1978 eq = self.assertEqual
1979 s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
1980 dh = decode_header(s)
1981 eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
1982 (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
1983 hu = str(make_header(dh))
1984 eq(hu, 'The quick brown fox jumped over the lazy dog')
1985
1986 def test_rfc2047_missing_whitespace(self):
1987 s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
1988 dh = decode_header(s)
1989 self.assertEqual(dh, [(s, None)])
1990
1991 def test_rfc2047_with_whitespace(self):
1992 s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
1993 dh = decode_header(s)
1994 self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
1995 (b'rg', None), (b'\xe5', 'iso-8859-1'),
1996 (b'sbord', None)])
1997
R. David Murrayc4e69cc2010-08-03 22:14:10 +00001998 def test_rfc2047_B_bad_padding(self):
1999 s = '=?iso-8859-1?B?%s?='
2000 data = [ # only test complete bytes
2001 ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
2002 ('dmk=', b'vi'), ('dmk', b'vi')
2003 ]
2004 for q, a in data:
2005 dh = decode_header(s % q)
2006 self.assertEqual(dh, [(a, 'iso-8859-1')])
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002007
R. David Murray31e984c2010-10-01 15:40:20 +00002008 def test_rfc2047_Q_invalid_digits(self):
2009 # issue 10004.
2010 s = '=?iso-8659-1?Q?andr=e9=zz?='
2011 self.assertEqual(decode_header(s),
2012 [(b'andr\xe9=zz', 'iso-8659-1')])
2013
Ezio Melottib3aedd42010-11-20 19:04:17 +00002014
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002015# Test the MIMEMessage class
2016class TestMIMEMessage(TestEmailBase):
2017 def setUp(self):
2018 with openfile('msg_11.txt') as fp:
2019 self._text = fp.read()
2020
2021 def test_type_error(self):
2022 self.assertRaises(TypeError, MIMEMessage, 'a plain string')
2023
2024 def test_valid_argument(self):
2025 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002026 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002027 subject = 'A sub-message'
2028 m = Message()
2029 m['Subject'] = subject
2030 r = MIMEMessage(m)
2031 eq(r.get_content_type(), 'message/rfc822')
2032 payload = r.get_payload()
2033 unless(isinstance(payload, list))
2034 eq(len(payload), 1)
2035 subpart = payload[0]
2036 unless(subpart is m)
2037 eq(subpart['subject'], subject)
2038
2039 def test_bad_multipart(self):
2040 eq = self.assertEqual
2041 msg1 = Message()
2042 msg1['Subject'] = 'subpart 1'
2043 msg2 = Message()
2044 msg2['Subject'] = 'subpart 2'
2045 r = MIMEMessage(msg1)
2046 self.assertRaises(errors.MultipartConversionError, r.attach, msg2)
2047
2048 def test_generate(self):
2049 # First craft the message to be encapsulated
2050 m = Message()
2051 m['Subject'] = 'An enclosed message'
2052 m.set_payload('Here is the body of the message.\n')
2053 r = MIMEMessage(m)
2054 r['Subject'] = 'The enclosing message'
2055 s = StringIO()
2056 g = Generator(s)
2057 g.flatten(r)
2058 self.assertEqual(s.getvalue(), """\
2059Content-Type: message/rfc822
2060MIME-Version: 1.0
2061Subject: The enclosing message
2062
2063Subject: An enclosed message
2064
2065Here is the body of the message.
2066""")
2067
2068 def test_parse_message_rfc822(self):
2069 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002070 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002071 msg = self._msgobj('msg_11.txt')
2072 eq(msg.get_content_type(), 'message/rfc822')
2073 payload = msg.get_payload()
2074 unless(isinstance(payload, list))
2075 eq(len(payload), 1)
2076 submsg = payload[0]
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002077 self.assertTrue(isinstance(submsg, Message))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002078 eq(submsg['subject'], 'An enclosed message')
2079 eq(submsg.get_payload(), 'Here is the body of the message.\n')
2080
2081 def test_dsn(self):
2082 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002083 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002084 # msg 16 is a Delivery Status Notification, see RFC 1894
2085 msg = self._msgobj('msg_16.txt')
2086 eq(msg.get_content_type(), 'multipart/report')
2087 unless(msg.is_multipart())
2088 eq(len(msg.get_payload()), 3)
2089 # Subpart 1 is a text/plain, human readable section
2090 subpart = msg.get_payload(0)
2091 eq(subpart.get_content_type(), 'text/plain')
2092 eq(subpart.get_payload(), """\
2093This report relates to a message you sent with the following header fields:
2094
2095 Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
2096 Date: Sun, 23 Sep 2001 20:10:55 -0700
2097 From: "Ian T. Henry" <henryi@oxy.edu>
2098 To: SoCal Raves <scr@socal-raves.org>
2099 Subject: [scr] yeah for Ians!!
2100
2101Your message cannot be delivered to the following recipients:
2102
2103 Recipient address: jangel1@cougar.noc.ucla.edu
2104 Reason: recipient reached disk quota
2105
2106""")
2107 # Subpart 2 contains the machine parsable DSN information. It
2108 # consists of two blocks of headers, represented by two nested Message
2109 # objects.
2110 subpart = msg.get_payload(1)
2111 eq(subpart.get_content_type(), 'message/delivery-status')
2112 eq(len(subpart.get_payload()), 2)
2113 # message/delivery-status should treat each block as a bunch of
2114 # headers, i.e. a bunch of Message objects.
2115 dsn1 = subpart.get_payload(0)
2116 unless(isinstance(dsn1, Message))
2117 eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
2118 eq(dsn1.get_param('dns', header='reporting-mta'), '')
2119 # Try a missing one <wink>
2120 eq(dsn1.get_param('nsd', header='reporting-mta'), None)
2121 dsn2 = subpart.get_payload(1)
2122 unless(isinstance(dsn2, Message))
2123 eq(dsn2['action'], 'failed')
2124 eq(dsn2.get_params(header='original-recipient'),
2125 [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
2126 eq(dsn2.get_param('rfc822', header='final-recipient'), '')
2127 # Subpart 3 is the original message
2128 subpart = msg.get_payload(2)
2129 eq(subpart.get_content_type(), 'message/rfc822')
2130 payload = subpart.get_payload()
2131 unless(isinstance(payload, list))
2132 eq(len(payload), 1)
2133 subsubpart = payload[0]
2134 unless(isinstance(subsubpart, Message))
2135 eq(subsubpart.get_content_type(), 'text/plain')
2136 eq(subsubpart['message-id'],
2137 '<002001c144a6$8752e060$56104586@oxy.edu>')
2138
2139 def test_epilogue(self):
2140 eq = self.ndiffAssertEqual
2141 with openfile('msg_21.txt') as fp:
2142 text = fp.read()
2143 msg = Message()
2144 msg['From'] = 'aperson@dom.ain'
2145 msg['To'] = 'bperson@dom.ain'
2146 msg['Subject'] = 'Test'
2147 msg.preamble = 'MIME message'
2148 msg.epilogue = 'End of MIME message\n'
2149 msg1 = MIMEText('One')
2150 msg2 = MIMEText('Two')
2151 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2152 msg.attach(msg1)
2153 msg.attach(msg2)
2154 sfp = StringIO()
2155 g = Generator(sfp)
2156 g.flatten(msg)
2157 eq(sfp.getvalue(), text)
2158
2159 def test_no_nl_preamble(self):
2160 eq = self.ndiffAssertEqual
2161 msg = Message()
2162 msg['From'] = 'aperson@dom.ain'
2163 msg['To'] = 'bperson@dom.ain'
2164 msg['Subject'] = 'Test'
2165 msg.preamble = 'MIME message'
2166 msg.epilogue = ''
2167 msg1 = MIMEText('One')
2168 msg2 = MIMEText('Two')
2169 msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
2170 msg.attach(msg1)
2171 msg.attach(msg2)
2172 eq(msg.as_string(), """\
2173From: aperson@dom.ain
2174To: bperson@dom.ain
2175Subject: Test
2176Content-Type: multipart/mixed; boundary="BOUNDARY"
2177
2178MIME message
2179--BOUNDARY
2180Content-Type: text/plain; charset="us-ascii"
2181MIME-Version: 1.0
2182Content-Transfer-Encoding: 7bit
2183
2184One
2185--BOUNDARY
2186Content-Type: text/plain; charset="us-ascii"
2187MIME-Version: 1.0
2188Content-Transfer-Encoding: 7bit
2189
2190Two
2191--BOUNDARY--
2192""")
2193
2194 def test_default_type(self):
2195 eq = self.assertEqual
2196 with openfile('msg_30.txt') as fp:
2197 msg = email.message_from_file(fp)
2198 container1 = msg.get_payload(0)
2199 eq(container1.get_default_type(), 'message/rfc822')
2200 eq(container1.get_content_type(), 'message/rfc822')
2201 container2 = msg.get_payload(1)
2202 eq(container2.get_default_type(), 'message/rfc822')
2203 eq(container2.get_content_type(), 'message/rfc822')
2204 container1a = container1.get_payload(0)
2205 eq(container1a.get_default_type(), 'text/plain')
2206 eq(container1a.get_content_type(), 'text/plain')
2207 container2a = container2.get_payload(0)
2208 eq(container2a.get_default_type(), 'text/plain')
2209 eq(container2a.get_content_type(), 'text/plain')
2210
2211 def test_default_type_with_explicit_container_type(self):
2212 eq = self.assertEqual
2213 with openfile('msg_28.txt') as fp:
2214 msg = email.message_from_file(fp)
2215 container1 = msg.get_payload(0)
2216 eq(container1.get_default_type(), 'message/rfc822')
2217 eq(container1.get_content_type(), 'message/rfc822')
2218 container2 = msg.get_payload(1)
2219 eq(container2.get_default_type(), 'message/rfc822')
2220 eq(container2.get_content_type(), 'message/rfc822')
2221 container1a = container1.get_payload(0)
2222 eq(container1a.get_default_type(), 'text/plain')
2223 eq(container1a.get_content_type(), 'text/plain')
2224 container2a = container2.get_payload(0)
2225 eq(container2a.get_default_type(), 'text/plain')
2226 eq(container2a.get_content_type(), 'text/plain')
2227
2228 def test_default_type_non_parsed(self):
2229 eq = self.assertEqual
2230 neq = self.ndiffAssertEqual
2231 # Set up container
2232 container = MIMEMultipart('digest', 'BOUNDARY')
2233 container.epilogue = ''
2234 # Set up subparts
2235 subpart1a = MIMEText('message 1\n')
2236 subpart2a = MIMEText('message 2\n')
2237 subpart1 = MIMEMessage(subpart1a)
2238 subpart2 = MIMEMessage(subpart2a)
2239 container.attach(subpart1)
2240 container.attach(subpart2)
2241 eq(subpart1.get_content_type(), 'message/rfc822')
2242 eq(subpart1.get_default_type(), 'message/rfc822')
2243 eq(subpart2.get_content_type(), 'message/rfc822')
2244 eq(subpart2.get_default_type(), 'message/rfc822')
2245 neq(container.as_string(0), '''\
2246Content-Type: multipart/digest; boundary="BOUNDARY"
2247MIME-Version: 1.0
2248
2249--BOUNDARY
2250Content-Type: message/rfc822
2251MIME-Version: 1.0
2252
2253Content-Type: text/plain; charset="us-ascii"
2254MIME-Version: 1.0
2255Content-Transfer-Encoding: 7bit
2256
2257message 1
2258
2259--BOUNDARY
2260Content-Type: message/rfc822
2261MIME-Version: 1.0
2262
2263Content-Type: text/plain; charset="us-ascii"
2264MIME-Version: 1.0
2265Content-Transfer-Encoding: 7bit
2266
2267message 2
2268
2269--BOUNDARY--
2270''')
2271 del subpart1['content-type']
2272 del subpart1['mime-version']
2273 del subpart2['content-type']
2274 del subpart2['mime-version']
2275 eq(subpart1.get_content_type(), 'message/rfc822')
2276 eq(subpart1.get_default_type(), 'message/rfc822')
2277 eq(subpart2.get_content_type(), 'message/rfc822')
2278 eq(subpart2.get_default_type(), 'message/rfc822')
2279 neq(container.as_string(0), '''\
2280Content-Type: multipart/digest; boundary="BOUNDARY"
2281MIME-Version: 1.0
2282
2283--BOUNDARY
2284
2285Content-Type: text/plain; charset="us-ascii"
2286MIME-Version: 1.0
2287Content-Transfer-Encoding: 7bit
2288
2289message 1
2290
2291--BOUNDARY
2292
2293Content-Type: text/plain; charset="us-ascii"
2294MIME-Version: 1.0
2295Content-Transfer-Encoding: 7bit
2296
2297message 2
2298
2299--BOUNDARY--
2300''')
2301
2302 def test_mime_attachments_in_constructor(self):
2303 eq = self.assertEqual
2304 text1 = MIMEText('')
2305 text2 = MIMEText('')
2306 msg = MIMEMultipart(_subparts=(text1, text2))
2307 eq(len(msg.get_payload()), 2)
2308 eq(msg.get_payload(0), text1)
2309 eq(msg.get_payload(1), text2)
2310
Christian Heimes587c2bf2008-01-19 16:21:02 +00002311 def test_default_multipart_constructor(self):
2312 msg = MIMEMultipart()
2313 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002314
Ezio Melottib3aedd42010-11-20 19:04:17 +00002315
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002316# A general test of parser->model->generator idempotency. IOW, read a message
2317# in, parse it into a message object tree, then without touching the tree,
2318# regenerate the plain text. The original text and the transformed text
2319# should be identical. Note: that we ignore the Unix-From since that may
2320# contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test reads a sample message, parses it, and (in most cases) checks
    # that flattening the untouched object tree reproduces the original text.
    # A few tests additionally inspect the parsed structure.

    # Line separator expected in parsed preambles, epilogues and payloads.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return a (message, text) pair: the text read from the sample file
        # and the object tree obtained by parsing that text.
        with openfile(filename) as fp:
            data = fp.read()
        return email.message_from_string(data), data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg and require the output to equal text.  maxheaderlen=0
        # keeps the generator from re-wrapping long headers, which would
        # defeat the comparison.
        out = StringIO()
        Generator(out, maxheaderlen=0).flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, out.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This is the one round trip that also emits the Unix-From line.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        body = 'Yadda yadda yadda' + self.linesep
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), body)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), body)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        # The message/rfc822 part wraps exactly one nested message.
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), body)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002478
2479
Ezio Melottib3aedd42010-11-20 19:04:17 +00002480
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002481# Test various other bits of the package's functionality
2482class TestMiscellaneous(TestEmailBase):
2483 def test_message_from_string(self):
2484 with openfile('msg_01.txt') as fp:
2485 text = fp.read()
2486 msg = email.message_from_string(text)
2487 s = StringIO()
2488 # Don't wrap/continue long headers since we're trying to test
2489 # idempotency.
2490 g = Generator(s, maxheaderlen=0)
2491 g.flatten(msg)
2492 self.assertEqual(text, s.getvalue())
2493
2494 def test_message_from_file(self):
2495 with openfile('msg_01.txt') as fp:
2496 text = fp.read()
2497 fp.seek(0)
2498 msg = email.message_from_file(fp)
2499 s = StringIO()
2500 # Don't wrap/continue long headers since we're trying to test
2501 # idempotency.
2502 g = Generator(s, maxheaderlen=0)
2503 g.flatten(msg)
2504 self.assertEqual(text, s.getvalue())
2505
2506 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002507 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002508 with openfile('msg_01.txt') as fp:
2509 text = fp.read()
2510
2511 # Create a subclass
2512 class MyMessage(Message):
2513 pass
2514
2515 msg = email.message_from_string(text, MyMessage)
2516 unless(isinstance(msg, MyMessage))
2517 # Try something more complicated
2518 with openfile('msg_02.txt') as fp:
2519 text = fp.read()
2520 msg = email.message_from_string(text, MyMessage)
2521 for subpart in msg.walk():
2522 unless(isinstance(subpart, MyMessage))
2523
2524 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002525 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002526 # Create a subclass
2527 class MyMessage(Message):
2528 pass
2529
2530 with openfile('msg_01.txt') as fp:
2531 msg = email.message_from_file(fp, MyMessage)
2532 unless(isinstance(msg, MyMessage))
2533 # Try something more complicated
2534 with openfile('msg_02.txt') as fp:
2535 msg = email.message_from_file(fp, MyMessage)
2536 for subpart in msg.walk():
2537 unless(isinstance(subpart, MyMessage))
2538
2539 def test__all__(self):
2540 module = __import__('email')
2541 # Can't use sorted() here due to Python 2.3 compatibility
2542 all = module.__all__[:]
2543 all.sort()
2544 self.assertEqual(all, [
2545 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002546 'header', 'iterators', 'message', 'message_from_binary_file',
2547 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002548 'message_from_string', 'mime', 'parser',
2549 'quoprimime', 'utils',
2550 ])
2551
2552 def test_formatdate(self):
2553 now = time.time()
2554 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2555 time.gmtime(now)[:6])
2556
2557 def test_formatdate_localtime(self):
2558 now = time.time()
2559 self.assertEqual(
2560 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2561 time.localtime(now)[:6])
2562
2563 def test_formatdate_usegmt(self):
2564 now = time.time()
2565 self.assertEqual(
2566 utils.formatdate(now, localtime=False),
2567 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2568 self.assertEqual(
2569 utils.formatdate(now, localtime=False, usegmt=True),
2570 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2571
2572 def test_parsedate_none(self):
2573 self.assertEqual(utils.parsedate(''), None)
2574
2575 def test_parsedate_compact(self):
2576 # The FWS after the comma is optional
2577 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2578 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2579
2580 def test_parsedate_no_dayofweek(self):
2581 eq = self.assertEqual
2582 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2583 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2584
2585 def test_parsedate_compact_no_dayofweek(self):
2586 eq = self.assertEqual
2587 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2588 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2589
R. David Murray4a62e892010-12-23 20:35:46 +00002590 def test_parsedate_no_space_before_positive_offset(self):
2591 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26+0800'),
2592 (2002, 4, 3, 14, 58, 26, 0, 1, -1, 28800))
2593
2594 def test_parsedate_no_space_before_negative_offset(self):
2595 # Issue 1155362: we already handled '+' for this case.
2596 self.assertEqual(utils.parsedate_tz('Wed, 3 Apr 2002 14:58:26-0800'),
2597 (2002, 4, 3, 14, 58, 26, 0, 1, -1, -28800))
2598
2599
R David Murrayaccd1c02011-03-13 20:06:23 -04002600 def test_parsedate_accepts_time_with_dots(self):
2601 eq = self.assertEqual
2602 eq(utils.parsedate_tz('5 Feb 2003 13.47.26 -0800'),
2603 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2604 eq(utils.parsedate_tz('5 Feb 2003 13.47 -0800'),
2605 (2003, 2, 5, 13, 47, 0, 0, 1, -1, -28800))
2606
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002607 def test_parsedate_acceptable_to_time_functions(self):
2608 eq = self.assertEqual
2609 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2610 t = int(time.mktime(timetup))
2611 eq(time.localtime(t)[:6], timetup[:6])
2612 eq(int(time.strftime('%Y', timetup)), 2003)
2613 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2614 t = int(time.mktime(timetup[:9]))
2615 eq(time.localtime(t)[:6], timetup[:6])
2616 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2617
R. David Murray219d1c82010-08-25 00:45:55 +00002618 def test_parsedate_y2k(self):
2619 """Test for parsing a date with a two-digit year.
2620
2621 Parsing a date with a two-digit year should return the correct
2622 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2623 obsoletes RFC822) requires four-digit years.
2624
2625 """
2626 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2627 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2628 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2629 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2630
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002631 def test_parseaddr_empty(self):
2632 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2633 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2634
2635 def test_noquote_dump(self):
2636 self.assertEqual(
2637 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2638 'A Silly Person <person@dom.ain>')
2639
2640 def test_escape_dump(self):
2641 self.assertEqual(
2642 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2643 r'"A \(Very\) Silly Person" <person@dom.ain>')
2644 a = r'A \(Special\) Person'
2645 b = 'person@dom.ain'
2646 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2647
2648 def test_escape_backslashes(self):
2649 self.assertEqual(
2650 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2651 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2652 a = r'Arthur \Backslash\ Foobar'
2653 b = 'person@dom.ain'
2654 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2655
R David Murray8debacb2011-04-06 09:35:57 -04002656 def test_quotes_unicode_names(self):
2657 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2658 name = "H\u00e4ns W\u00fcrst"
2659 addr = 'person@dom.ain'
2660 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2661 latin1_quopri = "=?iso-8859-1?q?H=E4ns_W=FCrst?= <person@dom.ain>"
2662 self.assertEqual(utils.formataddr((name, addr)), utf8_base64)
2663 self.assertEqual(utils.formataddr((name, addr), 'iso-8859-1'),
2664 latin1_quopri)
2665
2666 def test_accepts_any_charset_like_object(self):
2667 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2668 name = "H\u00e4ns W\u00fcrst"
2669 addr = 'person@dom.ain'
2670 utf8_base64 = "=?utf-8?b?SMOkbnMgV8O8cnN0?= <person@dom.ain>"
2671 foobar = "FOOBAR"
2672 class CharsetMock:
2673 def header_encode(self, string):
2674 return foobar
2675 mock = CharsetMock()
2676 mock_expected = "%s <%s>" % (foobar, addr)
2677 self.assertEqual(utils.formataddr((name, addr), mock), mock_expected)
2678 self.assertEqual(utils.formataddr((name, addr), Charset('utf-8')),
2679 utf8_base64)
2680
2681 def test_invalid_charset_like_object_raises_error(self):
2682 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2683 name = "H\u00e4ns W\u00fcrst"
2684 addr = 'person@dom.ain'
2685 # A object without a header_encode method:
2686 bad_charset = object()
2687 self.assertRaises(AttributeError, utils.formataddr, (name, addr),
2688 bad_charset)
2689
2690 def test_unicode_address_raises_error(self):
2691 # issue 1690608. email.utils.formataddr() should be rfc2047 aware.
2692 addr = 'pers\u00f6n@dom.in'
2693 self.assertRaises(UnicodeError, utils.formataddr, (None, addr))
2694 self.assertRaises(UnicodeError, utils.formataddr, ("Name", addr))
2695
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002696 def test_name_with_dot(self):
2697 x = 'John X. Doe <jxd@example.com>'
2698 y = '"John X. Doe" <jxd@example.com>'
2699 a, b = ('John X. Doe', 'jxd@example.com')
2700 self.assertEqual(utils.parseaddr(x), (a, b))
2701 self.assertEqual(utils.parseaddr(y), (a, b))
2702 # formataddr() quotes the name if there's a dot in it
2703 self.assertEqual(utils.formataddr((a, b)), y)
2704
R. David Murray5397e862010-10-02 15:58:26 +00002705 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2706 # issue 10005. Note that in the third test the second pair of
2707 # backslashes is not actually a quoted pair because it is not inside a
2708 # comment or quoted string: the address being parsed has a quoted
2709 # string containing a quoted backslash, followed by 'example' and two
2710 # backslashes, followed by another quoted string containing a space and
2711 # the word 'example'. parseaddr copies those two backslashes
2712 # literally. Per rfc5322 this is not technically correct since a \ may
2713 # not appear in an address outside of a quoted string. It is probably
2714 # a sensible Postel interpretation, though.
2715 eq = self.assertEqual
2716 eq(utils.parseaddr('""example" example"@example.com'),
2717 ('', '""example" example"@example.com'))
2718 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2719 ('', '"\\"example\\" example"@example.com'))
2720 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2721 ('', '"\\\\"example\\\\" example"@example.com'))
2722
R. David Murray63563cd2010-12-18 18:25:38 +00002723 def test_parseaddr_preserves_spaces_in_local_part(self):
2724 # issue 9286. A normal RFC5322 local part should not contain any
2725 # folding white space, but legacy local parts can (they are a sequence
2726 # of atoms, not dotatoms). On the other hand we strip whitespace from
2727 # before the @ and around dots, on the assumption that the whitespace
2728 # around the punctuation is a mistake in what would otherwise be
2729 # an RFC5322 local part. Leading whitespace is, usual, stripped as well.
2730 self.assertEqual(('', "merwok wok@xample.com"),
2731 utils.parseaddr("merwok wok@xample.com"))
2732 self.assertEqual(('', "merwok wok@xample.com"),
2733 utils.parseaddr("merwok wok@xample.com"))
2734 self.assertEqual(('', "merwok wok@xample.com"),
2735 utils.parseaddr(" merwok wok @xample.com"))
2736 self.assertEqual(('', 'merwok"wok" wok@xample.com'),
2737 utils.parseaddr('merwok"wok" wok@xample.com'))
2738 self.assertEqual(('', 'merwok.wok.wok@xample.com'),
2739 utils.parseaddr('merwok. wok . wok@xample.com'))
2740
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002741 def test_multiline_from_comment(self):
2742 x = """\
2743Foo
2744\tBar <foo@example.com>"""
2745 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2746
2747 def test_quote_dump(self):
2748 self.assertEqual(
2749 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2750 r'"A Silly; Person" <person@dom.ain>')
2751
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002752 def test_charset_richcomparisons(self):
2753 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002754 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002755 cset1 = Charset()
2756 cset2 = Charset()
2757 eq(cset1, 'us-ascii')
2758 eq(cset1, 'US-ASCII')
2759 eq(cset1, 'Us-AsCiI')
2760 eq('us-ascii', cset1)
2761 eq('US-ASCII', cset1)
2762 eq('Us-AsCiI', cset1)
2763 ne(cset1, 'usascii')
2764 ne(cset1, 'USASCII')
2765 ne(cset1, 'UsAsCiI')
2766 ne('usascii', cset1)
2767 ne('USASCII', cset1)
2768 ne('UsAsCiI', cset1)
2769 eq(cset1, cset2)
2770 eq(cset2, cset1)
2771
2772 def test_getaddresses(self):
2773 eq = self.assertEqual
2774 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2775 'Bud Person <bperson@dom.ain>']),
2776 [('Al Person', 'aperson@dom.ain'),
2777 ('Bud Person', 'bperson@dom.ain')])
2778
2779 def test_getaddresses_nasty(self):
2780 eq = self.assertEqual
2781 eq(utils.getaddresses(['foo: ;']), [('', '')])
2782 eq(utils.getaddresses(
2783 ['[]*-- =~$']),
2784 [('', ''), ('', ''), ('', '*--')])
2785 eq(utils.getaddresses(
2786 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2787 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2788
2789 def test_getaddresses_embedded_comment(self):
2790 """Test proper handling of a nested comment"""
2791 eq = self.assertEqual
2792 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2793 eq(addrs[0][1], 'foo@bar.com')
2794
2795 def test_utils_quote_unquote(self):
2796 eq = self.assertEqual
2797 msg = Message()
2798 msg.add_header('content-disposition', 'attachment',
2799 filename='foo\\wacky"name')
2800 eq(msg.get_filename(), 'foo\\wacky"name')
2801
2802 def test_get_body_encoding_with_bogus_charset(self):
2803 charset = Charset('not a charset')
2804 self.assertEqual(charset.get_body_encoding(), 'base64')
2805
2806 def test_get_body_encoding_with_uppercase_charset(self):
2807 eq = self.assertEqual
2808 msg = Message()
2809 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2810 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2811 charsets = msg.get_charsets()
2812 eq(len(charsets), 1)
2813 eq(charsets[0], 'utf-8')
2814 charset = Charset(charsets[0])
2815 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002816 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002817 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2818 eq(msg.get_payload(decode=True), b'hello world')
2819 eq(msg['content-transfer-encoding'], 'base64')
2820 # Try another one
2821 msg = Message()
2822 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2823 charsets = msg.get_charsets()
2824 eq(len(charsets), 1)
2825 eq(charsets[0], 'us-ascii')
2826 charset = Charset(charsets[0])
2827 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2828 msg.set_payload('hello world', charset=charset)
2829 eq(msg.get_payload(), 'hello world')
2830 eq(msg['content-transfer-encoding'], '7bit')
2831
2832 def test_charsets_case_insensitive(self):
2833 lc = Charset('us-ascii')
2834 uc = Charset('US-ASCII')
2835 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2836
2837 def test_partial_falls_inside_message_delivery_status(self):
2838 eq = self.ndiffAssertEqual
2839 # The Parser interface provides chunks of data to FeedParser in 8192
2840 # byte gulps. SF bug #1076485 found one of those chunks inside
2841 # message/delivery-status header block, which triggered an
2842 # unreadline() of NeedMoreData.
2843 msg = self._msgobj('msg_43.txt')
2844 sfp = StringIO()
2845 iterators._structure(msg, sfp)
2846 eq(sfp.getvalue(), """\
2847multipart/report
2848 text/plain
2849 message/delivery-status
2850 text/plain
2851 text/plain
2852 text/plain
2853 text/plain
2854 text/plain
2855 text/plain
2856 text/plain
2857 text/plain
2858 text/plain
2859 text/plain
2860 text/plain
2861 text/plain
2862 text/plain
2863 text/plain
2864 text/plain
2865 text/plain
2866 text/plain
2867 text/plain
2868 text/plain
2869 text/plain
2870 text/plain
2871 text/plain
2872 text/plain
2873 text/plain
2874 text/plain
2875 text/plain
2876 text/rfc822-headers
2877""")
2878
R. David Murraya0b44b52010-12-02 21:47:19 +00002879 def test_make_msgid_domain(self):
2880 self.assertEqual(
2881 email.utils.make_msgid(domain='testdomain-string')[-19:],
2882 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002883
Ezio Melottib3aedd42010-11-20 19:04:17 +00002884
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002885# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Exercises the helpers in email.iterators plus the line buffering done
    # by email.feedparser.BufferedSubFile.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        payloads = [
            part.get_payload()
            for part in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
        line endings. A CR ending one push() followed by a LF starting
        the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (chunk to push, number of complete lines that become
        # readable after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
            ]
        bsf = BufferedSubFile()
        collected = []
        expected_total = 0
        for chunk, expected in imt:
            bsf.push(chunk)
            expected_total += expected
            # Drain every line that is available after this push.
            available = 0
            while True:
                line = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if line is NeedMoreData:
                    break
                collected.append(line)
                available += 1
            self.assertEqual(expected, available)
        self.assertEqual(len(collected), expected_total)
        # Nothing may be lost or invented: the lines read back must
        # concatenate to exactly what was pushed.
        self.assertEqual(''.join(chunk for chunk, _ in imt),
                         ''.join(collected))
2972
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002973
Ezio Melottib3aedd42010-11-20 19:04:17 +00002974
class TestParsers(TestEmailBase):
    # Parser behaviour: header-only parsing, whitespace continuation lines,
    # CRLF handling (including generator round trips), and tolerance of
    # unusual but legal header names.

    # Show full diffs when one of the round-trip comparisons fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left unparsed, so it is one string rather than parts.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_bytes_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt', 'rb') as fp:
            msg = email.parser.BytesHeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)
        self.assertIsInstance(msg.get_payload(decode=True), bytes)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        Generator(out).flatten(msg, linesep='\r\n')
        self.assertEqual(out.getvalue(), text)

    def test_crlf_control_via_policy(self):
        # Import the submodule explicitly instead of relying on some other
        # module having imported email.policy as a side effect.
        from email import policy
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        Generator(out, policy=policy.SMTP).flatten(msg)
        self.assertEqual(out.getvalue(), text)

    def test_flatten_linesep_overrides_policy(self):
        from email import policy
        # msg_27 is lf separated
        with openfile('msg_27.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        out = StringIO()
        # An explicit linesep passed to flatten() must win over the policy.
        Generator(out, policy=policy.SMTP).flatten(msg, linesep='\n')
        self.assertEqual(out.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        # Iterating a message yields its header field names.
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        self.assertEqual(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works for any iterable of keys, not only a list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
            )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003169
Ezio Melottib3aedd42010-11-20 19:04:17 +00003170
R. David Murray96fd54e2010-10-08 15:55:28 +00003171class Test8BitBytesHandling(unittest.TestCase):
3172 # In Python3 all input is string, but that doesn't work if the actual input
3173 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
3174 # decode byte streams using the surrogateescape error handler, and
3175 # reconvert to binary at appropriate places if we detect surrogates. This
3176 # doesn't allow us to transform headers with 8bit bytes (they get munged),
3177 # but it does allow us to parse and preserve them, and to decode body
3178 # parts that use an 8bit CTE.
3179
3180 bodytest_msg = textwrap.dedent("""\
3181 From: foo@bar.com
3182 To: baz
3183 Mime-Version: 1.0
3184 Content-Type: text/plain; charset={charset}
3185 Content-Transfer-Encoding: {cte}
3186
3187 {bodyline}
3188 """)
3189
3190 def test_known_8bit_CTE(self):
3191 m = self.bodytest_msg.format(charset='utf-8',
3192 cte='8bit',
3193 bodyline='pöstal').encode('utf-8')
3194 msg = email.message_from_bytes(m)
3195 self.assertEqual(msg.get_payload(), "pöstal\n")
3196 self.assertEqual(msg.get_payload(decode=True),
3197 "pöstal\n".encode('utf-8'))
3198
3199 def test_unknown_8bit_CTE(self):
3200 m = self.bodytest_msg.format(charset='notavalidcharset',
3201 cte='8bit',
3202 bodyline='pöstal').encode('utf-8')
3203 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003204 self.assertEqual(msg.get_payload(), "p\uFFFD\uFFFDstal\n")
R. David Murray96fd54e2010-10-08 15:55:28 +00003205 self.assertEqual(msg.get_payload(decode=True),
3206 "pöstal\n".encode('utf-8'))
3207
3208 def test_8bit_in_quopri_body(self):
3209 # This is non-RFC compliant data...without 'decode' the library code
3210 # decodes the body using the charset from the headers, and because the
3211 # source byte really is utf-8 this works. This is likely to fail
3212 # against real dirty data (ie: produce mojibake), but the data is
3213 # invalid anyway so it is as good a guess as any. But this means that
3214 # this test just confirms the current behavior; that behavior is not
3215 # necessarily the best possible behavior. With 'decode' it is
3216 # returning the raw bytes, so that test should be of correct behavior,
3217 # or at least produce the same result that email4 did.
3218 m = self.bodytest_msg.format(charset='utf-8',
3219 cte='quoted-printable',
3220 bodyline='p=C3=B6stál').encode('utf-8')
3221 msg = email.message_from_bytes(m)
3222 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
3223 self.assertEqual(msg.get_payload(decode=True),
3224 'pöstál\n'.encode('utf-8'))
3225
3226 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
3227 # This is similar to the previous test, but proves that if the 8bit
3228 # byte is undecodeable in the specified charset, it gets replaced
3229 # by the unicode 'unknown' character. Again, this may or may not
3230 # be the ideal behavior. Note that if decode=False none of the
3231 # decoders will get involved, so this is the only test we need
3232 # for this behavior.
3233 m = self.bodytest_msg.format(charset='ascii',
3234 cte='quoted-printable',
3235 bodyline='p=C3=B6stál').encode('utf-8')
3236 msg = email.message_from_bytes(m)
R. David Murray92532142011-01-07 23:25:30 +00003237 self.assertEqual(msg.get_payload(), 'p=C3=B6st\uFFFD\uFFFDl\n')
R. David Murray96fd54e2010-10-08 15:55:28 +00003238 self.assertEqual(msg.get_payload(decode=True),
3239 'pöstál\n'.encode('utf-8'))
3240
3241 def test_8bit_in_base64_body(self):
3242 # Sticking an 8bit byte in a base64 block makes it undecodable by
3243 # normal means, so the block is returned undecoded, but as bytes.
3244 m = self.bodytest_msg.format(charset='utf-8',
3245 cte='base64',
3246 bodyline='cMO2c3RhbAá=').encode('utf-8')
3247 msg = email.message_from_bytes(m)
3248 self.assertEqual(msg.get_payload(decode=True),
3249 'cMO2c3RhbAá=\n'.encode('utf-8'))
3250
3251 def test_8bit_in_uuencode_body(self):
3252 # Sticking an 8bit byte in a uuencode block makes it undecodable by
3253 # normal means, so the block is returned undecoded, but as bytes.
3254 m = self.bodytest_msg.format(charset='utf-8',
3255 cte='uuencode',
3256 bodyline='<,.V<W1A; á ').encode('utf-8')
3257 msg = email.message_from_bytes(m)
3258 self.assertEqual(msg.get_payload(decode=True),
3259 '<,.V<W1A; á \n'.encode('utf-8'))
3260
3261
R. David Murray92532142011-01-07 23:25:30 +00003262 headertest_headers = (
3263 ('From: foo@bar.com', ('From', 'foo@bar.com')),
3264 ('To: báz', ('To', '=?unknown-8bit?q?b=C3=A1z?=')),
3265 ('Subject: Maintenant je vous présente mon collègue, le pouf célèbre\n'
3266 '\tJean de Baddie',
3267 ('Subject', '=?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3268 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=\n'
3269 ' =?unknown-8bit?q?_Jean_de_Baddie?=')),
3270 ('From: göst', ('From', '=?unknown-8bit?b?Z8O2c3Q=?=')),
3271 )
3272 headertest_msg = ('\n'.join([src for (src, _) in headertest_headers]) +
3273 '\nYes, they are flying.\n').encode('utf-8')
R. David Murray96fd54e2010-10-08 15:55:28 +00003274
3275 def test_get_8bit_header(self):
3276 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003277 self.assertEqual(str(msg.get('to')), 'b\uFFFD\uFFFDz')
3278 self.assertEqual(str(msg['to']), 'b\uFFFD\uFFFDz')
R. David Murray96fd54e2010-10-08 15:55:28 +00003279
3280 def test_print_8bit_headers(self):
3281 msg = email.message_from_bytes(self.headertest_msg)
3282 self.assertEqual(str(msg),
R. David Murray92532142011-01-07 23:25:30 +00003283 textwrap.dedent("""\
3284 From: {}
3285 To: {}
3286 Subject: {}
3287 From: {}
3288
3289 Yes, they are flying.
3290 """).format(*[expected[1] for (_, expected) in
3291 self.headertest_headers]))
R. David Murray96fd54e2010-10-08 15:55:28 +00003292
3293 def test_values_with_8bit_headers(self):
3294 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003295 self.assertListEqual([str(x) for x in msg.values()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003296 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003297 'b\uFFFD\uFFFDz',
3298 'Maintenant je vous pr\uFFFD\uFFFDsente mon '
3299 'coll\uFFFD\uFFFDgue, le pouf '
3300 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
R. David Murray96fd54e2010-10-08 15:55:28 +00003301 '\tJean de Baddie',
R. David Murray92532142011-01-07 23:25:30 +00003302 "g\uFFFD\uFFFDst"])
R. David Murray96fd54e2010-10-08 15:55:28 +00003303
3304 def test_items_with_8bit_headers(self):
3305 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003306 self.assertListEqual([(str(x), str(y)) for (x, y) in msg.items()],
R. David Murray96fd54e2010-10-08 15:55:28 +00003307 [('From', 'foo@bar.com'),
R. David Murray92532142011-01-07 23:25:30 +00003308 ('To', 'b\uFFFD\uFFFDz'),
3309 ('Subject', 'Maintenant je vous '
3310 'pr\uFFFD\uFFFDsente '
3311 'mon coll\uFFFD\uFFFDgue, le pouf '
3312 'c\uFFFD\uFFFDl\uFFFD\uFFFDbre\n'
3313 '\tJean de Baddie'),
3314 ('From', 'g\uFFFD\uFFFDst')])
R. David Murray96fd54e2010-10-08 15:55:28 +00003315
3316 def test_get_all_with_8bit_headers(self):
3317 msg = email.message_from_bytes(self.headertest_msg)
R. David Murray92532142011-01-07 23:25:30 +00003318 self.assertListEqual([str(x) for x in msg.get_all('from')],
R. David Murray96fd54e2010-10-08 15:55:28 +00003319 ['foo@bar.com',
R. David Murray92532142011-01-07 23:25:30 +00003320 'g\uFFFD\uFFFDst'])
R. David Murray96fd54e2010-10-08 15:55:28 +00003321
R David Murraya2150232011-03-16 21:11:23 -04003322 def test_get_content_type_with_8bit(self):
3323 msg = email.message_from_bytes(textwrap.dedent("""\
3324 Content-Type: text/pl\xA7in; charset=utf-8
3325 """).encode('latin-1'))
3326 self.assertEqual(msg.get_content_type(), "text/pl\uFFFDin")
3327 self.assertEqual(msg.get_content_maintype(), "text")
3328 self.assertEqual(msg.get_content_subtype(), "pl\uFFFDin")
3329
3330 def test_get_params_with_8bit(self):
3331 msg = email.message_from_bytes(
3332 'X-Header: foo=\xa7ne; b\xa7r=two; baz=three\n'.encode('latin-1'))
3333 self.assertEqual(msg.get_params(header='x-header'),
3334 [('foo', '\uFFFDne'), ('b\uFFFDr', 'two'), ('baz', 'three')])
3335 self.assertEqual(msg.get_param('Foo', header='x-header'), '\uFFFdne')
3336 # XXX: someday you might be able to get 'b\xa7r', for now you can't.
3337 self.assertEqual(msg.get_param('b\xa7r', header='x-header'), None)
3338
3339 def test_get_rfc2231_params_with_8bit(self):
3340 msg = email.message_from_bytes(textwrap.dedent("""\
3341 Content-Type: text/plain; charset=us-ascii;
3342 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3343 ).encode('latin-1'))
3344 self.assertEqual(msg.get_param('title'),
3345 ('us-ascii', 'en', 'This is not f\uFFFDn'))
3346
3347 def test_set_rfc2231_params_with_8bit(self):
3348 msg = email.message_from_bytes(textwrap.dedent("""\
3349 Content-Type: text/plain; charset=us-ascii;
3350 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3351 ).encode('latin-1'))
3352 msg.set_param('title', 'test')
3353 self.assertEqual(msg.get_param('title'), 'test')
3354
3355 def test_del_rfc2231_params_with_8bit(self):
3356 msg = email.message_from_bytes(textwrap.dedent("""\
3357 Content-Type: text/plain; charset=us-ascii;
3358 title*=us-ascii'en'This%20is%20not%20f\xa7n"""
3359 ).encode('latin-1'))
3360 msg.del_param('title')
3361 self.assertEqual(msg.get_param('title'), None)
3362 self.assertEqual(msg.get_content_maintype(), 'text')
3363
3364 def test_get_payload_with_8bit_cte_header(self):
3365 msg = email.message_from_bytes(textwrap.dedent("""\
3366 Content-Transfer-Encoding: b\xa7se64
3367 Content-Type: text/plain; charset=latin-1
3368
3369 payload
3370 """).encode('latin-1'))
3371 self.assertEqual(msg.get_payload(), 'payload\n')
3372 self.assertEqual(msg.get_payload(decode=True), b'payload\n')
3373
R. David Murray96fd54e2010-10-08 15:55:28 +00003374 non_latin_bin_msg = textwrap.dedent("""\
3375 From: foo@bar.com
3376 To: báz
3377 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
3378 \tJean de Baddie
3379 Mime-Version: 1.0
3380 Content-Type: text/plain; charset="utf-8"
3381 Content-Transfer-Encoding: 8bit
3382
3383 Да, они летят.
3384 """).encode('utf-8')
3385
3386 def test_bytes_generator(self):
3387 msg = email.message_from_bytes(self.non_latin_bin_msg)
3388 out = BytesIO()
3389 email.generator.BytesGenerator(out).flatten(msg)
3390 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
3391
R. David Murray7372a072011-01-26 21:21:32 +00003392 def test_bytes_generator_handles_None_body(self):
3393 #Issue 11019
3394 msg = email.message.Message()
3395 out = BytesIO()
3396 email.generator.BytesGenerator(out).flatten(msg)
3397 self.assertEqual(out.getvalue(), b"\n")
3398
R. David Murray92532142011-01-07 23:25:30 +00003399 non_latin_bin_msg_as7bit_wrapped = textwrap.dedent("""\
R. David Murray96fd54e2010-10-08 15:55:28 +00003400 From: foo@bar.com
R. David Murray92532142011-01-07 23:25:30 +00003401 To: =?unknown-8bit?q?b=C3=A1z?=
3402 Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_coll=C3=A8gue?=
3403 =?unknown-8bit?q?=2C_le_pouf_c=C3=A9l=C3=A8bre?=
3404 =?unknown-8bit?q?_Jean_de_Baddie?=
R. David Murray96fd54e2010-10-08 15:55:28 +00003405 Mime-Version: 1.0
3406 Content-Type: text/plain; charset="utf-8"
3407 Content-Transfer-Encoding: base64
3408
3409 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
3410 """)
3411
3412 def test_generator_handles_8bit(self):
3413 msg = email.message_from_bytes(self.non_latin_bin_msg)
3414 out = StringIO()
3415 email.generator.Generator(out).flatten(msg)
R. David Murray92532142011-01-07 23:25:30 +00003416 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit_wrapped)
R. David Murray96fd54e2010-10-08 15:55:28 +00003417
3418 def test_bytes_generator_with_unix_from(self):
3419 # The unixfrom contains a current date, so we can't check it
3420 # literally. Just make sure the first word is 'From' and the
3421 # rest of the message matches the input.
3422 msg = email.message_from_bytes(self.non_latin_bin_msg)
3423 out = BytesIO()
3424 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
3425 lines = out.getvalue().split(b'\n')
3426 self.assertEqual(lines[0].split()[0], b'From')
3427 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
3428
R. David Murray92532142011-01-07 23:25:30 +00003429 non_latin_bin_msg_as7bit = non_latin_bin_msg_as7bit_wrapped.split('\n')
3430 non_latin_bin_msg_as7bit[2:4] = [
3431 'Subject: =?unknown-8bit?q?Maintenant_je_vous_pr=C3=A9sente_mon_'
3432 'coll=C3=A8gue=2C_le_pouf_c=C3=A9l=C3=A8bre?=']
3433 non_latin_bin_msg_as7bit = '\n'.join(non_latin_bin_msg_as7bit)
3434
R. David Murray96fd54e2010-10-08 15:55:28 +00003435 def test_message_from_binary_file(self):
3436 fn = 'test.msg'
3437 self.addCleanup(unlink, fn)
3438 with open(fn, 'wb') as testfile:
3439 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00003440 with open(fn, 'rb') as testfile:
3441 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00003442 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
3443
3444 latin_bin_msg = textwrap.dedent("""\
3445 From: foo@bar.com
3446 To: Dinsdale
3447 Subject: Nudge nudge, wink, wink
3448 Mime-Version: 1.0
3449 Content-Type: text/plain; charset="latin-1"
3450 Content-Transfer-Encoding: 8bit
3451
3452 oh là là, know what I mean, know what I mean?
3453 """).encode('latin-1')
3454
3455 latin_bin_msg_as7bit = textwrap.dedent("""\
3456 From: foo@bar.com
3457 To: Dinsdale
3458 Subject: Nudge nudge, wink, wink
3459 Mime-Version: 1.0
3460 Content-Type: text/plain; charset="iso-8859-1"
3461 Content-Transfer-Encoding: quoted-printable
3462
3463 oh l=E0 l=E0, know what I mean, know what I mean?
3464 """)
3465
3466 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
3467 m = email.message_from_bytes(self.latin_bin_msg)
3468 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3469
3470 def test_decoded_generator_emits_unicode_body(self):
3471 m = email.message_from_bytes(self.latin_bin_msg)
3472 out = StringIO()
3473 email.generator.DecodedGenerator(out).flatten(m)
3474 #DecodedHeader output contains an extra blank line compared
3475 #to the input message. RDM: not sure if this is a bug or not,
3476 #but it is not specific to the 8bit->7bit conversion.
3477 self.assertEqual(out.getvalue(),
3478 self.latin_bin_msg.decode('latin-1')+'\n')
3479
3480 def test_bytes_feedparser(self):
3481 bfp = email.feedparser.BytesFeedParser()
3482 for i in range(0, len(self.latin_bin_msg), 10):
3483 bfp.feed(self.latin_bin_msg[i:i+10])
3484 m = bfp.close()
3485 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
3486
R. David Murray8451c4b2010-10-23 22:19:56 +00003487 def test_crlf_flatten(self):
3488 with openfile('msg_26.txt', 'rb') as fp:
3489 text = fp.read()
3490 msg = email.message_from_bytes(text)
3491 s = BytesIO()
3492 g = email.generator.BytesGenerator(s)
3493 g.flatten(msg, linesep='\r\n')
3494 self.assertEqual(s.getvalue(), text)
R David Murrayc5c14722011-04-06 08:13:02 -04003495
3496 def test_8bit_multipart(self):
3497 # Issue 11605
3498 source = textwrap.dedent("""\
3499 Date: Fri, 18 Mar 2011 17:15:43 +0100
3500 To: foo@example.com
3501 From: foodwatch-Newsletter <bar@example.com>
3502 Subject: Aktuelles zu Japan, Klonfleisch und Smiley-System
3503 Message-ID: <76a486bee62b0d200f33dc2ca08220ad@localhost.localdomain>
3504 MIME-Version: 1.0
3505 Content-Type: multipart/alternative;
3506 boundary="b1_76a486bee62b0d200f33dc2ca08220ad"
3507
3508 --b1_76a486bee62b0d200f33dc2ca08220ad
3509 Content-Type: text/plain; charset="utf-8"
3510 Content-Transfer-Encoding: 8bit
3511
3512 Guten Tag, ,
3513
3514 mit großer Betroffenheit verfolgen auch wir im foodwatch-Team die
3515 Nachrichten aus Japan.
3516
3517
3518 --b1_76a486bee62b0d200f33dc2ca08220ad
3519 Content-Type: text/html; charset="utf-8"
3520 Content-Transfer-Encoding: 8bit
3521
3522 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
3523 "http://www.w3.org/TR/html4/loose.dtd">
3524 <html lang="de">
3525 <head>
3526 <title>foodwatch - Newsletter</title>
3527 </head>
3528 <body>
3529 <p>mit gro&szlig;er Betroffenheit verfolgen auch wir im foodwatch-Team
3530 die Nachrichten aus Japan.</p>
3531 </body>
3532 </html>
3533 --b1_76a486bee62b0d200f33dc2ca08220ad--
3534
3535 """).encode('utf-8')
3536 msg = email.message_from_bytes(source)
3537 s = BytesIO()
3538 g = email.generator.BytesGenerator(s)
3539 g.flatten(msg)
3540 self.assertEqual(s.getvalue(), source)
3541
R David Murray3edd22a2011-04-18 13:59:37 -04003542 def test_crlf_control_via_policy(self):
3543 # msg_26 is crlf terminated
3544 with openfile('msg_26.txt', 'rb') as fp:
3545 text = fp.read()
3546 msg = email.message_from_bytes(text)
3547 s = BytesIO()
3548 g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
3549 g.flatten(msg)
3550 self.assertEqual(s.getvalue(), text)
3551
3552 def test_flatten_linesep_overrides_policy(self):
3553 # msg_27 is lf separated
3554 with openfile('msg_27.txt', 'rb') as fp:
3555 text = fp.read()
3556 msg = email.message_from_bytes(text)
3557 s = BytesIO()
3558 g = email.generator.BytesGenerator(s, policy=email.policy.SMTP)
3559 g.flatten(msg, linesep='\n')
3560 self.assertEqual(s.getvalue(), text)
3561
3562 def test_must_be_7bit_handles_unknown_8bit(self):
3563 msg = email.message_from_bytes(self.non_latin_bin_msg)
3564 out = BytesIO()
3565 g = email.generator.BytesGenerator(out,
3566 policy=email.policy.default.clone(must_be_7bit=True))
3567 g.flatten(msg)
3568 self.assertEqual(out.getvalue(),
3569 self.non_latin_bin_msg_as7bit_wrapped.encode('ascii'))
3570
3571 def test_must_be_7bit_transforms_8bit_cte(self):
3572 msg = email.message_from_bytes(self.latin_bin_msg)
3573 out = BytesIO()
3574 g = email.generator.BytesGenerator(out,
3575 policy=email.policy.default.clone(must_be_7bit=True))
3576 g.flatten(msg)
3577 self.assertEqual(out.getvalue(),
3578 self.latin_bin_msg_as7bit.encode('ascii'))
3579
R. David Murray8451c4b2010-10-23 22:19:56 +00003580 maxDiff = None
3581
Ezio Melottib3aedd42010-11-20 19:04:17 +00003582
class BaseTestBytesGeneratorIdempotent:
    # Mixin: subclasses supply linesep, blinesep and normalize_linesep_regex.

    # Show complete diffs when a comparison fails.
    maxDiff = None

    def _msgobj(self, filename):
        """Return (message, data) for the named test file.

        The file's bytes are first rewritten with normalize_linesep_regex so
        that every match becomes blinesep; the message is parsed from those
        normalized bytes.
        """
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        """Assert that flattening msg with linesep reproduces data."""
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        flattened = sink.getvalue()
        self.assertEqual(data, flattened)
R. David Murray96fd54e2010-10-08 15:55:28 +00003599
3600
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Run the idempotency tests with LF line endings: every CRLF in the
    # source data is rewritten to a bare LF before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3606
3607
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Run the idempotency tests with CRLF line endings: every LF that is not
    # already preceded by a CR is rewritten to CRLF before parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3613
Ezio Melottib3aedd42010-11-20 19:04:17 +00003614
class TestBase64(unittest.TestCase):
    # Tests for the base64 helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for inputs of 0..14 characters: every
        # started group of three input characters costs four output ones.
        expected_sizes = [0, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16, 20, 20]
        for size, bsize in enumerate(expected_sizes):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # Empty input encodes to an empty result.  Only emptiness is checked,
        # so the assertion does not depend on whether that empty value is
        # reported as str or as bytes.
        eq(len(base64mime.body_encode(b'')), 0)
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # The 100 input bytes wrap into three full 40-character lines plus a
        # short final line.
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        wrapped = [full_line] * 3 + ['eHh4eCB4eHh4IA==']
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40),
           ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # This used to repeat the default-charset newline check above; pair
        # the explicit charset with a multi-line value instead.
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003665
3666
Ezio Melottib3aedd42010-11-20 19:04:17 +00003667
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003668class TestQuopri(unittest.TestCase):
3669 def setUp(self):
3670 # Set of characters (as byte integers) that don't need to be encoded
3671 # in headers.
3672 self.hlit = list(chain(
3673 range(ord('a'), ord('z') + 1),
3674 range(ord('A'), ord('Z') + 1),
3675 range(ord('0'), ord('9') + 1),
Guido van Rossum9604e662007-08-30 03:46:43 +00003676 (c for c in b'!*+-/')))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003677 # Set of characters (as byte integers) that do need to be encoded in
3678 # headers.
3679 self.hnon = [c for c in range(256) if c not in self.hlit]
3680 assert len(self.hlit) + len(self.hnon) == 256
3681 # Set of characters (as byte integers) that don't need to be encoded
3682 # in bodies.
3683 self.blit = list(range(ord(' '), ord('~') + 1))
3684 self.blit.append(ord('\t'))
3685 self.blit.remove(ord('='))
3686 # Set of characters (as byte integers) that do need to be encoded in
3687 # bodies.
3688 self.bnon = [c for c in range(256) if c not in self.blit]
3689 assert len(self.blit) + len(self.bnon) == 256
3690
Guido van Rossum9604e662007-08-30 03:46:43 +00003691 def test_quopri_header_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003692 for c in self.hlit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003693 self.assertFalse(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003694 'Should not be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003695 for c in self.hnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003696 self.assertTrue(quoprimime.header_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003697 'Should be header quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003698
Guido van Rossum9604e662007-08-30 03:46:43 +00003699 def test_quopri_body_check(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003700 for c in self.blit:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003701 self.assertFalse(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003702 'Should not be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003703 for c in self.bnon:
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003704 self.assertTrue(quoprimime.body_check(c),
Guido van Rossum9604e662007-08-30 03:46:43 +00003705 'Should be body quopri encoded: %s' % chr(c))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003706
3707 def test_header_quopri_len(self):
3708 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003709 eq(quoprimime.header_length(b'hello'), 5)
3710 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003711 eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003712 quoprimime.header_length(b'hello') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003713 # =?xxx?q?...?= means 10 extra characters
3714 10)
Guido van Rossum9604e662007-08-30 03:46:43 +00003715 eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
3716 # RFC 2047 chrome is not included in header_length().
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003717 eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
Guido van Rossum9604e662007-08-30 03:46:43 +00003718 quoprimime.header_length(b'h@e@l@l@o@') +
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003719 # =?xxx?q?...?= means 10 extra characters
3720 10)
3721 for c in self.hlit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003722 eq(quoprimime.header_length(bytes([c])), 1,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003723 'expected length 1 for %r' % chr(c))
3724 for c in self.hnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003725 # Space is special; it's encoded to _
3726 if c == ord(' '):
3727 continue
3728 eq(quoprimime.header_length(bytes([c])), 3,
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003729 'expected length 3 for %r' % chr(c))
Guido van Rossum9604e662007-08-30 03:46:43 +00003730 eq(quoprimime.header_length(b' '), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003731
3732 def test_body_quopri_len(self):
3733 eq = self.assertEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003734 for c in self.blit:
Guido van Rossum9604e662007-08-30 03:46:43 +00003735 eq(quoprimime.body_length(bytes([c])), 1)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003736 for c in self.bnon:
Guido van Rossum9604e662007-08-30 03:46:43 +00003737 eq(quoprimime.body_length(bytes([c])), 3)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003738
3739 def test_quote_unquote_idempotent(self):
3740 for x in range(256):
3741 c = chr(x)
3742 self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)
3743
R David Murrayec1b5b82011-03-23 14:19:05 -04003744 def _test_header_encode(self, header, expected_encoded_header, charset=None):
3745 if charset is None:
3746 encoded_header = quoprimime.header_encode(header)
3747 else:
3748 encoded_header = quoprimime.header_encode(header, charset)
3749 self.assertEqual(encoded_header, expected_encoded_header)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003750
R David Murraycafd79d2011-03-23 15:25:55 -04003751 def test_header_encode_null(self):
3752 self._test_header_encode(b'', '')
3753
R David Murrayec1b5b82011-03-23 14:19:05 -04003754 def test_header_encode_one_word(self):
3755 self._test_header_encode(b'hello', '=?iso-8859-1?q?hello?=')
3756
3757 def test_header_encode_two_lines(self):
3758 self._test_header_encode(b'hello\nworld',
3759 '=?iso-8859-1?q?hello=0Aworld?=')
3760
3761 def test_header_encode_non_ascii(self):
3762 self._test_header_encode(b'hello\xc7there',
3763 '=?iso-8859-1?q?hello=C7there?=')
3764
3765 def test_header_encode_alt_charset(self):
3766 self._test_header_encode(b'hello', '=?iso-8859-2?q?hello?=',
3767 charset='iso-8859-2')
3768
3769 def _test_header_decode(self, encoded_header, expected_decoded_header):
3770 decoded_header = quoprimime.header_decode(encoded_header)
3771 self.assertEqual(decoded_header, expected_decoded_header)
3772
3773 def test_header_decode_null(self):
3774 self._test_header_decode('', '')
3775
3776 def test_header_decode_one_word(self):
3777 self._test_header_decode('hello', 'hello')
3778
3779 def test_header_decode_two_lines(self):
3780 self._test_header_decode('hello=0Aworld', 'hello\nworld')
3781
3782 def test_header_decode_non_ascii(self):
3783 self._test_header_decode('hello=C7there', 'hello\xc7there')
3784
3785 def _test_decode(self, encoded, expected_decoded, eol=None):
3786 if eol is None:
3787 decoded = quoprimime.decode(encoded)
3788 else:
3789 decoded = quoprimime.decode(encoded, eol=eol)
3790 self.assertEqual(decoded, expected_decoded)
3791
3792 def test_decode_null_word(self):
3793 self._test_decode('', '')
3794
3795 def test_decode_null_line_null_word(self):
3796 self._test_decode('\r\n', '\n')
3797
3798 def test_decode_one_word(self):
3799 self._test_decode('hello', 'hello')
3800
3801 def test_decode_one_word_eol(self):
3802 self._test_decode('hello', 'hello', eol='X')
3803
3804 def test_decode_one_line(self):
3805 self._test_decode('hello\r\n', 'hello\n')
3806
3807 def test_decode_one_line_lf(self):
3808 self._test_decode('hello\n', 'hello\n')
3809
R David Murraycafd79d2011-03-23 15:25:55 -04003810 def test_decode_one_line_cr(self):
3811 self._test_decode('hello\r', 'hello\n')
3812
3813 def test_decode_one_line_nl(self):
3814 self._test_decode('hello\n', 'helloX', eol='X')
3815
3816 def test_decode_one_line_crnl(self):
3817 self._test_decode('hello\r\n', 'helloX', eol='X')
3818
R David Murrayec1b5b82011-03-23 14:19:05 -04003819 def test_decode_one_line_one_word(self):
3820 self._test_decode('hello\r\nworld', 'hello\nworld')
3821
3822 def test_decode_one_line_one_word_eol(self):
3823 self._test_decode('hello\r\nworld', 'helloXworld', eol='X')
3824
3825 def test_decode_two_lines(self):
3826 self._test_decode('hello\r\nworld\r\n', 'hello\nworld\n')
3827
R David Murraycafd79d2011-03-23 15:25:55 -04003828 def test_decode_two_lines_eol(self):
3829 self._test_decode('hello\r\nworld\r\n', 'helloXworldX', eol='X')
3830
R David Murrayec1b5b82011-03-23 14:19:05 -04003831 def test_decode_one_long_line(self):
3832 self._test_decode('Spam' * 250, 'Spam' * 250)
3833
3834 def test_decode_one_space(self):
3835 self._test_decode(' ', '')
3836
3837 def test_decode_multiple_spaces(self):
3838 self._test_decode(' ' * 5, '')
3839
3840 def test_decode_one_line_trailing_spaces(self):
3841 self._test_decode('hello \r\n', 'hello\n')
3842
3843 def test_decode_two_lines_trailing_spaces(self):
3844 self._test_decode('hello \r\nworld \r\n', 'hello\nworld\n')
3845
3846 def test_decode_quoted_word(self):
3847 self._test_decode('=22quoted=20words=22', '"quoted words"')
3848
3849 def test_decode_uppercase_quoting(self):
3850 self._test_decode('ab=CD=EF', 'ab\xcd\xef')
3851
3852 def test_decode_lowercase_quoting(self):
3853 self._test_decode('ab=cd=ef', 'ab\xcd\xef')
3854
3855 def test_decode_soft_line_break(self):
3856 self._test_decode('soft line=\r\nbreak', 'soft linebreak')
3857
3858 def test_decode_false_quoting(self):
3859 self._test_decode('A=1,B=A ==> A+B==2', 'A=1,B=A ==> A+B==2')
3860
3861 def _test_encode(self, body, expected_encoded_body, maxlinelen=None, eol=None):
3862 kwargs = {}
3863 if maxlinelen is None:
3864 # Use body_encode's default.
3865 maxlinelen = 76
3866 else:
3867 kwargs['maxlinelen'] = maxlinelen
3868 if eol is None:
3869 # Use body_encode's default.
3870 eol = '\n'
3871 else:
3872 kwargs['eol'] = eol
3873 encoded_body = quoprimime.body_encode(body, **kwargs)
3874 self.assertEqual(encoded_body, expected_encoded_body)
3875 if eol == '\n' or eol == '\r\n':
3876 # We know how to split the result back into lines, so maxlinelen
3877 # can be checked.
3878 for line in encoded_body.splitlines():
3879 self.assertLessEqual(len(line), maxlinelen)
3880
3881 def test_encode_null(self):
3882 self._test_encode('', '')
3883
3884 def test_encode_null_lines(self):
3885 self._test_encode('\n\n', '\n\n')
3886
3887 def test_encode_one_line(self):
3888 self._test_encode('hello\n', 'hello\n')
3889
3890 def test_encode_one_line_crlf(self):
3891 self._test_encode('hello\r\n', 'hello\n')
3892
3893 def test_encode_one_line_eol(self):
3894 self._test_encode('hello\n', 'hello\r\n', eol='\r\n')
3895
3896 def test_encode_one_space(self):
3897 self._test_encode(' ', '=20')
3898
3899 def test_encode_one_line_one_space(self):
3900 self._test_encode(' \n', '=20\n')
3901
# XXX: body_encode() expects strings, but uses ord(char) of each character
# to index into a 256-entry list.  For code points above 255, this will fail.
# Should there be a check for 8-bit-only ord() values in body, or at least
# a comment about the expected input?
3906
3907 def test_encode_two_lines_one_space(self):
3908 self._test_encode(' \n \n', '=20\n=20\n')
3909
R David Murrayec1b5b82011-03-23 14:19:05 -04003910 def test_encode_one_word_trailing_spaces(self):
3911 self._test_encode('hello ', 'hello =20')
3912
3913 def test_encode_one_line_trailing_spaces(self):
3914 self._test_encode('hello \n', 'hello =20\n')
3915
3916 def test_encode_one_word_trailing_tab(self):
3917 self._test_encode('hello \t', 'hello =09')
3918
3919 def test_encode_one_line_trailing_tab(self):
3920 self._test_encode('hello \t\n', 'hello =09\n')
3921
3922 def test_encode_trailing_space_before_maxlinelen(self):
3923 self._test_encode('abcd \n1234', 'abcd =\n\n1234', maxlinelen=6)
3924
R David Murrayb938c8c2011-03-24 12:19:26 -04003925 def test_encode_trailing_space_at_maxlinelen(self):
3926 self._test_encode('abcd \n1234', 'abcd=\n=20\n1234', maxlinelen=5)
3927
R David Murrayec1b5b82011-03-23 14:19:05 -04003928 def test_encode_trailing_space_beyond_maxlinelen(self):
R David Murrayb938c8c2011-03-24 12:19:26 -04003929 self._test_encode('abcd \n1234', 'abc=\nd=20\n1234', maxlinelen=4)
3930
3931 def test_encode_whitespace_lines(self):
3932 self._test_encode(' \n' * 5, '=20\n' * 5)
R David Murrayec1b5b82011-03-23 14:19:05 -04003933
3934 def test_encode_quoted_equals(self):
3935 self._test_encode('a = b', 'a =3D b')
3936
3937 def test_encode_one_long_string(self):
3938 self._test_encode('x' * 100, 'x' * 75 + '=\n' + 'x' * 25)
3939
3940 def test_encode_one_long_line(self):
3941 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3942
3943 def test_encode_one_very_long_line(self):
3944 self._test_encode('x' * 200 + '\n',
3945 2 * ('x' * 75 + '=\n') + 'x' * 50 + '\n')
3946
3947 def test_encode_one_long_line(self):
3948 self._test_encode('x' * 100 + '\n', 'x' * 75 + '=\n' + 'x' * 25 + '\n')
3949
3950 def test_encode_shortest_maxlinelen(self):
3951 self._test_encode('=' * 5, '=3D=\n' * 4 + '=3D', maxlinelen=4)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003952
R David Murrayb938c8c2011-03-24 12:19:26 -04003953 def test_encode_maxlinelen_too_small(self):
3954 self.assertRaises(ValueError, self._test_encode, '', '', maxlinelen=3)
3955
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003956 def test_encode(self):
3957 eq = self.assertEqual
Guido van Rossum9604e662007-08-30 03:46:43 +00003958 eq(quoprimime.body_encode(''), '')
3959 eq(quoprimime.body_encode('hello'), 'hello')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003960 # Test the binary flag
Guido van Rossum9604e662007-08-30 03:46:43 +00003961 eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003962 # Test the maxlinelen arg
Guido van Rossum9604e662007-08-30 03:46:43 +00003963 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003964xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
3965 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
3966x xxxx xxxx xxxx xxxx=20""")
3967 # Test the eol argument
Guido van Rossum9604e662007-08-30 03:46:43 +00003968 eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
3969 """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003970xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
3971 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
3972x xxxx xxxx xxxx xxxx=20""")
Guido van Rossum9604e662007-08-30 03:46:43 +00003973 eq(quoprimime.body_encode("""\
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003974one line
3975
3976two line"""), """\
3977one line
3978
3979two line""")
3980
3981
Ezio Melottib3aedd42010-11-20 19:04:17 +00003982
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003983# Test the Charset class
class TestCharset(unittest.TestCase):
    def tearDown(self):
        # test_body_encode registers a throwaway 'fake' charset; remove it
        # again (if present) so it cannot leak into other tests.
        from email import charset as CharsetModule
        CharsetModule.CHARSETS.pop('fake', None)

    def test_codec_encodeable(self):
        # Make sure us-ascii = no Unicode conversion
        ascii_cs = Charset('us-ascii')
        self.assertEqual(ascii_cs.header_encode('Hello World!'),
                         'Hello World!')
        # Test 8-bit idempotency with us-ascii
        eight_bit = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
        with self.assertRaises(UnicodeError):
            ascii_cs.header_encode(eight_bit)
        utf8_cs = Charset('utf-8')
        self.assertEqual(utf8_cs.header_encode(eight_bit),
                         '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')

    def test_body_encode(self):
        check = self.assertEqual
        # Try a charset with QP body encoding
        latin1 = Charset('iso-8859-1')
        check('hello w=F6rld', latin1.body_encode('hello w\xf6rld'))
        # Try a charset with Base64 body encoding
        utf8_cs = Charset('utf-8')
        check('aGVsbG8gd29ybGQ=\n', utf8_cs.body_encode(b'hello world'))
        # Try a charset with None body encoding
        ascii_cs = Charset('us-ascii')
        check('hello world', ascii_cs.body_encode('hello world'))
        # Try the convert argument, where input codec != output codec
        euc_jp = Charset('euc-jp')
        # With apologies to Tokio Kikuchi ;)
        # XXX FIXME: the euc-jp body_encode checks (with and without
        # conversion, tolerating a LookupError when the Japanese codecs are
        # missing) are currently disabled.
        #
        # Testing SF bug #625509, which we have to fake, since there are no
        # built-in encodings where the header encoding is QP but the body
        # encoding is not.
        from email import charset as CharsetModule
        CharsetModule.add_charset('fake', CharsetModule.QP, None, 'utf-8')
        fake_cs = Charset('fake')
        check('hello world', fake_cs.body_encode('hello world'))

    def test_unicode_charset_name(self):
        ascii_cs = Charset('us-ascii')
        self.assertEqual(str(ascii_cs), 'us-ascii')
        # A non-ASCII charset name must be rejected.
        with self.assertRaises(errors.CharsetError):
            Charset('asc\xffii')
4038
4039
Ezio Melottib3aedd42010-11-20 19:04:17 +00004040
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004041# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # Chunks are joined with a space (see test_simple_surprise), so the
        # appended chunk's own leading space yields two spaces here.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appending a chunk without leading whitespace still inserts a space.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # assertLessEqual reports both values on failure, unlike assertTrue.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name, the first line has less room for the value.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # The shorter of the q and b encodings is chosen per header.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_escaped_8bit_header(self):
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        x = x.decode('ascii', 'surrogateescape')
        h = Header(x, charset=email.charset.UNKNOWN8BIT)
        self.assertEqual(str(h),
                        'Ynwp4dUEbay Auction Semiar- No Charge \uFFFD Earn Big')
        self.assertEqual(email.header.decode_header(h), [(x, 'unknown-8bit')])

    def test_modify_returned_list_does_not_change_header(self):
        h = Header('test')
        chunks = email.header.decode_header(h)
        # decode_header() yields (string, charset) pairs; append one in that
        # order and verify the header itself is unaffected.
        chunks.append(('test2', 'ascii'))
        self.assertEqual(str(h), 'test')

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)

    def test_shift_jis_charset(self):
        h = Header('文', charset='shift_jis')
        self.assertEqual(h.encode(), '=?iso-2022-jp?b?GyRCSjgbKEI=?=')

    def test_flatten_header_with_no_value(self):
        # Issue 11401 (regression from email 4.x)  Note that the space after
        # the header doesn't reflect the input, but this is also the way
        # email 4.x behaved.  At some point it would be nice to fix that.
        msg = email.message_from_string("EmptyHeader:")
        self.assertEqual(str(msg), "EmptyHeader: \n\n")

    def test_encode_preserves_leading_ws_on_value(self):
        msg = Message()
        msg['SomeHeader'] = ' value with leading ws'
        # The value's own leading space must survive in addition to the
        # single space written after the colon.
        self.assertEqual(str(msg), "SomeHeader:  value with leading ws\n\n")
4378
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004379
Ezio Melottib3aedd42010-11-20 19:04:17 +00004380
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004381# Test RFC 2231 header parameters (en/de)coding
4382class TestRFC2231(TestEmailBase):
def test_get_param(self):
    # The 'title' parameter of msg_29.txt is expected back as a
    # (charset, language, value) triple, with and without unquoting.
    msg = self._msgobj('msg_29.txt')
    unquoted = msg.get_param('title')
    self.assertEqual(
        unquoted,
        ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    quoted = msg.get_param('title', unquote=False)
    self.assertEqual(
        quoted,
        ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
4390
def test_set_param(self):
    # NOTE(review): the expected message text below is reproduced as it was
    # visible; its internal whitespace should be confirmed against
    # msg_01.txt.
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = Message()
    # Without a language the middle element of the triple is empty.
    msg.set_param('title', title, charset='us-ascii')
    check(msg.get_param('title'), ('us-ascii', '', title))
    msg.set_param('title', title, charset='us-ascii', language='en')
    check(msg.get_param('title'), ('us-ascii', 'en', title))
    # The same parameter on a parsed message, checked in flattened form.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', title, charset='us-ascii', language='en')
    flattened = msg.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4427
R David Murraya2860e82011-04-16 09:20:30 -04004428 def test_set_param_requote(self):
4429 msg = Message()
4430 msg.set_param('title', 'foo')
4431 self.assertEqual(msg['content-type'], 'text/plain; title="foo"')
4432 msg.set_param('title', 'bar', requote=False)
4433 self.assertEqual(msg['content-type'], 'text/plain; title=bar')
4434 # tspecial is still quoted.
4435 msg.set_param('title', "(bar)bell", requote=False)
4436 self.assertEqual(msg['content-type'], 'text/plain; title="(bar)bell"')
4437
def test_del_param(self):
    # NOTE(review): the expected message text below is reproduced as it was
    # visible; its internal whitespace should be confirmed against
    # msg_01.txt.
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    msg = self._msgobj('msg_01.txt')
    msg.set_param('foo', 'bar', charset='us-ascii', language='en')
    msg.set_param('title', title, charset='us-ascii', language='en')
    # Only 'foo' is removed again; 'title' must survive in the output.
    msg.del_param('foo', header='Content-Type')
    flattened = msg.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*=us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21


Hi,

Do you like this message?

-Me
""")
4467
def test_rfc2231_get_content_charset(self):
    # msg_32.txt is expected to report 'us-ascii' as its content charset.
    msg = self._msgobj('msg_32.txt')
    charset = msg.get_content_charset()
    self.assertEqual(charset, 'us-ascii')
4472
R. David Murraydfd7eb02010-12-24 22:36:49 +00004473 def test_rfc2231_parse_rfc_quoting(self):
4474 m = textwrap.dedent('''\
4475 Content-Disposition: inline;
4476 \tfilename*0*=''This%20is%20even%20more%20;
4477 \tfilename*1*=%2A%2A%2Afun%2A%2A%2A%20;
4478 \tfilename*2="is it not.pdf"
4479
4480 ''')
4481 msg = email.message_from_string(m)
4482 self.assertEqual(msg.get_filename(),
4483 'This is even more ***fun*** is it not.pdf')
4484 self.assertEqual(m, msg.as_string())
4485
4486 def test_rfc2231_parse_extra_quoting(self):
4487 m = textwrap.dedent('''\
4488 Content-Disposition: inline;
4489 \tfilename*0*="''This%20is%20even%20more%20";
4490 \tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4491 \tfilename*2="is it not.pdf"
4492
4493 ''')
4494 msg = email.message_from_string(m)
4495 self.assertEqual(msg.get_filename(),
4496 'This is even more ***fun*** is it not.pdf')
4497 self.assertEqual(m, msg.as_string())
4498
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004499 def test_rfc2231_no_language_or_charset(self):
4500 m = '''\
4501Content-Transfer-Encoding: 8bit
4502Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
4503Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
4504
4505'''
4506 msg = email.message_from_string(m)
4507 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004508 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004509 self.assertEqual(
4510 param,
4511 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
4512
4513 def test_rfc2231_no_language_or_charset_in_filename(self):
4514 m = '''\
4515Content-Disposition: inline;
4516\tfilename*0*="''This%20is%20even%20more%20";
4517\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4518\tfilename*2="is it not.pdf"
4519
4520'''
4521 msg = email.message_from_string(m)
4522 self.assertEqual(msg.get_filename(),
4523 'This is even more ***fun*** is it not.pdf')
4524
4525 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
4526 m = '''\
4527Content-Disposition: inline;
4528\tfilename*0*="''This%20is%20even%20more%20";
4529\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4530\tfilename*2="is it not.pdf"
4531
4532'''
4533 msg = email.message_from_string(m)
4534 self.assertEqual(msg.get_filename(),
4535 'This is even more ***fun*** is it not.pdf')
4536
4537 def test_rfc2231_partly_encoded(self):
4538 m = '''\
4539Content-Disposition: inline;
4540\tfilename*0="''This%20is%20even%20more%20";
4541\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4542\tfilename*2="is it not.pdf"
4543
4544'''
4545 msg = email.message_from_string(m)
4546 self.assertEqual(
4547 msg.get_filename(),
4548 'This%20is%20even%20more%20***fun*** is it not.pdf')
4549
4550 def test_rfc2231_partly_nonencoded(self):
4551 m = '''\
4552Content-Disposition: inline;
4553\tfilename*0="This%20is%20even%20more%20";
4554\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
4555\tfilename*2="is it not.pdf"
4556
4557'''
4558 msg = email.message_from_string(m)
4559 self.assertEqual(
4560 msg.get_filename(),
4561 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
4562
4563 def test_rfc2231_no_language_or_charset_in_boundary(self):
4564 m = '''\
4565Content-Type: multipart/alternative;
4566\tboundary*0*="''This%20is%20even%20more%20";
4567\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
4568\tboundary*2="is it not.pdf"
4569
4570'''
4571 msg = email.message_from_string(m)
4572 self.assertEqual(msg.get_boundary(),
4573 'This is even more ***fun*** is it not.pdf')
4574
4575 def test_rfc2231_no_language_or_charset_in_charset(self):
4576 # This is a nonsensical charset value, but tests the code anyway
4577 m = '''\
4578Content-Type: text/plain;
4579\tcharset*0*="This%20is%20even%20more%20";
4580\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
4581\tcharset*2="is it not.pdf"
4582
4583'''
4584 msg = email.message_from_string(m)
4585 self.assertEqual(msg.get_content_charset(),
4586 'this is even more ***fun*** is it not.pdf')
4587
4588 def test_rfc2231_bad_encoding_in_filename(self):
4589 m = '''\
4590Content-Disposition: inline;
4591\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
4592\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4593\tfilename*2="is it not.pdf"
4594
4595'''
4596 msg = email.message_from_string(m)
4597 self.assertEqual(msg.get_filename(),
4598 'This is even more ***fun*** is it not.pdf')
4599
4600 def test_rfc2231_bad_encoding_in_charset(self):
4601 m = """\
4602Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
4603
4604"""
4605 msg = email.message_from_string(m)
4606 # This should return None because non-ascii characters in the charset
4607 # are not allowed.
4608 self.assertEqual(msg.get_content_charset(), None)
4609
4610 def test_rfc2231_bad_character_in_charset(self):
4611 m = """\
4612Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
4613
4614"""
4615 msg = email.message_from_string(m)
4616 # This should return None because non-ascii characters in the charset
4617 # are not allowed.
4618 self.assertEqual(msg.get_content_charset(), None)
4619
4620 def test_rfc2231_bad_character_in_filename(self):
4621 m = '''\
4622Content-Disposition: inline;
4623\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
4624\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
4625\tfilename*2*="is it not.pdf%E2"
4626
4627'''
4628 msg = email.message_from_string(m)
4629 self.assertEqual(msg.get_filename(),
4630 'This is even more ***fun*** is it not.pdf\ufffd')
4631
4632 def test_rfc2231_unknown_encoding(self):
4633 m = """\
4634Content-Transfer-Encoding: 8bit
4635Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
4636
4637"""
4638 msg = email.message_from_string(m)
4639 self.assertEqual(msg.get_filename(), 'myfile.txt')
4640
4641 def test_rfc2231_single_tick_in_filename_extended(self):
4642 eq = self.assertEqual
4643 m = """\
4644Content-Type: application/x-foo;
4645\tname*0*=\"Frank's\"; name*1*=\" Document\"
4646
4647"""
4648 msg = email.message_from_string(m)
4649 charset, language, s = msg.get_param('name')
4650 eq(charset, None)
4651 eq(language, None)
4652 eq(s, "Frank's Document")
4653
4654 def test_rfc2231_single_tick_in_filename(self):
4655 m = """\
4656Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
4657
4658"""
4659 msg = email.message_from_string(m)
4660 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004661 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004662 self.assertEqual(param, "Frank's Document")
4663
4664 def test_rfc2231_tick_attack_extended(self):
4665 eq = self.assertEqual
4666 m = """\
4667Content-Type: application/x-foo;
4668\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
4669
4670"""
4671 msg = email.message_from_string(m)
4672 charset, language, s = msg.get_param('name')
4673 eq(charset, 'us-ascii')
4674 eq(language, 'en-us')
4675 eq(s, "Frank's Document")
4676
4677 def test_rfc2231_tick_attack(self):
4678 m = """\
4679Content-Type: application/x-foo;
4680\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
4681
4682"""
4683 msg = email.message_from_string(m)
4684 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00004685 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00004686 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
4687
4688 def test_rfc2231_no_extended_values(self):
4689 eq = self.assertEqual
4690 m = """\
4691Content-Type: application/x-foo; name=\"Frank's Document\"
4692
4693"""
4694 msg = email.message_from_string(m)
4695 eq(msg.get_param('name'), "Frank's Document")
4696
4697 def test_rfc2231_encoded_then_unencoded_segments(self):
4698 eq = self.assertEqual
4699 m = """\
4700Content-Type: application/x-foo;
4701\tname*0*=\"us-ascii'en-us'My\";
4702\tname*1=\" Document\";
4703\tname*2*=\" For You\"
4704
4705"""
4706 msg = email.message_from_string(m)
4707 charset, language, s = msg.get_param('name')
4708 eq(charset, 'us-ascii')
4709 eq(language, 'en-us')
4710 eq(s, 'My Document For You')
4711
4712 def test_rfc2231_unencoded_then_encoded_segments(self):
4713 eq = self.assertEqual
4714 m = """\
4715Content-Type: application/x-foo;
4716\tname*0=\"us-ascii'en-us'My\";
4717\tname*1*=\" Document\";
4718\tname*2*=\" For You\"
4719
4720"""
4721 msg = email.message_from_string(m)
4722 charset, language, s = msg.get_param('name')
4723 eq(charset, 'us-ascii')
4724 eq(language, 'en-us')
4725 eq(s, 'My Document For You')
4726
4727
Ezio Melottib3aedd42010-11-20 19:04:17 +00004728
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):

    # Matches the first MIME part of a message: a '--<boundary>' line, the
    # part text (group 2, may span lines), then a line that is exactly the
    # same '--<boundary>' again.  Compiled once for all tests in the class.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        # Return (raw text of the test data file, Message parsed from it).
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        # Return the text of the first MIME part in `text`.  Fail the test
        # with a readable message when there is none, instead of erroring
        # out with AttributeError on a None match object.
        match = self._first_part_re.search(text)
        if match is None:
            self.fail('no MIME part found in %s message' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message and require the
        # regenerated one to be identical to the one in the original text.
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'result')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
4764
4765
Ezio Melottib3aedd42010-11-20 19:04:17 +00004766
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()