blob: 5545abe4a64fcaae791337bfe337581d9c5467b7 [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory next to the landmark module.

    Any extra positional or keyword arguments are forwarded unchanged to the
    built-in open().  The open file object is returned; closing it is the
    caller's responsibility.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
# Base test class
class TestEmailBase(unittest.TestCase):
    """Common helpers shared by the email test case classes."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual, but report mismatches as a line-by-line ndiff."""
        if not first != second:
            return
        # Compare the repr() of every line so that whitespace and escape
        # differences are visible in the failure output.
        left, right = (
            [repr(line) for line in str(text).splitlines()]
            for text in (first, second)
        )
        delta = difflib.ndiff(left, right)
        raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        """Parse the named data file and return the resulting message."""
        with openfile(findfile(filename)) as source:
            parsed = email.message_from_file(source)
        return parsed
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Tests for header, parameter, charset and payload access on Message.

    Test methods deliberately carry comments instead of docstrings so that
    unittest keeps reporting them by method name.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied fallback value.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        self.assertIsNone(msg.get_charset())
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        self.assertIsNone(msg.get_charset())
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        self.assertIsNone(msg.get_payload(decode=True))
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning to a header appends a new field instead of replacing
            # the old one, and lookups return the first match.  Drop the
            # previous value so every spelling is really exercised.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the file text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_filename())

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertIsNone(msg.get_boundary())

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No Content-Type header is present, so the default lookup is None.
        self.assertIsNone(msg.get_params())
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        self.assertIsNone(msg.get_param('quuz', header='x-header'))
        self.assertIsNone(msg.get_param('quuz'))
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The expected value is the undecoded payload text as bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000512
513
Ezio Melottib3aedd42010-11-20 19:04:17 +0000514
# Test the email.encoders module
516class TestEncoders(unittest.TestCase):
517 def test_encode_empty_payload(self):
518 eq = self.assertEqual
519 msg = Message()
520 msg.set_charset('us-ascii')
521 eq(msg['content-transfer-encoding'], '7bit')
522
523 def test_default_cte(self):
524 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000525 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000526 msg = MIMEText('hello world')
527 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000528 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000529 msg = MIMEText('hello \xf8 world')
530 eq(msg['content-transfer-encoding'], '8bit')
531 # And now with a different charset
532 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
533 eq(msg['content-transfer-encoding'], 'quoted-printable')
534
R. David Murraye85200d2010-05-06 01:41:14 +0000535 def test_encode7or8bit(self):
536 # Make sure a charset whose input character set is 8bit but
537 # whose output character set is 7bit gets a transfer-encoding
538 # of 7bit.
539 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000540 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000541 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000542
Ezio Melottib3aedd42010-11-20 19:04:17 +0000543
# Test long header wrapping
545class TestLongHeaders(TestEmailBase):
546 def test_split_long_continuation(self):
547 eq = self.ndiffAssertEqual
548 msg = email.message_from_string("""\
549Subject: bug demonstration
550\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
551\tmore text
552
553test
554""")
555 sfp = StringIO()
556 g = Generator(sfp)
557 g.flatten(msg)
558 eq(sfp.getvalue(), """\
559Subject: bug demonstration
560\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
561\tmore text
562
563test
564""")
565
566 def test_another_long_almost_unsplittable_header(self):
567 eq = self.ndiffAssertEqual
568 hstr = """\
569bug demonstration
570\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
571\tmore text"""
572 h = Header(hstr, continuation_ws='\t')
573 eq(h.encode(), """\
574bug demonstration
575\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
576\tmore text""")
577 h = Header(hstr.replace('\t', ' '))
578 eq(h.encode(), """\
579bug demonstration
580 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
581 more text""")
582
583 def test_long_nonstring(self):
584 eq = self.ndiffAssertEqual
585 g = Charset("iso-8859-1")
586 cz = Charset("iso-8859-2")
587 utf8 = Charset("utf-8")
588 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
589 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
590 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
591 b'bef\xf6rdert. ')
592 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
593 b'd\xf9vtipu.. ')
594 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
595 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
596 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
597 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
598 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
599 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
600 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
601 '\u3044\u307e\u3059\u3002')
602 h = Header(g_head, g, header_name='Subject')
603 h.append(cz_head, cz)
604 h.append(utf8_head, utf8)
605 msg = Message()
606 msg['Subject'] = h
607 sfp = StringIO()
608 g = Generator(sfp)
609 g.flatten(msg)
610 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000611Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
612 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
613 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
614 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
615 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
616 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
617 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
618 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
619 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
620 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
621 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000622
623""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000624 eq(h.encode(maxlinelen=76), """\
625=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
626 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
627 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
628 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
629 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
630 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
631 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
632 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
633 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
634 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
635 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000636
637 def test_long_header_encode(self):
638 eq = self.ndiffAssertEqual
639 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
640 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
641 header_name='X-Foobar-Spoink-Defrobnit')
642 eq(h.encode(), '''\
643wasnipoop; giraffes="very-long-necked-animals";
644 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
645
646 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
647 eq = self.ndiffAssertEqual
648 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
649 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
650 header_name='X-Foobar-Spoink-Defrobnit',
651 continuation_ws='\t')
652 eq(h.encode(), '''\
653wasnipoop; giraffes="very-long-necked-animals";
654 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
655
656 def test_long_header_encode_with_tab_continuation(self):
657 eq = self.ndiffAssertEqual
658 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
659 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
660 header_name='X-Foobar-Spoink-Defrobnit',
661 continuation_ws='\t')
662 eq(h.encode(), '''\
663wasnipoop; giraffes="very-long-necked-animals";
664\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
665
666 def test_header_splitter(self):
667 eq = self.ndiffAssertEqual
668 msg = MIMEText('')
669 # It'd be great if we could use add_header() here, but that doesn't
670 # guarantee an order of the parameters.
671 msg['X-Foobar-Spoink-Defrobnit'] = (
672 'wasnipoop; giraffes="very-long-necked-animals"; '
673 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
674 sfp = StringIO()
675 g = Generator(sfp)
676 g.flatten(msg)
677 eq(sfp.getvalue(), '''\
678Content-Type: text/plain; charset="us-ascii"
679MIME-Version: 1.0
680Content-Transfer-Encoding: 7bit
681X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
682 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
683
684''')
685
686 def test_no_semis_header_splitter(self):
687 eq = self.ndiffAssertEqual
688 msg = Message()
689 msg['From'] = 'test@dom.ain'
690 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
691 msg.set_payload('Test')
692 sfp = StringIO()
693 g = Generator(sfp)
694 g.flatten(msg)
695 eq(sfp.getvalue(), """\
696From: test@dom.ain
697References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
698 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
699
700Test""")
701
702 def test_no_split_long_header(self):
703 eq = self.ndiffAssertEqual
704 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000705 h = Header(hstr)
706 # These come on two lines because Headers are really field value
707 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000708 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000709References:
710 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
711 h = Header('x' * 80)
712 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000713
714 def test_splitting_multiple_long_lines(self):
715 eq = self.ndiffAssertEqual
716 hstr = """\
717from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
718\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
719\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
720"""
721 h = Header(hstr, continuation_ws='\t')
722 eq(h.encode(), """\
723from babylon.socal-raves.org (localhost [127.0.0.1]);
724 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
725 for <mailman-admin@babylon.socal-raves.org>;
726 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
727\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
728 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
729 for <mailman-admin@babylon.socal-raves.org>;
730 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
731\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
732 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
733 for <mailman-admin@babylon.socal-raves.org>;
734 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
735
736 def test_splitting_first_line_only_is_long(self):
737 eq = self.ndiffAssertEqual
738 hstr = """\
739from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
740\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
741\tid 17k4h5-00034i-00
742\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
743 h = Header(hstr, maxlinelen=78, header_name='Received',
744 continuation_ws='\t')
745 eq(h.encode(), """\
746from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
747 helo=cthulhu.gerg.ca)
748\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
749\tid 17k4h5-00034i-00
750\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
751
def test_long_8bit_header(self):
    # Build an iso-8859-1 Subject from two chunks, then check it three
    # ways: encoded directly at 76 columns, flattened at 76 columns, and
    # flattened with maxheaderlen=0 (expected on a single line).
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    self.ndiffAssertEqual(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    msg = Message()
    msg['Subject'] = subject
    folded = msg.as_string(maxheaderlen=76)
    self.ndiffAssertEqual(folded, """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    unfolded = msg.as_string(maxheaderlen=0)
    self.ndiffAssertEqual(unfolded, """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
771
def test_long_8bit_header_no_charset(self):
    header_string = ('Britische Regierung gibt gr\xfcnes Licht '
                     'f\xfcr Offshore-Windkraftprojekte '
                     '<a-very-long-address@example.com>')
    # Assigned as a plain str (no charset), flattening is expected to
    # raise UnicodeEncodeError.
    bare = Message()
    bare['Reply-To'] = header_string
    with self.assertRaises(UnicodeEncodeError):
        bare.as_string()
    # Wrapped in a utf-8 Header, the same text flattens to encoded words.
    msg = Message()
    msg['Reply-To'] = Header(header_string, 'utf-8',
                             header_name='Reply-To')
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
788
def test_long_to_header(self):
    # Five recipients; the first two are separated by a bare comma with
    # no following space, and the expected output keeps them together on
    # the first line.
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    msg = Message()
    msg['To'] = recipients
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')
805
def test_long_line_after_append(self):
    # The appended chunk is expected on a continuation line of its own.
    first = 'This is an example of string which has almost the limit of header length.'
    header = Header(first)
    header.append('Add another line.')
    encoded = header.encode(maxlinelen=76)
    self.ndiffAssertEqual(encoded, """\
This is an example of string which has almost the limit of header length.
 Add another line.""")
814
def test_shorter_line_with_append(self):
    # Both chunks are expected on one line, joined by a single space.
    header = Header('This is a shorter line.')
    header.append('Add another sentence. (Surprise?)')
    expected = 'This is a shorter line. Add another sentence. (Surprise?)'
    self.ndiffAssertEqual(header.encode(), expected)
822
def test_long_field_name(self):
    field_name = 'X-Very-Very-Very-Long-Header-Name'
    german = ('Die Mieter treten hier ein werden mit einem Foerderband '
              'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
              'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
              'bef\xf6rdert. ')
    header = Header(german, 'iso-8859-1', header_name=field_name)
    # BAW: this seems broken because the first line is too long
    encoded = header.encode(maxlinelen=76)
    self.ndiffAssertEqual(encoded, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000837
def test_long_received_header(self):
    # The same value is stored once as a Header instance and once as a
    # plain string; the expected output folds both identically.
    value = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
             'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
             'Wed, 05 Mar 2003 18:10:18 -0700')
    msg = Message()
    msg['Received-1'] = Header(value, continuation_ws='\t')
    msg['Received-2'] = value
    # This should be splitting on spaces not semicolons.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700
Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
 Wed, 05 Mar 2003 18:10:18 -0700

""")
853
def test_string_headerinst_eq(self):
    # A Header instance and the equivalent plain string are expected to
    # flatten to the same folded text.
    value = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
             'tu-muenchen.de> (David Bremner\'s message of '
             '"Thu, 6 Mar 2003 13:58:21 +0100")')
    msg = Message()
    msg['Received-1'] = Header(value, header_name='Received-1',
                               continuation_ws='\t')
    msg['Received-2'] = value
    # XXX This should be splitting on spaces not commas.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")
Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
 6 Mar 2003 13:58:21 +0100\")

""")
870
def test_long_unbreakable_lines_with_continuation(self):
    # Two unbroken runs of base64-looking text, the second already on a
    # continuation line, stored as a plain string and as a Header.
    face = """\
iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
    msg = Message()
    msg['Face-1'] = face
    msg['Face-2'] = Header(face, header_name='Face-2')
    # XXX This splitting is all wrong.  The first value line should be
    # snug against the field name.
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
Face-1:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Face-2:\x20
 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp

""")
890
def test_another_long_multiline_header(self):
    # Parse a single over-long Received header and flatten it again; the
    # expected output has one fold, after the semicolon.
    raw = ('Received: from siimage.com '
           '([172.25.1.3]) by zima.siliconimage.com with '
           'Microsoft SMTPSVC(5.0.2195.4905); '
           'Wed, 16 Oct 2002 07:41:11 -0700')
    parsed = email.message_from_string(raw)
    flattened = parsed.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, '''\
Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
 Wed, 16 Oct 2002 07:41:11 -0700

''')
903
def test_long_lines_with_different_header(self):
    # The value itself starts with a header-like "List-Unsubscribe: "
    # prefix and is stored under the field name 'List' twice: as a plain
    # string, then as a Header.  Both copies appear in the output.
    value = ('List-Unsubscribe: '
             '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
             ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
             '?subject=unsubscribe>')
    msg = Message()
    msg['List'] = value
    msg['List'] = Header(value, header_name='List')
    flattened = msg.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(flattened, """\
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>

""")
920
921
Ezio Melottib3aedd42010-11-20 19:04:17 +0000922
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000923# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The payload's first line begins with "From ", which is what the
        # Generator's mangle_from_ option acts on.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Flatten self.msg through a Generator created with the given
        # mangle_from_ setting and return the produced text.
        sink = StringIO()
        Generator(sink, mangle_from_=mangle).flatten(self.msg)
        return sink.getvalue()

    def test_mangled_from(self):
        # With mangling on, the body's "From " line gains a leading ">".
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling off, the body is emitted unchanged.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
954
955
Ezio Melottib3aedd42010-11-20 19:04:17 +0000956
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000957# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in
        # email/test/data.  In Python, there's an audiotest.au living in
        # Lib/test but that isn't included in some binary distros that
        # don't include the test package.  The trailing empty string on
        # the .join() is significant since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', datadir)
        with open(audio_path, 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # No subtype was passed in setUp; 'basic' is what gets reported.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the stored base64 text must give back the file's bytes.
        encoded = bytes(self._au.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        au = self._au
        au.add_header('Content-Disposition', 'attachment',
                      filename='audiotest.au')
        self.assertEqual(au['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(au.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            au.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A fresh list serves as a sentinel that can only be returned by
        # identity when the lookup falls through to failobj.
        missing = []
        self.assertEqual(
            au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(au.get_param('foo', failobj=missing,
                                   header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(au.get_param('foobar', missing), missing)
        self.assertIs(au.get_param('attachment', missing, header='foobar'),
                      missing)
1001
1002
Ezio Melottib3aedd42010-11-20 19:04:17 +00001003
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001004# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Read the GIF fixture once and wrap it without naming a subtype.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # No subtype was passed in setUp; 'gif' is what gets reported.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the stored base64 text must give back the file's bytes.
        encoded = bytes(self._im.get_payload(), 'ascii')
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicitly supplied subtype is used as given.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        im = self._im
        im.add_header('Content-Disposition', 'attachment',
                      filename='dingusfish.gif')
        self.assertEqual(im['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(im.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            im.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A fresh list serves as a sentinel that can only be returned by
        # identity when the lookup falls through to failobj.
        missing = []
        self.assertEqual(
            im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(im.get_param('foo', failobj=missing,
                                   header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(im.get_param('foobar', missing), missing)
        self.assertIs(im.get_param('attachment', missing, header='foobar'),
                      missing)
1042
1043
Ezio Melottib3aedd42010-11-20 19:04:17 +00001044
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001045# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # With no subtype or encoder given, the part reports
        # application/octet-stream with a base64 transfer encoding.
        part = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        self.assertEqual(part.get_content_type(), 'application/octet-stream')
        self.assertEqual(part['content-transfer-encoding'], 'base64')

    def test_body(self):
        # Named `raw` rather than `bytes` so the builtin is not shadowed.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        part = MIMEApplication(raw)
        # Compare ignoring surrounding whitespace: a trailing line break
        # after the base64 text is not significant to decoders, and the
        # decode=True round trip below pins the actual content.
        self.assertEqual(part.get_payload().strip(), '+vv8/f7/')
        self.assertEqual(part.get_payload(decode=True), raw)
1059
1060
Ezio Melottib3aedd42010-11-20 19:04:17 +00001061
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001062# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        msg = self._msg
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_param('charset'), 'us-ascii')
        # A fresh list serves as a sentinel that can only come back by
        # identity when the lookup falls through to the failobj.
        missing = []
        self.assertIs(msg.get_param('foobar', missing), missing)
        self.assertIs(msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # Same checks as test_charset; kept as a separately named test.
        msg = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(msg.get_charset().input_charset, 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        msg = MIMEText('hello there')
        self.assertEqual(msg.get_charset(), 'us-ascii')
        self.assertEqual(msg['content-type'],
                         'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        # Cyrillic text, given as \u escapes.
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        msg = MIMEText(teststr, _charset='utf-8')
        self.assertEqual(msg.get_charset().output_charset, 'utf-8')
        self.assertEqual(msg['content-type'], 'text/plain; charset="utf-8"')
        self.assertEqual(msg.get_payload(decode=True),
                         teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1113
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001114
Ezio Melottib3aedd42010-11-20 19:04:17 +00001115
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001116# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a short text part and
        # a GIF attachment, plus From/To/Subject/Date headers.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is the DST flag; it selects between
        # the standard and the DST offset.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone / time.altzone count seconds *west* of UTC, so a
        # positive value means a "-" zone.  Format the magnitude as HHMM:
        # the previous "'%04d' % (tzsecs / 36)" produced "+-100" for
        # negative offsets and wrong digits for offsets that are not a
        # whole number of hours.
        sign = '-' if tzsecs > 0 else '+'
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        unless = self.assertTrue
        raises = self.assertRaises
        # tests
        m = self._msg
        unless(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The parts come back as the very objects attached in setUp.
        unless(m0 is self._txt)
        unless(m1 is self._im)
        eq(m.get_payload(), [m0, m1])
        unless(not m0.is_multipart())
        unless(not m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # Preamble and epilogue are left untouched; the expected output
        # ends right after the closing boundary with no final newline.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # Empty-string preamble and epilogue each contribute a line break
        # to the expected output.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble yields an extra blank line before the
        # first boundary in the expected output.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # A None preamble adds nothing before the first boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # A None epilogue: output stops right after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue: one newline after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A '\n' epilogue: two newlines after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative wraps exactly one text/plain message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): expected nesting is written with four spaces per
        # level -- confirm against what iterators._structure emits.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): expected nesting is written with four spaces per
        # level -- confirm against what iterators._structure emits.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html part: the body, including
        # the boundary-looking lines, is expected back verbatim.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # The boundary starts with whitespace, which must be kept.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the input, with
        # no newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001488
1489
Ezio Melottib3aedd42010-11-20 19:04:17 +00001490
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001491# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        # All three accessors are expected to report text/plain.
        msg = self._msgobj('msg_14.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertEqual(msg.get_content_maintype(), 'text')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # The payload stays a plain string and two defects are recorded,
        # in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        msg = Message()

        def check_reports_text_plain():
            # Shared by both invalid values tried below.
            self.assertEqual(msg.get_content_maintype(), 'text')
            self.assertEqual(msg.get_content_subtype(), 'plain')
            self.assertEqual(msg.get_content_type(), 'text/plain')

        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        check_reports_text_plain()
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        check_reports_text_plain()
        # Still, make sure that the message is idempotently generated
        sink = StringIO()
        Generator(sink).flatten(msg)
        self.ndiffAssertEqual(sink.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The whole body, boundary lines included, is expected back as
        # one string payload.
        msg = self._msgobj('msg_31.txt')
        payload = msg.get_payload()
        self.ndiffAssertEqual(payload, """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # The expected flattened form has a blank line between the
        # headers and the body.
        msg = self._msgobj('msg_35.txt')
        flattened = msg.as_string()
        self.ndiffAssertEqual(flattened, """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        # Two defects are expected, in this order.
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # The input opens with a whitespace-led line.  It is expected to
        # be recorded as a defect, leaving no headers and the remaining
        # two lines as the payload.
        text = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(text)
        self.assertEqual(msg.keys(), [])
        self.assertEqual(msg.get_payload(), 'Line 2\nLine 3')
        self.assertEqual(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        self.assertEqual(msg.defects[0].line, ' Line 1\n')
1602
1603
Ezio Melottib3aedd42010-11-20 19:04:17 +00001604
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001605# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    # Exercises decode_header()/make_header() on RFC 2047 encoded words.

    def test_rfc2047_multiline(self):
        eq = self.assertEqual
        # A folded header: the second physical line starts with one space.
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        # Re-encoding at 76 columns splits the last encoded word in two.
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        # The charset name comes back lower-cased.
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b'Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
                (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words glued to the surrounding text are not decoded; the
        # input comes back untouched as a single chunk.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(s, None)])

    def test_rfc2047_with_whitespace(self):
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])

    def test_rfc2047_B_bad_padding(self):
        s = '=?iso-8859-1?B?%s?='
        data = [                                # only test complete bytes
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi')
          ]
        for q, a in data:
            dh = decode_header(s % q)
            # Several variants share the same expected bytes, so name the
            # failing input in the assertion message.
            self.assertEqual(dh, [(a, 'iso-8859-1')],
                             'base64 payload %r' % q)

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004.
        # '=zz' is not a valid hex escape and must be passed through as-is.
        # The charset used to be misspelled 'iso-8659-1'.
        s = '=?iso-8859-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(s),
                        [(b'andr\xe9=zz', 'iso-8859-1')])
1669
Ezio Melottib3aedd42010-11-20 19:04:17 +00001670
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001671# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Covers building, generating and parsing message/rfc822 containers and
    # the default content types of their subparts.

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # MIMEMessage must reject a plain string argument.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message must be the very same object, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message to the container must fail.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        # Build by hand a message that must flatten to the text of msg_21.txt.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        # The preamble has no trailing newline and the epilogue is empty.
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the subparts must still report
        # message/rfc822 and the generated text simply omits those headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001970
Ezio Melottib3aedd42010-11-20 19:04:17 +00001971
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    # Line separator expected in parsed payloads, preambles and epilogues.
    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed from,
        # so callers can compare regenerated output with the original.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with maxheaderlen=0 and require the result to equal
        # text exactly.
        eq = self.ndiffAssertEqual
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_message_delivery_status(self):
        # This one is regenerated with the Unix-From line included.
        msg, text = self._msgobj('msg_43.txt')
        self._idempotent(msg, text, unixfrom=True)

    def test_message_signed_idempotent(self):
        msg, text = self._msgobj('msg_45.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        # Parse the message; the raw text is not needed here.
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002134
2135
Ezio Melottib3aedd42010-11-20 19:04:17 +00002136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002137# Test various other bits of the package's functionality
2138class TestMiscellaneous(TestEmailBase):
2139 def test_message_from_string(self):
2140 with openfile('msg_01.txt') as fp:
2141 text = fp.read()
2142 msg = email.message_from_string(text)
2143 s = StringIO()
2144 # Don't wrap/continue long headers since we're trying to test
2145 # idempotency.
2146 g = Generator(s, maxheaderlen=0)
2147 g.flatten(msg)
2148 self.assertEqual(text, s.getvalue())
2149
2150 def test_message_from_file(self):
2151 with openfile('msg_01.txt') as fp:
2152 text = fp.read()
2153 fp.seek(0)
2154 msg = email.message_from_file(fp)
2155 s = StringIO()
2156 # Don't wrap/continue long headers since we're trying to test
2157 # idempotency.
2158 g = Generator(s, maxheaderlen=0)
2159 g.flatten(msg)
2160 self.assertEqual(text, s.getvalue())
2161
2162 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002163 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002164 with openfile('msg_01.txt') as fp:
2165 text = fp.read()
2166
2167 # Create a subclass
2168 class MyMessage(Message):
2169 pass
2170
2171 msg = email.message_from_string(text, MyMessage)
2172 unless(isinstance(msg, MyMessage))
2173 # Try something more complicated
2174 with openfile('msg_02.txt') as fp:
2175 text = fp.read()
2176 msg = email.message_from_string(text, MyMessage)
2177 for subpart in msg.walk():
2178 unless(isinstance(subpart, MyMessage))
2179
2180 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002181 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002182 # Create a subclass
2183 class MyMessage(Message):
2184 pass
2185
2186 with openfile('msg_01.txt') as fp:
2187 msg = email.message_from_file(fp, MyMessage)
2188 unless(isinstance(msg, MyMessage))
2189 # Try something more complicated
2190 with openfile('msg_02.txt') as fp:
2191 msg = email.message_from_file(fp, MyMessage)
2192 for subpart in msg.walk():
2193 unless(isinstance(subpart, MyMessage))
2194
2195 def test__all__(self):
2196 module = __import__('email')
2197 # Can't use sorted() here due to Python 2.3 compatibility
2198 all = module.__all__[:]
2199 all.sort()
2200 self.assertEqual(all, [
2201 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002202 'header', 'iterators', 'message', 'message_from_binary_file',
2203 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002204 'message_from_string', 'mime', 'parser',
2205 'quoprimime', 'utils',
2206 ])
2207
2208 def test_formatdate(self):
2209 now = time.time()
2210 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2211 time.gmtime(now)[:6])
2212
2213 def test_formatdate_localtime(self):
2214 now = time.time()
2215 self.assertEqual(
2216 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2217 time.localtime(now)[:6])
2218
2219 def test_formatdate_usegmt(self):
2220 now = time.time()
2221 self.assertEqual(
2222 utils.formatdate(now, localtime=False),
2223 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2224 self.assertEqual(
2225 utils.formatdate(now, localtime=False, usegmt=True),
2226 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2227
2228 def test_parsedate_none(self):
2229 self.assertEqual(utils.parsedate(''), None)
2230
2231 def test_parsedate_compact(self):
2232 # The FWS after the comma is optional
2233 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2234 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2235
2236 def test_parsedate_no_dayofweek(self):
2237 eq = self.assertEqual
2238 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2239 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2240
2241 def test_parsedate_compact_no_dayofweek(self):
2242 eq = self.assertEqual
2243 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2244 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2245
2246 def test_parsedate_acceptable_to_time_functions(self):
2247 eq = self.assertEqual
2248 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2249 t = int(time.mktime(timetup))
2250 eq(time.localtime(t)[:6], timetup[:6])
2251 eq(int(time.strftime('%Y', timetup)), 2003)
2252 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2253 t = int(time.mktime(timetup[:9]))
2254 eq(time.localtime(t)[:6], timetup[:6])
2255 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2256
R. David Murray219d1c82010-08-25 00:45:55 +00002257 def test_parsedate_y2k(self):
2258 """Test for parsing a date with a two-digit year.
2259
2260 Parsing a date with a two-digit year should return the correct
2261 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2262 obsoletes RFC822) requires four-digit years.
2263
2264 """
2265 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2266 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2267 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2268 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2269
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002270 def test_parseaddr_empty(self):
2271 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2272 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2273
2274 def test_noquote_dump(self):
2275 self.assertEqual(
2276 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2277 'A Silly Person <person@dom.ain>')
2278
2279 def test_escape_dump(self):
2280 self.assertEqual(
2281 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2282 r'"A \(Very\) Silly Person" <person@dom.ain>')
2283 a = r'A \(Special\) Person'
2284 b = 'person@dom.ain'
2285 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2286
2287 def test_escape_backslashes(self):
2288 self.assertEqual(
2289 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2290 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2291 a = r'Arthur \Backslash\ Foobar'
2292 b = 'person@dom.ain'
2293 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2294
2295 def test_name_with_dot(self):
2296 x = 'John X. Doe <jxd@example.com>'
2297 y = '"John X. Doe" <jxd@example.com>'
2298 a, b = ('John X. Doe', 'jxd@example.com')
2299 self.assertEqual(utils.parseaddr(x), (a, b))
2300 self.assertEqual(utils.parseaddr(y), (a, b))
2301 # formataddr() quotes the name if there's a dot in it
2302 self.assertEqual(utils.formataddr((a, b)), y)
2303
R. David Murray5397e862010-10-02 15:58:26 +00002304 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2305 # issue 10005. Note that in the third test the second pair of
2306 # backslashes is not actually a quoted pair because it is not inside a
2307 # comment or quoted string: the address being parsed has a quoted
2308 # string containing a quoted backslash, followed by 'example' and two
2309 # backslashes, followed by another quoted string containing a space and
2310 # the word 'example'. parseaddr copies those two backslashes
2311 # literally. Per rfc5322 this is not technically correct since a \ may
2312 # not appear in an address outside of a quoted string. It is probably
2313 # a sensible Postel interpretation, though.
2314 eq = self.assertEqual
2315 eq(utils.parseaddr('""example" example"@example.com'),
2316 ('', '""example" example"@example.com'))
2317 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2318 ('', '"\\"example\\" example"@example.com'))
2319 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2320 ('', '"\\\\"example\\\\" example"@example.com'))
2321
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002322 def test_multiline_from_comment(self):
2323 x = """\
2324Foo
2325\tBar <foo@example.com>"""
2326 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2327
2328 def test_quote_dump(self):
2329 self.assertEqual(
2330 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2331 r'"A Silly; Person" <person@dom.ain>')
2332
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002333 def test_charset_richcomparisons(self):
2334 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002335 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002336 cset1 = Charset()
2337 cset2 = Charset()
2338 eq(cset1, 'us-ascii')
2339 eq(cset1, 'US-ASCII')
2340 eq(cset1, 'Us-AsCiI')
2341 eq('us-ascii', cset1)
2342 eq('US-ASCII', cset1)
2343 eq('Us-AsCiI', cset1)
2344 ne(cset1, 'usascii')
2345 ne(cset1, 'USASCII')
2346 ne(cset1, 'UsAsCiI')
2347 ne('usascii', cset1)
2348 ne('USASCII', cset1)
2349 ne('UsAsCiI', cset1)
2350 eq(cset1, cset2)
2351 eq(cset2, cset1)
2352
2353 def test_getaddresses(self):
2354 eq = self.assertEqual
2355 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2356 'Bud Person <bperson@dom.ain>']),
2357 [('Al Person', 'aperson@dom.ain'),
2358 ('Bud Person', 'bperson@dom.ain')])
2359
2360 def test_getaddresses_nasty(self):
2361 eq = self.assertEqual
2362 eq(utils.getaddresses(['foo: ;']), [('', '')])
2363 eq(utils.getaddresses(
2364 ['[]*-- =~$']),
2365 [('', ''), ('', ''), ('', '*--')])
2366 eq(utils.getaddresses(
2367 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2368 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2369
2370 def test_getaddresses_embedded_comment(self):
2371 """Test proper handling of a nested comment"""
2372 eq = self.assertEqual
2373 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2374 eq(addrs[0][1], 'foo@bar.com')
2375
2376 def test_utils_quote_unquote(self):
2377 eq = self.assertEqual
2378 msg = Message()
2379 msg.add_header('content-disposition', 'attachment',
2380 filename='foo\\wacky"name')
2381 eq(msg.get_filename(), 'foo\\wacky"name')
2382
2383 def test_get_body_encoding_with_bogus_charset(self):
2384 charset = Charset('not a charset')
2385 self.assertEqual(charset.get_body_encoding(), 'base64')
2386
2387 def test_get_body_encoding_with_uppercase_charset(self):
2388 eq = self.assertEqual
2389 msg = Message()
2390 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2391 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2392 charsets = msg.get_charsets()
2393 eq(len(charsets), 1)
2394 eq(charsets[0], 'utf-8')
2395 charset = Charset(charsets[0])
2396 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002397 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002398 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2399 eq(msg.get_payload(decode=True), b'hello world')
2400 eq(msg['content-transfer-encoding'], 'base64')
2401 # Try another one
2402 msg = Message()
2403 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2404 charsets = msg.get_charsets()
2405 eq(len(charsets), 1)
2406 eq(charsets[0], 'us-ascii')
2407 charset = Charset(charsets[0])
2408 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2409 msg.set_payload('hello world', charset=charset)
2410 eq(msg.get_payload(), 'hello world')
2411 eq(msg['content-transfer-encoding'], '7bit')
2412
2413 def test_charsets_case_insensitive(self):
2414 lc = Charset('us-ascii')
2415 uc = Charset('US-ASCII')
2416 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2417
2418 def test_partial_falls_inside_message_delivery_status(self):
2419 eq = self.ndiffAssertEqual
2420 # Regression test for SF bug #1076485. The Parser interface provides
2421 # chunks of data to FeedParser in 8192 byte gulps, and one of those
2422 # chunks ended inside a message/delivery-status header block, which
2423 # triggered an unreadline() of NeedMoreData.
# The check itself: write the MIME structure of msg_43.txt into sfp with
# iterators._structure() and compare it, line by line, with the listing below.
# NOTE(review): the leading whitespace of every line inside the expected
# literal is part of the compared value (presumably it mirrors the nesting
# depth printed by _structure) -- confirm it against the real file.
2424 msg = self._msgobj('msg_43.txt')
2425 sfp = StringIO()
2426 iterators._structure(msg, sfp)
2427 eq(sfp.getvalue(), """\
2428multipart/report
2429 text/plain
2430 message/delivery-status
2431 text/plain
2432 text/plain
2433 text/plain
2434 text/plain
2435 text/plain
2436 text/plain
2437 text/plain
2438 text/plain
2439 text/plain
2440 text/plain
2441 text/plain
2442 text/plain
2443 text/plain
2444 text/plain
2445 text/plain
2446 text/plain
2447 text/plain
2448 text/plain
2449 text/plain
2450 text/plain
2451 text/plain
2452 text/plain
2453 text/plain
2454 text/plain
2455 text/plain
2456 text/plain
2457 text/rfc822-headers
2458""")
2459
2460
Ezio Melottib3aedd42010-11-20 19:04:17 +00002461
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002462# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Exercises iterators.body_line_iterator() and
    # iterators.typed_subpart_iterator() against the sample messages, plus
    # the line reassembly done by email.feedparser.BufferedSubFile.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message: joining the iterated lines
        # must give back the payload.
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart; the expected body text is kept
        # in msg_19.txt.
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Same as above, but filtering on both main type and subtype.
        payloads = [subpart.get_payload()
                    for subpart in iterators.typed_subpart_iterator(
                        msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        # Each entry is (chunk to push, number of lines that must be
        # readable immediately after that push).
        pushes = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        from email.feedparser import BufferedSubFile, NeedMoreData
        bsf = BufferedSubFile()
        collected = []
        for chunk, expected_count in pushes:
            bsf.push(chunk)
            # Drain whatever is available now; readline() hands back the
            # NeedMoreData sentinel once no further line can be produced.
            ready = list(iter(bsf.readline, NeedMoreData))
            # assertEqual (rather than assertTrue(a == b)) so that a
            # failure reports both numbers.
            self.assertEqual(len(ready), expected_count)
            collected.extend(ready)
        self.assertEqual(len(collected),
                         sum(count for _, count in pushes))
        # Nothing may be lost, duplicated or invented along the way.
        self.assertEqual(''.join(chunk for chunk, _ in pushes),
                         ''.join(collected))
2549
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002550
Ezio Melottib3aedd42010-11-20 19:04:17 +00002551
class TestParsers(TestEmailBase):
    # Checks how Parser, HeaderParser and the email.message_from_*()
    # helpers cope with well-formed and with deliberately awkward input.

    # Let unittest print complete diffs when a comparison fails.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is not parsed: it stays one string even though the
        # declared content type is multipart.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the CRLF line endings of the input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        # Flattening with CRLF as the line separator must reproduce the
        # input text exactly.
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        # Both message/rfc822 parts have the same shape; only the text of
        # the wrapped text/plain message differs.
        for index, expected_body in enumerate(('message 1\n', 'message 2\n')):
            part = msg.get_payload(index)
            eq(part.get_content_type(), 'message/rfc822')
            self.assertTrue(part.is_multipart())
            eq(len(part.get_payload()), 1)
            inner = part.get_payload(0)
            self.assertFalse(inner.is_multipart())
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), expected_body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara: three header lines, no final
        # newline and no body; the last header must still be available.
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        # Header values must come back without the CRLF terminator.
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Unusual but printable field names are all accepted as headers.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header; the test expects no headers at all to be recorded.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on whatever iterable keys() returns, unlike an
        # in-place .sort().
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002714
Ezio Melottib3aedd42010-11-20 19:04:17 +00002715
R. David Murray96fd54e2010-10-08 15:55:28 +00002716class Test8BitBytesHandling(unittest.TestCase):
2717 # In Python3 all input is string, but that doesn't work if the actual input
2718 # uses an 8bit transfer encoding. To hack around that, in email 5.1 we
2719 # decode byte streams using the surrogateescape error handler, and
2720 # reconvert to binary at appropriate places if we detect surrogates. This
2721 # doesn't allow us to transform headers with 8bit bytes (they get munged),
2722 # but it does allow us to parse and preserve them, and to decode body
2723 # parts that use an 8bit CTE.
2724
2725 bodytest_msg = textwrap.dedent("""\
2726 From: foo@bar.com
2727 To: baz
2728 Mime-Version: 1.0
2729 Content-Type: text/plain; charset={charset}
2730 Content-Transfer-Encoding: {cte}
2731
2732 {bodyline}
2733 """)
2734
2735 def test_known_8bit_CTE(self):
2736 m = self.bodytest_msg.format(charset='utf-8',
2737 cte='8bit',
2738 bodyline='pöstal').encode('utf-8')
2739 msg = email.message_from_bytes(m)
2740 self.assertEqual(msg.get_payload(), "pöstal\n")
2741 self.assertEqual(msg.get_payload(decode=True),
2742 "pöstal\n".encode('utf-8'))
2743
2744 def test_unknown_8bit_CTE(self):
2745 m = self.bodytest_msg.format(charset='notavalidcharset',
2746 cte='8bit',
2747 bodyline='pöstal').encode('utf-8')
2748 msg = email.message_from_bytes(m)
2749 self.assertEqual(msg.get_payload(), "p��stal\n")
2750 self.assertEqual(msg.get_payload(decode=True),
2751 "pöstal\n".encode('utf-8'))
2752
2753 def test_8bit_in_quopri_body(self):
2754 # This is non-RFC compliant data...without 'decode' the library code
2755 # decodes the body using the charset from the headers, and because the
2756 # source byte really is utf-8 this works. This is likely to fail
2757 # against real dirty data (ie: produce mojibake), but the data is
2758 # invalid anyway so it is as good a guess as any. But this means that
2759 # this test just confirms the current behavior; that behavior is not
2760 # necessarily the best possible behavior. With 'decode' it is
2761 # returning the raw bytes, so that test should be of correct behavior,
2762 # or at least produce the same result that email4 did.
2763 m = self.bodytest_msg.format(charset='utf-8',
2764 cte='quoted-printable',
2765 bodyline='p=C3=B6stál').encode('utf-8')
2766 msg = email.message_from_bytes(m)
2767 self.assertEqual(msg.get_payload(), 'p=C3=B6stál\n')
2768 self.assertEqual(msg.get_payload(decode=True),
2769 'pöstál\n'.encode('utf-8'))
2770
2771 def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
2772 # This is similar to the previous test, but proves that if the 8bit
2773 # byte is undecodeable in the specified charset, it gets replaced
2774 # by the unicode 'unknown' character. Again, this may or may not
2775 # be the ideal behavior. Note that if decode=False none of the
2776 # decoders will get involved, so this is the only test we need
2777 # for this behavior.
2778 m = self.bodytest_msg.format(charset='ascii',
2779 cte='quoted-printable',
2780 bodyline='p=C3=B6stál').encode('utf-8')
2781 msg = email.message_from_bytes(m)
2782 self.assertEqual(msg.get_payload(), 'p=C3=B6st��l\n')
2783 self.assertEqual(msg.get_payload(decode=True),
2784 'pöstál\n'.encode('utf-8'))
2785
2786 def test_8bit_in_base64_body(self):
2787 # Sticking an 8bit byte in a base64 block makes it undecodable by
2788 # normal means, so the block is returned undecoded, but as bytes.
2789 m = self.bodytest_msg.format(charset='utf-8',
2790 cte='base64',
2791 bodyline='cMO2c3RhbAá=').encode('utf-8')
2792 msg = email.message_from_bytes(m)
2793 self.assertEqual(msg.get_payload(decode=True),
2794 'cMO2c3RhbAá=\n'.encode('utf-8'))
2795
2796 def test_8bit_in_uuencode_body(self):
2797 # Sticking an 8bit byte in a uuencode block makes it undecodable by
2798 # normal means, so the block is returned undecoded, but as bytes.
2799 m = self.bodytest_msg.format(charset='utf-8',
2800 cte='uuencode',
2801 bodyline='<,.V<W1A; á ').encode('utf-8')
2802 msg = email.message_from_bytes(m)
2803 self.assertEqual(msg.get_payload(decode=True),
2804 '<,.V<W1A; á \n'.encode('utf-8'))
2805
2806
2807 headertest_msg = textwrap.dedent("""\
2808 From: foo@bar.com
2809 To: báz
2810 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2811 \tJean de Baddie
2812 From: göst
2813
2814 Yes, they are flying.
2815 """).encode('utf-8')
2816
2817 def test_get_8bit_header(self):
2818 msg = email.message_from_bytes(self.headertest_msg)
2819 self.assertEqual(msg.get('to'), 'b??z')
2820 self.assertEqual(msg['to'], 'b??z')
2821
2822 def test_print_8bit_headers(self):
2823 msg = email.message_from_bytes(self.headertest_msg)
2824 self.assertEqual(str(msg),
2825 self.headertest_msg.decode(
2826 'ascii', 'replace').replace('�', '?'))
2827
2828 def test_values_with_8bit_headers(self):
2829 msg = email.message_from_bytes(self.headertest_msg)
2830 self.assertListEqual(msg.values(),
2831 ['foo@bar.com',
2832 'b??z',
2833 'Maintenant je vous pr??sente mon '
2834 'coll??gue, le pouf c??l??bre\n'
2835 '\tJean de Baddie',
2836 "g??st"])
2837
2838 def test_items_with_8bit_headers(self):
2839 msg = email.message_from_bytes(self.headertest_msg)
2840 self.assertListEqual(msg.items(),
2841 [('From', 'foo@bar.com'),
2842 ('To', 'b??z'),
2843 ('Subject', 'Maintenant je vous pr??sente mon '
2844 'coll??gue, le pouf c??l??bre\n'
2845 '\tJean de Baddie'),
2846 ('From', 'g??st')])
2847
2848 def test_get_all_with_8bit_headers(self):
2849 msg = email.message_from_bytes(self.headertest_msg)
2850 self.assertListEqual(msg.get_all('from'),
2851 ['foo@bar.com',
2852 'g??st'])
2853
2854 non_latin_bin_msg = textwrap.dedent("""\
2855 From: foo@bar.com
2856 To: báz
2857 Subject: Maintenant je vous présente mon collègue, le pouf célèbre
2858 \tJean de Baddie
2859 Mime-Version: 1.0
2860 Content-Type: text/plain; charset="utf-8"
2861 Content-Transfer-Encoding: 8bit
2862
2863 Да, они летят.
2864 """).encode('utf-8')
2865
2866 def test_bytes_generator(self):
2867 msg = email.message_from_bytes(self.non_latin_bin_msg)
2868 out = BytesIO()
2869 email.generator.BytesGenerator(out).flatten(msg)
2870 self.assertEqual(out.getvalue(), self.non_latin_bin_msg)
2871
2872 # XXX: ultimately the '?' should turn into CTE encoded bytes
2873 # using 'unknown-8bit' charset.
2874 non_latin_bin_msg_as7bit = textwrap.dedent("""\
2875 From: foo@bar.com
2876 To: b??z
2877 Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
2878 \tJean de Baddie
2879 Mime-Version: 1.0
2880 Content-Type: text/plain; charset="utf-8"
2881 Content-Transfer-Encoding: base64
2882
2883 0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
2884 """)
2885
2886 def test_generator_handles_8bit(self):
2887 msg = email.message_from_bytes(self.non_latin_bin_msg)
2888 out = StringIO()
2889 email.generator.Generator(out).flatten(msg)
2890 self.assertEqual(out.getvalue(), self.non_latin_bin_msg_as7bit)
2891
2892 def test_bytes_generator_with_unix_from(self):
2893 # The unixfrom contains a current date, so we can't check it
2894 # literally. Just make sure the first word is 'From' and the
2895 # rest of the message matches the input.
2896 msg = email.message_from_bytes(self.non_latin_bin_msg)
2897 out = BytesIO()
2898 email.generator.BytesGenerator(out).flatten(msg, unixfrom=True)
2899 lines = out.getvalue().split(b'\n')
2900 self.assertEqual(lines[0].split()[0], b'From')
2901 self.assertEqual(b'\n'.join(lines[1:]), self.non_latin_bin_msg)
2902
2903 def test_message_from_binary_file(self):
2904 fn = 'test.msg'
2905 self.addCleanup(unlink, fn)
2906 with open(fn, 'wb') as testfile:
2907 testfile.write(self.non_latin_bin_msg)
Brett Cannon384917a2010-10-29 23:08:36 +00002908 with open(fn, 'rb') as testfile:
2909 m = email.parser.BytesParser().parse(testfile)
R. David Murray96fd54e2010-10-08 15:55:28 +00002910 self.assertEqual(str(m), self.non_latin_bin_msg_as7bit)
2911
2912 latin_bin_msg = textwrap.dedent("""\
2913 From: foo@bar.com
2914 To: Dinsdale
2915 Subject: Nudge nudge, wink, wink
2916 Mime-Version: 1.0
2917 Content-Type: text/plain; charset="latin-1"
2918 Content-Transfer-Encoding: 8bit
2919
2920 oh là là, know what I mean, know what I mean?
2921 """).encode('latin-1')
2922
2923 latin_bin_msg_as7bit = textwrap.dedent("""\
2924 From: foo@bar.com
2925 To: Dinsdale
2926 Subject: Nudge nudge, wink, wink
2927 Mime-Version: 1.0
2928 Content-Type: text/plain; charset="iso-8859-1"
2929 Content-Transfer-Encoding: quoted-printable
2930
2931 oh l=E0 l=E0, know what I mean, know what I mean?
2932 """)
2933
2934 def test_string_generator_reencodes_to_quopri_when_appropriate(self):
2935 m = email.message_from_bytes(self.latin_bin_msg)
2936 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
2937
2938 def test_decoded_generator_emits_unicode_body(self):
2939 m = email.message_from_bytes(self.latin_bin_msg)
2940 out = StringIO()
2941 email.generator.DecodedGenerator(out).flatten(m)
2942 #DecodedHeader output contains an extra blank line compared
2943 #to the input message. RDM: not sure if this is a bug or not,
2944 #but it is not specific to the 8bit->7bit conversion.
2945 self.assertEqual(out.getvalue(),
2946 self.latin_bin_msg.decode('latin-1')+'\n')
2947
2948 def test_bytes_feedparser(self):
2949 bfp = email.feedparser.BytesFeedParser()
2950 for i in range(0, len(self.latin_bin_msg), 10):
2951 bfp.feed(self.latin_bin_msg[i:i+10])
2952 m = bfp.close()
2953 self.assertEqual(str(m), self.latin_bin_msg_as7bit)
2954
R. David Murray8451c4b2010-10-23 22:19:56 +00002955 def test_crlf_flatten(self):
2956 with openfile('msg_26.txt', 'rb') as fp:
2957 text = fp.read()
2958 msg = email.message_from_bytes(text)
2959 s = BytesIO()
2960 g = email.generator.BytesGenerator(s)
2961 g.flatten(msg, linesep='\r\n')
2962 self.assertEqual(s.getvalue(), text)
2963 maxDiff = None
2964
Ezio Melottib3aedd42010-11-20 19:04:17 +00002965
class BaseTestBytesGeneratorIdempotent:
    # Mixin holding the bytes flavour of the helpers used by the
    # idempotency tests.  It reads three attributes from self: linesep
    # (str), blinesep (bytes) and normalize_linesep_regex (a compiled
    # bytes pattern).

    maxDiff = None

    def _msgobj(self, filename):
        # Load a sample message in binary mode, rewrite every match of
        # normalize_linesep_regex to blinesep, and return the parsed
        # message together with the normalized bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # compare the generated bytes with data.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
2987
2988
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Bytes idempotency run with LF line endings.
    # The pattern matches every CRLF pair, so that the base class can
    # rewrite it to the separator below.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
2994
2995
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Bytes idempotency run with CRLF line endings.
    # The pattern matches only an LF that is not already preceded by CR,
    # so existing CRLF pairs are left alone when the base class rewrites
    # matches to the separator below.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3001
Ezio Melottib3aedd42010-11-20 19:04:17 +00003002
class TestBase64(unittest.TestCase):
    # Tests for the helper functions of email.base64mime.

    def test_len(self):
        check = self.assertEqual
        # header_length() has to agree with the size of the real encoding.
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Four output characters for every started group of three
            # input characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, and so on.
            bsize = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        long_input = b'xxxx ' * 20
        check(base64mime.body_encode(b''), b'')
        check(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        check(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        check(base64mime.body_encode(long_input, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        check(base64mime.body_encode(long_input, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003053
3054
Ezio Melottib3aedd42010-11-20 19:04:17 +00003055
class TestQuopri(unittest.TestCase):
    # Tests for the helper functions of email.quoprimime.

    def setUp(self):
        # Characters (as byte integers) that don't need to be encoded in
        # headers: ASCII letters, digits and a few punctuation marks.
        header_literals = list(range(ord('a'), ord('z') + 1))
        header_literals.extend(range(ord('A'), ord('Z') + 1))
        header_literals.extend(range(ord('0'), ord('9') + 1))
        header_literals.extend(b'!*+-/')
        self.hlit = header_literals
        # Characters (as byte integers) that do need to be encoded in
        # headers: every other byte value.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Characters (as byte integers) that don't need to be encoded in
        # bodies: space through '~' plus tab, minus '='.
        body_literals = list(range(ord(' '), ord('~') + 1))
        body_literals.append(ord('\t'))
        body_literals.remove(ord('='))
        self.blit = body_literals
        # Characters (as byte integers) that do need to be encoded in
        # bodies: every other byte value.
        self.bnon = [c for c in range(256) if c not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        needs_encoding = quoprimime.header_check
        for c in self.hlit:
            self.assertFalse(
                needs_encoding(c),
                'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(
                needs_encoding(c),
                'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        needs_encoding = quoprimime.body_check
        for c in self.blit:
            self.assertFalse(
                needs_encoding(c),
                'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(
                needs_encoding(c),
                'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        check = self.assertEqual
        measure = quoprimime.header_length
        for sample, expected in ((b'hello', 5), (b'h@e@l@l@o@', 20)):
            check(measure(sample), expected)
            # RFC 2047 chrome is not included in header_length().
            # =?xxx?q?...?= means 10 extra characters
            check(len(quoprimime.header_encode(sample, charset='xxx')),
                  measure(sample) + 10)
        for c in self.hlit:
            check(measure(bytes([c])), 1,
                  'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            check(measure(bytes([c])), 3,
                  'expected length 3 for %r' % chr(c))
        check(measure(b' '), 1)

    def test_body_quopri_len(self):
        check = self.assertEqual
        measure = quoprimime.body_length
        # Literal characters take one position, all others three.
        for c in self.blit:
            check(measure(bytes([c])), 1)
        for c in self.bnon:
            check(measure(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # unquote() has to undo quote() for every character 0..255.
        for codepoint in range(256):
            char = chr(codepoint)
            round_tripped = quoprimime.unquote(quoprimime.quote(char))
            self.assertEqual(round_tripped, char)

    def test_header_encode(self):
        check = self.assertEqual
        encode = quoprimime.header_encode
        check(encode(b'hello'), '=?iso-8859-1?q?hello?=')
        check(encode(b'hello', charset='iso-8859-2'),
              '=?iso-8859-2?q?hello?=')
        check(encode(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        check(encode(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        check = self.assertEqual
        decode = quoprimime.decode
        check(decode(''), '')
        check(decode('hello'), 'hello')
        # The second argument is the end-of-line string used in the result.
        check(decode('hello', 'X'), 'hello')
        check(decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        check = self.assertEqual
        encode = quoprimime.body_encode
        long_input = 'xxxx ' * 20
        check(encode(''), '')
        check(encode('hello'), 'hello')
        # A CRLF in the input comes out as a bare newline
        check(encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        check(encode(long_input, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        check(encode(long_input, maxlinelen=40, eol='\r\n'),
              """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # An empty line in the middle of the input is kept as it is
        check(encode("""\
one line

two line"""), """\
one line

two line""")
3172
3173
Ezio Melottib3aedd42010-11-20 19:04:17 +00003174
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003175# Test the Charset class
3176class TestCharset(unittest.TestCase):
3177 def tearDown(self):
3178 from email import charset as CharsetModule
3179 try:
3180 del CharsetModule.CHARSETS['fake']
3181 except KeyError:
3182 pass
3183
Guido van Rossum9604e662007-08-30 03:46:43 +00003184 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003185 eq = self.assertEqual
3186 # Make sure us-ascii = no Unicode conversion
3187 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003188 eq(c.header_encode('Hello World!'), 'Hello World!')
3189 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003190 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003191 self.assertRaises(UnicodeError, c.header_encode, s)
3192 c = Charset('utf-8')
3193 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003194
3195 def test_body_encode(self):
3196 eq = self.assertEqual
3197 # Try a charset with QP body encoding
3198 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003199 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003200 # Try a charset with Base64 body encoding
3201 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003202 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003203 # Try a charset with None body encoding
3204 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003205 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003206 # Try the convert argument, where input codec != output codec
3207 c = Charset('euc-jp')
3208 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003209 # XXX FIXME
3210## try:
3211## eq('\x1b$B5FCO;~IW\x1b(B',
3212## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3213## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3214## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3215## except LookupError:
3216## # We probably don't have the Japanese codecs installed
3217## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003218 # Testing SF bug #625509, which we have to fake, since there are no
3219 # built-in encodings where the header encoding is QP but the body
3220 # encoding is not.
3221 from email import charset as CharsetModule
3222 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3223 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003224 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003225
3226 def test_unicode_charset_name(self):
3227 charset = Charset('us-ascii')
3228 self.assertEqual(str(charset), 'us-ascii')
3229 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3230
3231
Ezio Melottib3aedd42010-11-20 19:04:17 +00003232
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003233# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The appended chunk carries its own leading space.
        # NOTE(review): test_simple_surprise appends the same text without
        # that space and expects the same result; whitespace in this literal
        # may have been collapsed by the listing this file was taken from --
        # confirm the expected value against the repository copy.
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # No leading space on the appended chunk, yet the encoded value has
        # one between the two chunks.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        # Plain text comes back as a single (text, None) pair.
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line has to fit in the requested maximum.  Using
        # assertLessEqual makes a failure report the offending length.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        # One header built from three chunks, each in its own charset.
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding yields the three original chunks with their charsets.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        # str() of the header is the full text as one unicode string.
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default settings.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # Naming the header moves the fold point.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A generous maxlinelen leaves the value on one line.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The tiny pieces must still decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The tiny pieces must still decode back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # The charset may be given by name instead of as a Charset object.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # One non-ASCII character: the expected encoded word uses 'q'.
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # All non-ASCII: the expected encoded word uses 'b'.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # Without an error policy the stray \x96 byte is rejected ...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ... with errors='replace' the result matches a utf-8 decode that
        # uses the same policy.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        # Decoding and rebuilding must give the same encoded form.
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        # The two adjacent koi8-r encoded words come back as one chunk.
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        raises(errors.HeaderParseError, decode_header, s)
3540
3541
Ezio Melottib3aedd42010-11-20 19:04:17 +00003542
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003543# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    def test_get_param(self):
        # The title parameter of msg_29.txt is returned as a
        # (charset, language, value) triple, with or without the quotes.
        msg = self._msgobj('msg_29.txt')
        self.assertEqual(
            msg.get_param('title'),
            ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
        self.assertEqual(
            msg.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        title = 'This is even more ***fun*** isn\'t it!'
        msg = Message()
        # Charset only: the language slot comes back empty.
        msg.set_param('title', title, charset='us-ascii')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', '', title))
        # Charset and language.
        msg.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(msg.get_param('title'),
                              ('us-ascii', 'en', title))
        # Same parameter on a parsed message, checked in flattened form.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('title', title, charset='us-ascii', language='en')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        # Add two parameters, delete one, and check what is left in the
        # flattened message.
        msg = self._msgobj('msg_01.txt')
        msg.set_param('foo', 'bar', charset='us-ascii', language='en')
        msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                      charset='us-ascii', language='en')
        msg.del_param('foo', header='Content-Type')
        self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        msg = self._msgobj('msg_32.txt')
        self.assertEqual(msg.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        # Plain numbered continuations (no trailing '*') are joined into an
        # ordinary string, not a triple.
        text = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        msg = email.message_from_string(text)
        value = msg.get_param('NAME')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        # Empty charset and language fields ('') in the first segment.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        # Same input and expectation as the test above.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        # Only segment 1 is marked encoded; segment 0 keeps its %XX text.
        text = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        # No segment is marked encoded, so every %XX sequence is kept.
        text = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(
            msg.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        text = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        # This is a nonsensical charset value, but tests the code anyway
        text = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        # An unknown charset name ('bogus') still yields the decoded text.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        text = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        msg = email.message_from_string(text)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(msg.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        # The stray %E2 byte shows up as U+FFFD in the result.
        text = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        text = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        # A lone apostrophe in extended segments is part of the value;
        # charset and language both come back as None.
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        text = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        value = msg.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        # Charset and language are split off at the first two apostrophes;
        # the third one stays in the value.
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        # Without the extended marker the apostrophes are plain text.
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        msg = email.message_from_string(text)
        value = msg.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        text = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        msg = email.message_from_string(text)
        self.assertEqual(msg.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        text = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        msg = email.message_from_string(text)
        charset, language, value = msg.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
3852
3853
Ezio Melottib3aedd42010-11-20 19:04:17 +00003854
R. David Murraya8f480f2010-01-16 18:30:03 +00003855# Tests to ensure that signed parts of an email are completely preserved, as
3856# required by RFC1847 section 2.1. Note that these are incomplete, because the
3857# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        # Return the raw text of the named data file together with the
        # message object parsed from that text.
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _signed_parts_eq(self, original, result):
        # Extract the first mime part of each message: the text between a
        # '--<boundary>' line and the next line that is exactly the same
        # '--<boundary>'.  (re is imported at the top of the module.)
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        inmatch = repart.search(original)
        outmatch = repart.search(result)
        # Report a missing part as a test failure instead of letting
        # .group() blow up on None with an AttributeError.
        if inmatch is None:
            self.fail('no MIME part found in the original message')
        if outmatch is None:
            self.fail('no MIME part found in the generated message')
        self.assertEqual(outmatch.group(2), inmatch.group(2))

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Same check, going through an explicit Generator.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3890
3891
Ezio Melottib3aedd42010-11-20 19:04:17 +00003892
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    The objects come back in the order in which dir() lists their names.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
3896
3897
def suite():
    """Return one TestSuite holding the tests of every class from
    _testclasses().

    The tests are loaded with the default TestLoader rather than
    unittest.makeSuite(), which is deprecated and no longer available in
    recent Python versions.
    """
    load = unittest.defaultTestLoader.loadTestsFromTestCase
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(load(testclass))
    return all_tests
3903
3904
def test_main():
    """Hand each class returned by _testclasses() to run_unittest(), one
    call per class."""
    for case_class in _testclasses():
        run_unittest(case_class)
3908
3909
Ezio Melottib3aedd42010-11-20 19:04:17 +00003910
# When run as a script, let unittest run the tests returned by suite().
if __name__ == '__main__':
    unittest.main(defaultTest='suite')