blob: 78fb9616152e2fbad01090121bba12acdd1893a4 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
R. David Murray719a4492010-11-21 16:53:48 +00006import re
Guido van Rossum8b3febe2007-08-30 01:15:14 +00007import sys
8import time
9import base64
10import difflib
11import unittest
12import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000013import textwrap
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
R. David Murray96fd54e2010-10-08 15:55:28 +000015from io import StringIO, BytesIO
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016from itertools import chain
17
18import email
19
20from email.charset import Charset
21from email.header import Header, decode_header, make_header
22from email.parser import Parser, HeaderParser
23from email.generator import Generator, DecodedGenerator
24from email.message import Message
25from email.mime.application import MIMEApplication
26from email.mime.audio import MIMEAudio
27from email.mime.text import MIMEText
28from email.mime.image import MIMEImage
29from email.mime.base import MIMEBase
30from email.mime.message import MIMEMessage
31from email.mime.multipart import MIMEMultipart
32from email import utils
33from email import errors
34from email import encoders
35from email import iterators
36from email import base64mime
37from email import quoprimime
38
R. David Murray96fd54e2010-10-08 15:55:28 +000039from test.support import findfile, run_unittest, unlink
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from email.test import __file__ as landmark
41
42
43NL = '\n'
44EMPTYSTRING = ''
45SPACE = ' '
46
47
Ezio Melottib3aedd42010-11-20 19:04:17 +000048
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to the email tests.

    Any extra positional or keyword arguments are handed straight to the
    built-in ``open()``; the opened file object is returned.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
52
53
Ezio Melottib3aedd42010-11-20 19:04:17 +000054
Guido van Rossum8b3febe2007-08-30 01:15:14 +000055# Base test class
class TestEmailBase(unittest.TestCase):
    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of every line so that whitespace and
            # escape differences are visible in the reported diff.
            left, right = (
                [repr(line) for line in str(value).splitlines()]
                for value in (first, second))
            delta = difflib.ndiff(left, right)
            raise self.failureException(NL + NL.join(delta))

    def _msgobj(self, filename):
        # Parse the named test data file into a message object.
        path = findfile(filename)
        with openfile(path) as data_file:
            return email.message_from_file(data_file)
70
71
Ezio Melottib3aedd42010-11-20 19:04:17 +000072
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A header that is absent yields the supplied default untouched.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field instead of replacing
            # the old one, and lookups return the first match.  Drop the
            # previous value so that every spelling is really exercised
            # (deleting a header that is absent is a no-op).
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        for name in ('from', 'From', 'FROM', 'to', 'To', 'TO'):
            self.assertIn(name, msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope header; the
        # remainder must match the file contents.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        for name in ('header', 'Header', 'HEADER'):
            self.assertIn(name, msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # Undecodable base64 comes back as the raw payload bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))

    # Issue 1078919
    def test_ascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename='bud.gif')
        self.assertEqual('attachment; filename="bud.gif"',
                         msg['Content-Disposition'])

    def test_noascii_add_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename="Fußballer.ppt")
        self.assertEqual(
            'attachment; filename*="utf-8\'\'Fu%C3%9Fballer.ppt"',
            msg['Content-Disposition'])

    def test_nonascii_add_header_via_triple(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment',
                       filename=('iso-8859-1', '', 'Fußballer.ppt'))
        self.assertEqual(
            'attachment; filename*="iso-8859-1\'\'Fu%DFballer.ppt"',
            msg['Content-Disposition'])
Ezio Melottib3aedd42010-11-20 19:04:17 +0000537
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000538# Test the email.encoders module
539class TestEncoders(unittest.TestCase):
540 def test_encode_empty_payload(self):
541 eq = self.assertEqual
542 msg = Message()
543 msg.set_charset('us-ascii')
544 eq(msg['content-transfer-encoding'], '7bit')
545
546 def test_default_cte(self):
547 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000548 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000549 msg = MIMEText('hello world')
550 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000551 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000552 msg = MIMEText('hello \xf8 world')
553 eq(msg['content-transfer-encoding'], '8bit')
554 # And now with a different charset
555 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
556 eq(msg['content-transfer-encoding'], 'quoted-printable')
557
R. David Murraye85200d2010-05-06 01:41:14 +0000558 def test_encode7or8bit(self):
559 # Make sure a charset whose input character set is 8bit but
560 # whose output character set is 7bit gets a transfer-encoding
561 # of 7bit.
562 eq = self.assertEqual
R. David Murray850fc852010-06-03 01:58:28 +0000563 msg = MIMEText('文', _charset='euc-jp')
R. David Murraye85200d2010-05-06 01:41:14 +0000564 eq(msg['content-transfer-encoding'], '7bit')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000565
Ezio Melottib3aedd42010-11-20 19:04:17 +0000566
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000567# Test long header wrapping
568class TestLongHeaders(TestEmailBase):
569 def test_split_long_continuation(self):
570 eq = self.ndiffAssertEqual
571 msg = email.message_from_string("""\
572Subject: bug demonstration
573\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
574\tmore text
575
576test
577""")
578 sfp = StringIO()
579 g = Generator(sfp)
580 g.flatten(msg)
581 eq(sfp.getvalue(), """\
582Subject: bug demonstration
583\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
584\tmore text
585
586test
587""")
588
589 def test_another_long_almost_unsplittable_header(self):
590 eq = self.ndiffAssertEqual
591 hstr = """\
592bug demonstration
593\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
594\tmore text"""
595 h = Header(hstr, continuation_ws='\t')
596 eq(h.encode(), """\
597bug demonstration
598\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
599\tmore text""")
600 h = Header(hstr.replace('\t', ' '))
601 eq(h.encode(), """\
602bug demonstration
603 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
604 more text""")
605
606 def test_long_nonstring(self):
607 eq = self.ndiffAssertEqual
608 g = Charset("iso-8859-1")
609 cz = Charset("iso-8859-2")
610 utf8 = Charset("utf-8")
611 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
612 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
613 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
614 b'bef\xf6rdert. ')
615 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
616 b'd\xf9vtipu.. ')
617 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
618 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
619 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
620 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
621 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
622 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
623 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
624 '\u3044\u307e\u3059\u3002')
625 h = Header(g_head, g, header_name='Subject')
626 h.append(cz_head, cz)
627 h.append(utf8_head, utf8)
628 msg = Message()
629 msg['Subject'] = h
630 sfp = StringIO()
631 g = Generator(sfp)
632 g.flatten(msg)
633 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000634Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
635 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
636 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
637 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
638 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
639 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
640 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
641 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
642 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
643 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
644 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000645
646""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000647 eq(h.encode(maxlinelen=76), """\
648=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
649 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
650 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
651 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
652 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
653 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
654 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
655 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
656 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
657 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
658 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000659
660 def test_long_header_encode(self):
661 eq = self.ndiffAssertEqual
662 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
663 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
664 header_name='X-Foobar-Spoink-Defrobnit')
665 eq(h.encode(), '''\
666wasnipoop; giraffes="very-long-necked-animals";
667 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
668
669 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
670 eq = self.ndiffAssertEqual
671 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
672 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
673 header_name='X-Foobar-Spoink-Defrobnit',
674 continuation_ws='\t')
675 eq(h.encode(), '''\
676wasnipoop; giraffes="very-long-necked-animals";
677 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
678
679 def test_long_header_encode_with_tab_continuation(self):
680 eq = self.ndiffAssertEqual
681 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
682 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
683 header_name='X-Foobar-Spoink-Defrobnit',
684 continuation_ws='\t')
685 eq(h.encode(), '''\
686wasnipoop; giraffes="very-long-necked-animals";
687\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
688
689 def test_header_splitter(self):
690 eq = self.ndiffAssertEqual
691 msg = MIMEText('')
692 # It'd be great if we could use add_header() here, but that doesn't
693 # guarantee an order of the parameters.
694 msg['X-Foobar-Spoink-Defrobnit'] = (
695 'wasnipoop; giraffes="very-long-necked-animals"; '
696 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
697 sfp = StringIO()
698 g = Generator(sfp)
699 g.flatten(msg)
700 eq(sfp.getvalue(), '''\
701Content-Type: text/plain; charset="us-ascii"
702MIME-Version: 1.0
703Content-Transfer-Encoding: 7bit
704X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
705 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
706
707''')
708
709 def test_no_semis_header_splitter(self):
710 eq = self.ndiffAssertEqual
711 msg = Message()
712 msg['From'] = 'test@dom.ain'
713 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
714 msg.set_payload('Test')
715 sfp = StringIO()
716 g = Generator(sfp)
717 g.flatten(msg)
718 eq(sfp.getvalue(), """\
719From: test@dom.ain
720References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
721 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
722
723Test""")
724
725 def test_no_split_long_header(self):
726 eq = self.ndiffAssertEqual
727 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000728 h = Header(hstr)
729 # These come on two lines because Headers are really field value
730 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000731 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000732References:
733 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
734 h = Header('x' * 80)
735 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000736
737 def test_splitting_multiple_long_lines(self):
738 eq = self.ndiffAssertEqual
739 hstr = """\
740from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
741\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
742\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
743"""
744 h = Header(hstr, continuation_ws='\t')
745 eq(h.encode(), """\
746from babylon.socal-raves.org (localhost [127.0.0.1]);
747 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
748 for <mailman-admin@babylon.socal-raves.org>;
749 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
750\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
751 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
752 for <mailman-admin@babylon.socal-raves.org>;
753 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
754\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
755 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
756 for <mailman-admin@babylon.socal-raves.org>;
757 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
758
759 def test_splitting_first_line_only_is_long(self):
760 eq = self.ndiffAssertEqual
761 hstr = """\
762from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
763\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
764\tid 17k4h5-00034i-00
765\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
766 h = Header(hstr, maxlinelen=78, header_name='Received',
767 continuation_ws='\t')
768 eq(h.encode(), """\
769from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
770 helo=cthulhu.gerg.ca)
771\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
772\tid 17k4h5-00034i-00
773\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
774
775 def test_long_8bit_header(self):
776 eq = self.ndiffAssertEqual
777 msg = Message()
778 h = Header('Britische Regierung gibt', 'iso-8859-1',
779 header_name='Subject')
780 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000781 eq(h.encode(maxlinelen=76), """\
782=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
783 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000784 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000785 eq(msg.as_string(maxheaderlen=76), """\
786Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
787 =?iso-8859-1?q?hore-Windkraftprojekte?=
788
789""")
790 eq(msg.as_string(maxheaderlen=0), """\
791Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000792
793""")
794
795 def test_long_8bit_header_no_charset(self):
796 eq = self.ndiffAssertEqual
797 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000798 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
799 'f\xfcr Offshore-Windkraftprojekte '
800 '<a-very-long-address@example.com>')
801 msg['Reply-To'] = header_string
802 self.assertRaises(UnicodeEncodeError, msg.as_string)
803 msg = Message()
804 msg['Reply-To'] = Header(header_string, 'utf-8',
805 header_name='Reply-To')
806 eq(msg.as_string(maxheaderlen=78), """\
807Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
808 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000809
810""")
811
812 def test_long_to_header(self):
813 eq = self.ndiffAssertEqual
814 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
815 '<someone@eecs.umich.edu>,'
816 '"Someone Test #B" <someone@umich.edu>, '
817 '"Someone Test #C" <someone@eecs.umich.edu>, '
818 '"Someone Test #D" <someone@eecs.umich.edu>')
819 msg = Message()
820 msg['To'] = to
821 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000822To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000823 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000824 "Someone Test #C" <someone@eecs.umich.edu>,
825 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000826
827''')
828
829 def test_long_line_after_append(self):
830 eq = self.ndiffAssertEqual
831 s = 'This is an example of string which has almost the limit of header length.'
832 h = Header(s)
833 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000834 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000835This is an example of string which has almost the limit of header length.
836 Add another line.""")
837
838 def test_shorter_line_with_append(self):
839 eq = self.ndiffAssertEqual
840 s = 'This is a shorter line.'
841 h = Header(s)
842 h.append('Add another sentence. (Surprise?)')
843 eq(h.encode(),
844 'This is a shorter line. Add another sentence. (Surprise?)')
845
846 def test_long_field_name(self):
847 eq = self.ndiffAssertEqual
848 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000849 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
850 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
851 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
852 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000853 h = Header(gs, 'iso-8859-1', header_name=fn)
854 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000855 eq(h.encode(maxlinelen=76), """\
856=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
857 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
858 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
859 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000860
861 def test_long_received_header(self):
862 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
863 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
864 'Wed, 05 Mar 2003 18:10:18 -0700')
865 msg = Message()
866 msg['Received-1'] = Header(h, continuation_ws='\t')
867 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000868 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000869 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000870Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
871 Wed, 05 Mar 2003 18:10:18 -0700
872Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
873 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000874
875""")
876
877 def test_string_headerinst_eq(self):
878 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
879 'tu-muenchen.de> (David Bremner\'s message of '
880 '"Thu, 6 Mar 2003 13:58:21 +0100")')
881 msg = Message()
882 msg['Received-1'] = Header(h, header_name='Received-1',
883 continuation_ws='\t')
884 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000885 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000886 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000887Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
888 6 Mar 2003 13:58:21 +0100\")
889Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
890 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000891
892""")
893
894 def test_long_unbreakable_lines_with_continuation(self):
895 eq = self.ndiffAssertEqual
896 msg = Message()
897 t = """\
898iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
899 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
900 msg['Face-1'] = t
901 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000902 # XXX This splitting is all wrong. It the first value line should be
903 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000904 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000905Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000906 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000907 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000908Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000909 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000910 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
911
912""")
913
914 def test_another_long_multiline_header(self):
915 eq = self.ndiffAssertEqual
916 m = ('Received: from siimage.com '
917 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000918 'Microsoft SMTPSVC(5.0.2195.4905); '
919 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000920 msg = email.message_from_string(m)
921 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000922Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
923 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000924
925''')
926
927 def test_long_lines_with_different_header(self):
928 eq = self.ndiffAssertEqual
929 h = ('List-Unsubscribe: '
930 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
931 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
932 '?subject=unsubscribe>')
933 msg = Message()
934 msg['List'] = h
935 msg['List'] = Header(h, header_name='List')
936 eq(msg.as_string(maxheaderlen=78), """\
937List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000938 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000939List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000940 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000941
942""")
943
944
Ezio Melottib3aedd42010-11-20 19:04:17 +0000945
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000946# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The payload's first line begins with "From ", which is the text the
        # generator's mangle_from_ option acts on.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Serialise self.msg with the given mangle_from_ setting and return
        # the generated text.
        sink = StringIO()
        Generator(sink, mangle_from_=mangle).flatten(self.msg)
        return sink.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
977
978
Ezio Melottib3aedd42010-11-20 19:04:17 +0000979
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000980# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # The payload is expected to be the base64 text of the raw audio.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides whatever would have been guessed.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # Unique sentinel so "returned the failobj" is an identity check.
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        missing = object()
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1024
1025
Ezio Melottib3aedd42010-11-20 19:04:17 +00001026
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001027# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # The payload is expected to be the base64 text of the raw image.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(bytes(payload, 'ascii')),
                         self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides whatever would have been guessed.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # Unique sentinel so "returned the failobj" is an identity check.
        # assertIs reports both operands on failure, unlike
        # assertTrue(a is b).
        missing = object()
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1065
1066
Ezio Melottib3aedd42010-11-20 19:04:17 +00001067
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001068# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # Arbitrary binary data defaults to application/octet-stream and is
        # transferred as base64.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        eq = self.assertEqual
        # Named ``raw`` rather than ``bytes`` so the builtin is not shadowed.
        raw = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(raw)
        # Surrounding whitespace in a base64 body carries no meaning, so a
        # trailing newline from the encoder must not fail the comparison.
        eq(msg.get_payload().strip(), '+vv8/f7/')
        eq(msg.get_payload(decode=True), raw)
1082
1083
Ezio Melottib3aedd42010-11-20 19:04:17 +00001084
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001085# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # Unique sentinel: get_param must hand back this very object when the
        # parameter or the header is absent.  assertIs shows both operands on
        # failure, unlike assertTrue(a is b).
        missing = object()
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input(self):
        # NOTE(review): currently identical to test_charset; kept so the set
        # of test names is unchanged.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_7bit_input_no_charset(self):
        eq = self.assertEqual
        msg = MIMEText('hello there')
        eq(msg.get_charset(), 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        self.assertIn('hello there', msg.as_string())

    def test_utf8_input(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        eq = self.assertEqual
        msg = MIMEText(teststr, _charset='utf-8')
        eq(msg.get_charset().output_charset, 'utf-8')
        eq(msg['content-type'], 'text/plain; charset="utf-8"')
        eq(msg.get_payload(decode=True), teststr.encode('utf-8'))

    @unittest.skip("can't fix because of backward compat in email5, "
                   "will fix in email6")
    def test_utf8_input_no_charset(self):
        teststr = '\u043a\u0438\u0440\u0438\u043b\u0438\u0446\u0430'
        self.assertRaises(UnicodeEncodeError, MIMEText, teststr)
1136
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001137
Ezio Melottib3aedd42010-11-20 19:04:17 +00001138
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001139# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a two-part multipart/mixed message: a short text introduction
        # followed by a GIF attachment.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        local = time.localtime(now)
        # time.timezone / time.altzone count seconds *west* of UTC, so a
        # positive value corresponds to a negative offset in the Date header.
        if local.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  The earlier ``tzsecs / 36`` let a
        # negative value leak its own sign into the digits (" +-100") and
        # produced wrong digits for offsets that are not whole hours.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', local) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # Fresh multipart/mixed container carrying the three headers that the
        # generator tests below all share.  The boundary, preamble and
        # epilogue are left for each test to set in its own order.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        m = self._msg
        self.assertTrue(m.is_multipart())
        self.assertEqual(m.get_content_type(), 'multipart/mixed')
        self.assertEqual(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        self.assertEqual(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        outer.attach(MIMEText('hello world'))
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        outer = self._make_outer()
        outer.preamble = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = None
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = ''
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        outer = self._make_outer()
        outer.epilogue = '\n'
        outer.attach(MIMEText('hello world'))
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        # Each alternative wraps exactly one text/plain message.
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation below (four spaces per level)
        # was reconstructed; confirm it against the actual _structure()
        # output for msg_38.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): the nesting indentation below (four spaces per level)
        # was reconstructed; confirm it against the actual _structure()
        # output for msg_39.txt.
        self.ndiffAssertEqual(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not trigger
        # multipart parsing; the body round-trips verbatim.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the very last thing in the text, with no
        # newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001511
1512
Ezio Melottib3aedd42010-11-20 19:04:17 +00001513
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001514# Test some badly formatted messages
1515class TestNonConformant(TestEmailBase):
1516 def test_parse_missing_minor_type(self):
1517 eq = self.assertEqual
1518 msg = self._msgobj('msg_14.txt')
1519 eq(msg.get_content_type(), 'text/plain')
1520 eq(msg.get_content_maintype(), 'text')
1521 eq(msg.get_content_subtype(), 'plain')
1522
1523 def test_same_boundary_inner_outer(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001524 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001525 msg = self._msgobj('msg_15.txt')
1526 # XXX We can probably eventually do better
1527 inner = msg.get_payload(0)
1528 unless(hasattr(inner, 'defects'))
1529 self.assertEqual(len(inner.defects), 1)
1530 unless(isinstance(inner.defects[0],
1531 errors.StartBoundaryNotFoundDefect))
1532
1533 def test_multipart_no_boundary(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001534 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001535 msg = self._msgobj('msg_25.txt')
1536 unless(isinstance(msg.get_payload(), str))
1537 self.assertEqual(len(msg.defects), 2)
1538 unless(isinstance(msg.defects[0], errors.NoBoundaryInMultipartDefect))
1539 unless(isinstance(msg.defects[1],
1540 errors.MultipartInvariantViolationDefect))
1541
1542 def test_invalid_content_type(self):
1543 eq = self.assertEqual
1544 neq = self.ndiffAssertEqual
1545 msg = Message()
1546 # RFC 2045, $5.2 says invalid yields text/plain
1547 msg['Content-Type'] = 'text'
1548 eq(msg.get_content_maintype(), 'text')
1549 eq(msg.get_content_subtype(), 'plain')
1550 eq(msg.get_content_type(), 'text/plain')
1551 # Clear the old value and try something /really/ invalid
1552 del msg['content-type']
1553 msg['Content-Type'] = 'foo'
1554 eq(msg.get_content_maintype(), 'text')
1555 eq(msg.get_content_subtype(), 'plain')
1556 eq(msg.get_content_type(), 'text/plain')
1557 # Still, make sure that the message is idempotently generated
1558 s = StringIO()
1559 g = Generator(s)
1560 g.flatten(msg)
1561 neq(s.getvalue(), 'Content-Type: foo\n\n')
1562
1563 def test_no_start_boundary(self):
1564 eq = self.ndiffAssertEqual
1565 msg = self._msgobj('msg_31.txt')
1566 eq(msg.get_payload(), """\
1567--BOUNDARY
1568Content-Type: text/plain
1569
1570message 1
1571
1572--BOUNDARY
1573Content-Type: text/plain
1574
1575message 2
1576
1577--BOUNDARY--
1578""")
1579
1580 def test_no_separating_blank_line(self):
1581 eq = self.ndiffAssertEqual
1582 msg = self._msgobj('msg_35.txt')
1583 eq(msg.as_string(), """\
1584From: aperson@dom.ain
1585To: bperson@dom.ain
1586Subject: here's something interesting
1587
1588counter to RFC 2822, there's no separating newline here
1589""")
1590
def test_lying_multipart(self):
    # Parse msg_41.txt and require the same two defects, in the same
    # order, that a multipart without a boundary produces.
    msg = self._msgobj('msg_41.txt')
    self.assertTrue(hasattr(msg, 'defects'))
    self.assertEqual(len(msg.defects), 2)
    # assertIsInstance names the offending object when it fails.
    self.assertIsInstance(msg.defects[0],
                          errors.NoBoundaryInMultipartDefect)
    self.assertIsInstance(msg.defects[1],
                          errors.MultipartInvariantViolationDefect)
1599
def test_missing_start_boundary(self):
    outer = self._msgobj('msg_42.txt')
    # The message structure is:
    #
    # multipart/mixed
    #    text/plain
    #    message/rfc822
    #        multipart/mixed [*]
    #
    # [*] This message is missing its start boundary
    bad = outer.get_payload(1).get_payload(0)
    self.assertEqual(len(bad.defects), 1)
    # assertIsInstance names the offending object when it fails.
    self.assertIsInstance(bad.defects[0],
                          errors.StartBoundaryNotFoundDefect)
1614
1615 def test_first_line_is_continuation_header(self):
1616 eq = self.assertEqual
1617 m = ' Line 1\nLine 2\nLine 3'
1618 msg = email.message_from_string(m)
1619 eq(msg.keys(), [])
1620 eq(msg.get_payload(), 'Line 2\nLine 3')
1621 eq(len(msg.defects), 1)
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001622 self.assertTrue(isinstance(msg.defects[0],
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001623 errors.FirstHeaderLineIsContinuationDefect))
1624 eq(msg.defects[0].line, ' Line 1\n')
1625
1626
Ezio Melottib3aedd42010-11-20 19:04:17 +00001627
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001628# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # A folded header mixing plain text with Q-encoded mac-iceland
        # words must decode, rebuild and re-encode consistently.
        # NOTE(review): leading whitespace inside the original multi-line
        # literals was not recoverable from the reviewed copy; the single
        # leading space on each continuation line is assumed from RFC 2822
        # header folding -- confirm against the pristine file.
        encoded_word = '=?mac-iceland?q?r=8Aksm=9Arg=8Cs?='
        raw_word = b'r\x8aksm\x9arg\x8cs'
        source = ('Re: ' + encoded_word + ' baz\n'
                  ' foo bar ' + encoded_word)
        chunks = decode_header(source)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (raw_word, 'mac-iceland'),
            (b'baz foo bar', None),
            (raw_word, 'mac-iceland'),
        ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(
            rebuilt.encode(maxlinelen=76),
            'Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar '
            '=?mac-iceland?q?r=8Aksm?=\n'
            ' =?mac-iceland?q?=9Arg=8Cs?=')

    def test_whitespace_eater_unicode(self):
        # The whitespace between the encoded word and the plain text is
        # dropped when decoding and restored when the header is rebuilt.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Andr\xe9', 'iso-8859-1'),
            (b'Pirard <pirard@dom.ain>', None),
        ])
        self.assertEqual(str(make_header(chunks)),
                         'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Same idea with base64 words interleaved with plain text.
        raw = ('The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the '
               '=?iso-8859-1?b?bGF6eSBkb2c=?=')
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1'),
        ])
        self.assertEqual(str(make_header(chunks)),
                         'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not set off by whitespace are expected
        # to come back undecoded, as one chunk.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        self.assertEqual(decode_header(raw), [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        self.assertEqual(decode_header(raw), [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None),
        ])

    def test_rfc2047_B_bad_padding(self):
        template = '=?iso-8859-1?B?%s?='
        # Only complete bytes are tested: each base64 text has missing or
        # excess '=' padding but still encodes whole bytes.
        cases = [
            ('dm==', b'v'), ('dm=', b'v'), ('dm', b'v'),
            ('dmk=', b'vi'), ('dmk', b'vi'),
        ]
        for encoded, expected in cases:
            self.assertEqual(decode_header(template % encoded),
                             [(expected, 'iso-8859-1')])

    def test_rfc2047_Q_invalid_digits(self):
        # issue 10004: '=zz' is not a valid hex escape and must be passed
        # through literally.
        # NOTE(review): 'iso-8659-1' looks like a typo for 'iso-8859-1',
        # but the same spelling is expected back so the test is
        # self-consistent; left untouched.
        raw = '=?iso-8659-1?Q?andr=e9=zz?='
        self.assertEqual(decode_header(raw),
                         [(b'andr\xe9=zz', 'iso-8659-1')])
1692
Ezio Melottib3aedd42010-11-20 19:04:17 +00001693
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001694# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    def setUp(self):
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # A plain str is not an acceptable argument for MIMEMessage.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        payload = r.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subpart = payload[0]
        # The wrapped message must be the very same object, not a copy.
        self.assertIs(subpart, m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        # Attaching a second message to the wrapper is expected to fail.
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertIsInstance(submsg, Message)
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): leading/internal whitespace of the report lines
        # below was not recoverable from the reviewed copy; a two-space
        # indent with single spaces after the colons is assumed -- confirm
        # against msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        self.assertIsInstance(dsn1, Message)
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertIsNone(dsn1.get_param('nsd', header='reporting-mta'))
        dsn2 = subpart.get_payload(1)
        self.assertIsInstance(dsn2, Message)
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        subsubpart = payload[0]
        self.assertIsInstance(subsubpart, Message)
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        # Build by hand the message the fixture is expected to contain.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        # The preamble has no trailing newline; the expected output below
        # shows one being supplied before the first boundary.
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def _check_digest_default_types(self, filename):
        # Shared body of the two default-type tests below: the first two
        # parts of the fixture must be message/rfc822 containers (default
        # and effective type alike), each wrapping a text/plain message.
        eq = self.assertEqual
        with openfile(filename) as fp:
            msg = email.message_from_file(fp)
        containers = [msg.get_payload(0), msg.get_payload(1)]
        for container in containers:
            eq(container.get_default_type(), 'message/rfc822')
            eq(container.get_content_type(), 'message/rfc822')
        for container in containers:
            inner = container.get_payload(0)
            eq(inner.get_default_type(), 'text/plain')
            eq(inner.get_content_type(), 'text/plain')

    def test_default_type(self):
        self._check_digest_default_types('msg_30.txt')

    def test_default_type_with_explicit_container_type(self):
        self._check_digest_default_types('msg_28.txt')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed the subparts must still report
        # message/rfc822, and the flattened output simply omits the headers.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001993
Ezio Melottib3aedd42010-11-20 19:04:17 +00001994
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):

    linesep = '\n'

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed
        # from, so callers can compare the regenerated output against it.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text, unixfrom=False):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the result to equal the original text.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg, unixfrom=unixfrom)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        self.assertIsNone(msg.preamble)
        self.assertIsNone(msg.epilogue)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        msg, text = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')
        self.assertIsNone(msg.get_params())
        self.assertIsNone(msg.get_param('charset'))
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        self._idempotent(*self._msgobj('msg_04.txt'))

    def test_MIME_digest(self):
        self._idempotent(*self._msgobj('msg_02.txt'))

    def test_long_header(self):
        self._idempotent(*self._msgobj('msg_27.txt'))

    def test_MIME_digest_with_part_headers(self):
        self._idempotent(*self._msgobj('msg_28.txt'))

    def test_mixed_with_image(self):
        self._idempotent(*self._msgobj('msg_06.txt'))

    def test_multipart_report(self):
        self._idempotent(*self._msgobj('msg_05.txt'))

    def test_dsn(self):
        self._idempotent(*self._msgobj('msg_16.txt'))

    def test_preamble_epilogue(self):
        self._idempotent(*self._msgobj('msg_21.txt'))

    def test_multipart_one_part(self):
        self._idempotent(*self._msgobj('msg_23.txt'))

    def test_multipart_no_parts(self):
        self._idempotent(*self._msgobj('msg_24.txt'))

    def test_no_start_boundary(self):
        self._idempotent(*self._msgobj('msg_31.txt'))

    def test_rfc2231_charset(self):
        self._idempotent(*self._msgobj('msg_32.txt'))

    def test_more_rfc2231_parameters(self):
        self._idempotent(*self._msgobj('msg_33.txt'))

    def test_text_plain_in_a_multipart_digest(self):
        self._idempotent(*self._msgobj('msg_34.txt'))

    def test_nested_multipart_mixeds(self):
        self._idempotent(*self._msgobj('msg_12a.txt'))

    def test_message_external_body_idempotent(self):
        self._idempotent(*self._msgobj('msg_36.txt'))

    def test_message_delivery_status(self):
        # This is the one case that also regenerates the Unix-From line.
        self._idempotent(*self._msgobj('msg_43.txt'), unixfrom=True)

    def test_message_signed_idempotent(self):
        self._idempotent(*self._msgobj('msg_45.txt'))

    def test_content_type(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.' + self.linesep)
        eq(msg.epilogue, self.linesep)
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda' + self.linesep)
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertIsInstance(msg3, Message)
        payload = msg3.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg4 = payload[0]
        self.assertIsInstance(msg4, Message)
        eq(msg4.get_payload(), 'Yadda yadda yadda' + self.linesep)

    def test_parser(self):
        eq = self.assertEqual
        msg, _ = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and
        # that that submessage has a type of text/plain
        payload = msg.get_payload()
        self.assertIsInstance(payload, list)
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertIsInstance(msg1, Message)
        eq(msg1.get_content_type(), 'text/plain')
        self.assertIsInstance(msg1.get_payload(), str)
        eq(msg1.get_payload(), self.linesep)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002157
2158
Ezio Melottib3aedd42010-11-20 19:04:17 +00002159
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002160# Test various other bits of the package's functionality
2161class TestMiscellaneous(TestEmailBase):
def test_message_from_string(self):
    with openfile('msg_01.txt') as fp:
        original = fp.read()
    parsed = email.message_from_string(original)
    regenerated = StringIO()
    # Header wrapping is disabled (maxheaderlen=0) because this is an
    # idempotency check: long headers must come back unchanged.
    Generator(regenerated, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, regenerated.getvalue())
2172
def test_message_from_file(self):
    with openfile('msg_01.txt') as fp:
        original = fp.read()
        # Rewind so the parser sees the file from the start again.
        fp.seek(0)
        parsed = email.message_from_file(fp)
    regenerated = StringIO()
    # Header wrapping is disabled (maxheaderlen=0) because this is an
    # idempotency check: long headers must come back unchanged.
    Generator(regenerated, maxheaderlen=0).flatten(parsed)
    self.assertEqual(original, regenerated.getvalue())
2184
def test_message_from_string_with_class(self):
    with openfile('msg_01.txt') as fp:
        text = fp.read()

    # Create a subclass
    class MyMessage(Message):
        pass

    msg = email.message_from_string(text, MyMessage)
    # assertIsInstance names the offending object when it fails.
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated: every part found by walk() must be
    # built with the custom class as well.
    with openfile('msg_02.txt') as fp:
        text = fp.read()
    msg = email.message_from_string(text, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2202
def test_message_from_file_with_class(self):
    # Create a subclass
    class MyMessage(Message):
        pass

    with openfile('msg_01.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    # assertIsInstance names the offending object when it fails.
    self.assertIsInstance(msg, MyMessage)
    # Try something more complicated: every part found by walk() must be
    # built with the custom class as well.
    with openfile('msg_02.txt') as fp:
        msg = email.message_from_file(fp, MyMessage)
    for subpart in msg.walk():
        self.assertIsInstance(subpart, MyMessage)
2217
2218 def test__all__(self):
2219 module = __import__('email')
2220 # Can't use sorted() here due to Python 2.3 compatibility
2221 all = module.__all__[:]
2222 all.sort()
2223 self.assertEqual(all, [
2224 'base64mime', 'charset', 'encoders', 'errors', 'generator',
R. David Murray96fd54e2010-10-08 15:55:28 +00002225 'header', 'iterators', 'message', 'message_from_binary_file',
2226 'message_from_bytes', 'message_from_file',
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002227 'message_from_string', 'mime', 'parser',
2228 'quoprimime', 'utils',
2229 ])
2230
2231 def test_formatdate(self):
2232 now = time.time()
2233 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2234 time.gmtime(now)[:6])
2235
2236 def test_formatdate_localtime(self):
2237 now = time.time()
2238 self.assertEqual(
2239 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2240 time.localtime(now)[:6])
2241
2242 def test_formatdate_usegmt(self):
2243 now = time.time()
2244 self.assertEqual(
2245 utils.formatdate(now, localtime=False),
2246 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2247 self.assertEqual(
2248 utils.formatdate(now, localtime=False, usegmt=True),
2249 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2250
2251 def test_parsedate_none(self):
2252 self.assertEqual(utils.parsedate(''), None)
2253
2254 def test_parsedate_compact(self):
2255 # The FWS after the comma is optional
2256 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2257 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2258
2259 def test_parsedate_no_dayofweek(self):
2260 eq = self.assertEqual
2261 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2262 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2263
2264 def test_parsedate_compact_no_dayofweek(self):
2265 eq = self.assertEqual
2266 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2267 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2268
2269 def test_parsedate_acceptable_to_time_functions(self):
2270 eq = self.assertEqual
2271 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2272 t = int(time.mktime(timetup))
2273 eq(time.localtime(t)[:6], timetup[:6])
2274 eq(int(time.strftime('%Y', timetup)), 2003)
2275 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2276 t = int(time.mktime(timetup[:9]))
2277 eq(time.localtime(t)[:6], timetup[:6])
2278 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2279
R. David Murray219d1c82010-08-25 00:45:55 +00002280 def test_parsedate_y2k(self):
2281 """Test for parsing a date with a two-digit year.
2282
2283 Parsing a date with a two-digit year should return the correct
2284 four-digit year. RFC822 allows two-digit years, but RFC2822 (which
2285 obsoletes RFC822) requires four-digit years.
2286
2287 """
2288 self.assertEqual(utils.parsedate_tz('25 Feb 03 13:47:26 -0800'),
2289 utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'))
2290 self.assertEqual(utils.parsedate_tz('25 Feb 71 13:47:26 -0800'),
2291 utils.parsedate_tz('25 Feb 1971 13:47:26 -0800'))
2292
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002293 def test_parseaddr_empty(self):
2294 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2295 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2296
2297 def test_noquote_dump(self):
2298 self.assertEqual(
2299 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2300 'A Silly Person <person@dom.ain>')
2301
2302 def test_escape_dump(self):
2303 self.assertEqual(
2304 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2305 r'"A \(Very\) Silly Person" <person@dom.ain>')
2306 a = r'A \(Special\) Person'
2307 b = 'person@dom.ain'
2308 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2309
2310 def test_escape_backslashes(self):
2311 self.assertEqual(
2312 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2313 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2314 a = r'Arthur \Backslash\ Foobar'
2315 b = 'person@dom.ain'
2316 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2317
2318 def test_name_with_dot(self):
2319 x = 'John X. Doe <jxd@example.com>'
2320 y = '"John X. Doe" <jxd@example.com>'
2321 a, b = ('John X. Doe', 'jxd@example.com')
2322 self.assertEqual(utils.parseaddr(x), (a, b))
2323 self.assertEqual(utils.parseaddr(y), (a, b))
2324 # formataddr() quotes the name if there's a dot in it
2325 self.assertEqual(utils.formataddr((a, b)), y)
2326
R. David Murray5397e862010-10-02 15:58:26 +00002327 def test_parseaddr_preserves_quoted_pairs_in_addresses(self):
2328 # issue 10005. Note that in the third test the second pair of
2329 # backslashes is not actually a quoted pair because it is not inside a
2330 # comment or quoted string: the address being parsed has a quoted
2331 # string containing a quoted backslash, followed by 'example' and two
2332 # backslashes, followed by another quoted string containing a space and
2333 # the word 'example'. parseaddr copies those two backslashes
2334 # literally. Per rfc5322 this is not technically correct since a \ may
2335 # not appear in an address outside of a quoted string. It is probably
2336 # a sensible Postel interpretation, though.
2337 eq = self.assertEqual
2338 eq(utils.parseaddr('""example" example"@example.com'),
2339 ('', '""example" example"@example.com'))
2340 eq(utils.parseaddr('"\\"example\\" example"@example.com'),
2341 ('', '"\\"example\\" example"@example.com'))
2342 eq(utils.parseaddr('"\\\\"example\\\\" example"@example.com'),
2343 ('', '"\\\\"example\\\\" example"@example.com'))
2344
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002345 def test_multiline_from_comment(self):
2346 x = """\
2347Foo
2348\tBar <foo@example.com>"""
2349 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2350
2351 def test_quote_dump(self):
2352 self.assertEqual(
2353 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2354 r'"A Silly; Person" <person@dom.ain>')
2355
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002356 def test_charset_richcomparisons(self):
2357 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002358 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002359 cset1 = Charset()
2360 cset2 = Charset()
2361 eq(cset1, 'us-ascii')
2362 eq(cset1, 'US-ASCII')
2363 eq(cset1, 'Us-AsCiI')
2364 eq('us-ascii', cset1)
2365 eq('US-ASCII', cset1)
2366 eq('Us-AsCiI', cset1)
2367 ne(cset1, 'usascii')
2368 ne(cset1, 'USASCII')
2369 ne(cset1, 'UsAsCiI')
2370 ne('usascii', cset1)
2371 ne('USASCII', cset1)
2372 ne('UsAsCiI', cset1)
2373 eq(cset1, cset2)
2374 eq(cset2, cset1)
2375
2376 def test_getaddresses(self):
2377 eq = self.assertEqual
2378 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2379 'Bud Person <bperson@dom.ain>']),
2380 [('Al Person', 'aperson@dom.ain'),
2381 ('Bud Person', 'bperson@dom.ain')])
2382
2383 def test_getaddresses_nasty(self):
2384 eq = self.assertEqual
2385 eq(utils.getaddresses(['foo: ;']), [('', '')])
2386 eq(utils.getaddresses(
2387 ['[]*-- =~$']),
2388 [('', ''), ('', ''), ('', '*--')])
2389 eq(utils.getaddresses(
2390 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2391 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2392
2393 def test_getaddresses_embedded_comment(self):
2394 """Test proper handling of a nested comment"""
2395 eq = self.assertEqual
2396 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2397 eq(addrs[0][1], 'foo@bar.com')
2398
2399 def test_utils_quote_unquote(self):
2400 eq = self.assertEqual
2401 msg = Message()
2402 msg.add_header('content-disposition', 'attachment',
2403 filename='foo\\wacky"name')
2404 eq(msg.get_filename(), 'foo\\wacky"name')
2405
2406 def test_get_body_encoding_with_bogus_charset(self):
2407 charset = Charset('not a charset')
2408 self.assertEqual(charset.get_body_encoding(), 'base64')
2409
2410 def test_get_body_encoding_with_uppercase_charset(self):
2411 eq = self.assertEqual
2412 msg = Message()
2413 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2414 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2415 charsets = msg.get_charsets()
2416 eq(len(charsets), 1)
2417 eq(charsets[0], 'utf-8')
2418 charset = Charset(charsets[0])
2419 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002420 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002421 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2422 eq(msg.get_payload(decode=True), b'hello world')
2423 eq(msg['content-transfer-encoding'], 'base64')
2424 # Try another one
2425 msg = Message()
2426 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2427 charsets = msg.get_charsets()
2428 eq(len(charsets), 1)
2429 eq(charsets[0], 'us-ascii')
2430 charset = Charset(charsets[0])
2431 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2432 msg.set_payload('hello world', charset=charset)
2433 eq(msg.get_payload(), 'hello world')
2434 eq(msg['content-transfer-encoding'], '7bit')
2435
2436 def test_charsets_case_insensitive(self):
2437 lc = Charset('us-ascii')
2438 uc = Charset('US-ASCII')
2439 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2440
def test_partial_falls_inside_message_delivery_status(self):
    # The Parser interface provides chunks of data to FeedParser in 8192
    # byte gulps.  SF bug #1076485 found one of those chunks inside
    # message/delivery-status header block, which triggered an
    # unreadline() of NeedMoreData.
    report = self._msgobj('msg_43.txt')
    outline = StringIO()
    iterators._structure(report, outline)
    # Expected outline: the report holds a text part, a delivery-status
    # part with 26 text/plain children, and the returned headers.
    expected = ''.join(
        ['multipart/report\n',
         '    text/plain\n',
         '    message/delivery-status\n']
        + ['        text/plain\n'] * 26
        + ['    text/rfc822-headers\n'])
    self.ndiffAssertEqual(outline.getvalue(), expected)
2482
R. David Murraya0b44b52010-12-02 21:47:19 +00002483 def test_make_msgid_domain(self):
2484 self.assertEqual(
2485 email.utils.make_msgid(domain='testdomain-string')[-19:],
2486 '@testdomain-string>')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002487
Ezio Melottib3aedd42010-11-20 19:04:17 +00002488
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002489# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Exercises email.iterators against the sample messages in the data
    # directory, plus the line buffering done by the feed parser.

    def test_body_line_iterator(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # First a simple non-multipart message
        msg = self._msgobj('msg_01.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 6)
        neq(EMPTYSTRING.join(lines), msg.get_payload())
        # Now a more complicated multipart
        msg = self._msgobj('msg_02.txt')
        lines = list(iterators.body_line_iterator(msg))
        eq(len(lines), 43)
        with openfile('msg_19.txt') as fp:
            neq(EMPTYSTRING.join(lines), fp.read())

    def test_typed_subpart_iterator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Collect the payload of every subpart whose main type is 'text'.
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text')]
        eq(len(payloads), 2)
        eq(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Collect the payload of every text/plain subpart.
        payloads = [subpart.get_payload() for subpart
                    in iterators.typed_subpart_iterator(msg, 'text', 'plain')]
        eq(len(payloads), 1)
        eq(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")

    def test_pushCR_LF(self):
        '''FeedParser BufferedSubFile.push() assumed it received complete
           line endings.  A CR ending one push() followed by a LF starting
           the next push() added an empty line.
        '''
        from email.feedparser import BufferedSubFile, NeedMoreData
        # Each entry is (chunk to push, number of lines expected to become
        # readable right after that push).
        imt = [
            ("a\r \n", 2),
            ("b", 0),
            ("c\n", 1),
            ("", 0),
            ("d\r\n", 1),
            ("e\r", 0),
            ("\nf", 1),
            ("\r\n", 1),
        ]
        bsf = BufferedSubFile()
        om = []
        nt = 0
        for il, n in imt:
            bsf.push(il)
            nt += n
            n1 = 0
            # Drain every line that is currently available.
            while True:
                ol = bsf.readline()
                # NeedMoreData is a sentinel object, so test identity.
                if ol is NeedMoreData:
                    break
                om.append(ol)
                n1 += 1
            # assertEqual (rather than assertTrue(a == b)) reports both
            # values when the counts disagree.
            self.assertEqual(n, n1)
        self.assertEqual(len(om), nt)
        # Nothing may be lost or invented: the concatenated output must
        # equal the concatenated input.
        self.assertEqual(''.join(il for il, n in imt), ''.join(om))
2576
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002577
Ezio Melottib3aedd42010-11-20 19:04:17 +00002578
class TestParsers(TestEmailBase):
    # Parser and HeaderParser behaviour on sample messages and on small
    # hand-written inputs.

    # No limit on the length of diffs unittest prints on failure.
    maxDiff = None

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body was not parsed, so it stays a single string even though
        # the content type says multipart.
        self.assertFalse(msg.is_multipart())
        self.assertIsInstance(msg.get_payload(), str)

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the file's CRLF line endings intact.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_crlf_flatten(self):
        # Using newline='\n' preserves the crlfs in this input file.
        with openfile('msg_26.txt', newline='\n') as fp:
            text = fp.read()
        msg = email.message_from_string(text)
        s = StringIO()
        g = Generator(s)
        g.flatten(msg, linesep='\r\n')
        self.assertEqual(s.getvalue(), text)

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        # Both message/rfc822 parts have the same shape and differ only in
        # the text of the single text/plain message they wrap.
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            part = msg.get_payload(index)
            eq(part.get_content_type(), 'message/rfc822')
            eq(part.is_multipart(), 1)
            eq(len(part.get_payload()), 1)
            inner = part.get_payload(0)
            eq(inner.is_multipart(), 0)
            eq(inner.get_content_type(), 'text/plain')
            neq(inner.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, and the test expects
        # no headers at all to be parsed from this input.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works on any iterable of keys, unlike an in-place
        # .sort(), which needs keys() to return a list.
        eq(sorted(msg.keys()), ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')

    def test_CRLFLF_at_end_of_part(self):
        # issue 5610: feedparser should not eat two chars from body part ending
        # with "\r\n\n".
        m = (
            "From: foo@bar.com\n"
            "To: baz\n"
            "Mime-Version: 1.0\n"
            "Content-Type: multipart/mixed; boundary=BOUNDARY\n"
            "\n"
            "--BOUNDARY\n"
            "Content-Type: text/plain\n"
            "\n"
            "body ending with CRLF newline\r\n"
            "\n"
            "--BOUNDARY--\n"
        )
        msg = email.message_from_string(m)
        self.assertTrue(msg.get_payload(0).get_payload().endswith('\r\n'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002741
Ezio Melottib3aedd42010-11-20 19:04:17 +00002742
class Test8BitBytesHandling(unittest.TestCase):
    # In Python3 all input is string, but that doesn't work if the actual input
    # uses an 8bit transfer encoding.  To hack around that, in email 5.1 we
    # decode byte streams using the surrogateescape error handler, and
    # reconvert to binary at appropriate places if we detect surrogates.  This
    # doesn't allow us to transform headers with 8bit bytes (they get munged),
    # but it does allow us to parse and preserve them, and to decode body
    # parts that use an 8bit CTE.

    maxDiff = None

    # Template for the body tests; charset, cte and bodyline are filled in
    # with str.format() by _parse_body().
    bodytest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: baz
        Mime-Version: 1.0
        Content-Type: text/plain; charset={charset}
        Content-Transfer-Encoding: {cte}

        {bodyline}
        """)

    def _parse_body(self, charset, cte, bodyline):
        # Fill in the body template, encode the text as utf-8 and parse the
        # resulting bytes into a message object.
        text = self.bodytest_msg.format(charset=charset,
                                        cte=cte,
                                        bodyline=bodyline)
        return email.message_from_bytes(text.encode('utf-8'))

    def test_known_8bit_CTE(self):
        parsed = self._parse_body('utf-8', '8bit', 'pöstal')
        self.assertEqual(parsed.get_payload(), "pöstal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_unknown_8bit_CTE(self):
        parsed = self._parse_body('notavalidcharset', '8bit', 'pöstal')
        self.assertEqual(parsed.get_payload(), "p��stal\n")
        self.assertEqual(parsed.get_payload(decode=True),
                         "pöstal\n".encode('utf-8'))

    def test_8bit_in_quopri_body(self):
        # This is non-RFC compliant data...without 'decode' the library code
        # decodes the body using the charset from the headers, and because the
        # source byte really is utf-8 this works.  This is likely to fail
        # against real dirty data (ie: produce mojibake), but the data is
        # invalid anyway so it is as good a guess as any.  But this means that
        # this test just confirms the current behavior; that behavior is not
        # necessarily the best possible behavior.  With 'decode' it is
        # returning the raw bytes, so that test should be of correct behavior,
        # or at least produce the same result that email4 did.
        parsed = self._parse_body('utf-8', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6stál\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_invalid_8bit_in_non_8bit_cte_uses_replace(self):
        # This is similar to the previous test, but proves that if the 8bit
        # byte is undecodeable in the specified charset, it gets replaced
        # by the unicode 'unknown' character.  Again, this may or may not
        # be the ideal behavior.  Note that if decode=False none of the
        # decoders will get involved, so this is the only test we need
        # for this behavior.
        parsed = self._parse_body('ascii', 'quoted-printable', 'p=C3=B6stál')
        self.assertEqual(parsed.get_payload(), 'p=C3=B6st��l\n')
        self.assertEqual(parsed.get_payload(decode=True),
                         'pöstál\n'.encode('utf-8'))

    def test_8bit_in_base64_body(self):
        # Sticking an 8bit byte in a base64 block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        parsed = self._parse_body('utf-8', 'base64', 'cMO2c3RhbAá=')
        self.assertEqual(parsed.get_payload(decode=True),
                         'cMO2c3RhbAá=\n'.encode('utf-8'))

    def test_8bit_in_uuencode_body(self):
        # Sticking an 8bit byte in a uuencode block makes it undecodable by
        # normal means, so the block is returned undecoded, but as bytes.
        parsed = self._parse_body('utf-8', 'uuencode', '<,.V<W1A; á ')
        self.assertEqual(parsed.get_payload(decode=True),
                         '<,.V<W1A; á \n'.encode('utf-8'))

    # Message whose headers contain raw (unencoded) non-ASCII bytes; note
    # the second From: header.
    headertest_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        From: göst

        Yes, they are flying.
        """).encode('utf-8')

    def test_get_8bit_header(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        # Both access styles are expected to show '?' for each 8bit byte.
        self.assertEqual(parsed.get('to'), 'b??z')
        self.assertEqual(parsed['to'], 'b??z')

    def test_print_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        # Expected text: the input decoded as ascii with every
        # undecodable byte turned into '?'.
        sanitized = self.headertest_msg.decode('ascii', 'replace')
        sanitized = sanitized.replace('�', '?')
        self.assertEqual(str(parsed), sanitized)

    def test_values_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        expected = [
            'foo@bar.com',
            'b??z',
            'Maintenant je vous pr??sente mon '
            'coll??gue, le pouf c??l??bre\n'
            '\tJean de Baddie',
            "g??st",
        ]
        self.assertListEqual(parsed.values(), expected)

    def test_items_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        expected = [
            ('From', 'foo@bar.com'),
            ('To', 'b??z'),
            ('Subject', 'Maintenant je vous pr??sente mon '
                        'coll??gue, le pouf c??l??bre\n'
                        '\tJean de Baddie'),
            ('From', 'g??st'),
        ]
        self.assertListEqual(parsed.items(), expected)

    def test_get_all_with_8bit_headers(self):
        parsed = email.message_from_bytes(self.headertest_msg)
        self.assertListEqual(parsed.get_all('from'),
                             ['foo@bar.com', 'g??st'])

    # utf-8 message with an 8bit body and raw non-ASCII header bytes.
    non_latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: báz
        Subject: Maintenant je vous présente mon collègue, le pouf célèbre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: 8bit

        Да, они летят.
        """).encode('utf-8')

    def test_bytes_generator(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed)
        # The bytes generator must reproduce the input exactly.
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg)

    # XXX: ultimately the '?' should turn into CTE encoded bytes
    # using 'unknown-8bit' charset.
    non_latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: b??z
        Subject: Maintenant je vous pr??sente mon coll??gue, le pouf c??l??bre
        \tJean de Baddie
        Mime-Version: 1.0
        Content-Type: text/plain; charset="utf-8"
        Content-Transfer-Encoding: base64

        0JTQsCwg0L7QvdC4INC70LXRgtGP0YIuCg==
        """)

    def test_generator_handles_8bit(self):
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = StringIO()
        email.generator.Generator(sink).flatten(parsed)
        self.assertEqual(sink.getvalue(), self.non_latin_bin_msg_as7bit)

    def test_bytes_generator_with_unix_from(self):
        # The unixfrom contains a current date, so we can't check it
        # literally.  Just make sure the first word is 'From' and the
        # rest of the message matches the input.
        parsed = email.message_from_bytes(self.non_latin_bin_msg)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed, unixfrom=True)
        envelope, *remainder = sink.getvalue().split(b'\n')
        self.assertEqual(envelope.split()[0], b'From')
        self.assertEqual(b'\n'.join(remainder), self.non_latin_bin_msg)

    def test_message_from_binary_file(self):
        path = 'test.msg'
        # Remove the scratch file whether or not the test passes.
        self.addCleanup(unlink, path)
        with open(path, 'wb') as scratch:
            scratch.write(self.non_latin_bin_msg)
        with open(path, 'rb') as scratch:
            parsed = email.parser.BytesParser().parse(scratch)
        self.assertEqual(str(parsed), self.non_latin_bin_msg_as7bit)

    # latin-1 message with an 8bit body and pure-ASCII headers.
    latin_bin_msg = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="latin-1"
        Content-Transfer-Encoding: 8bit

        oh là là, know what I mean, know what I mean?
        """).encode('latin-1')

    latin_bin_msg_as7bit = textwrap.dedent("""\
        From: foo@bar.com
        To: Dinsdale
        Subject: Nudge nudge, wink, wink
        Mime-Version: 1.0
        Content-Type: text/plain; charset="iso-8859-1"
        Content-Transfer-Encoding: quoted-printable

        oh l=E0 l=E0, know what I mean, know what I mean?
        """)

    def test_string_generator_reencodes_to_quopri_when_appropriate(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_decoded_generator_emits_unicode_body(self):
        parsed = email.message_from_bytes(self.latin_bin_msg)
        sink = StringIO()
        email.generator.DecodedGenerator(sink).flatten(parsed)
        #DecodedHeader output contains an extra blank line compared
        #to the input message.  RDM: not sure if this is a bug or not,
        #but it is not specific to the 8bit->7bit conversion.
        expected = self.latin_bin_msg.decode('latin-1') + '\n'
        self.assertEqual(sink.getvalue(), expected)

    def test_bytes_feedparser(self):
        feeder = email.feedparser.BytesFeedParser()
        # Feed the message in 10-byte slices.
        for start in range(0, len(self.latin_bin_msg), 10):
            feeder.feed(self.latin_bin_msg[start:start + 10])
        parsed = feeder.close()
        self.assertEqual(str(parsed), self.latin_bin_msg_as7bit)

    def test_crlf_flatten(self):
        with openfile('msg_26.txt', 'rb') as fp:
            original = fp.read()
        parsed = email.message_from_bytes(original)
        sink = BytesIO()
        email.generator.BytesGenerator(sink).flatten(parsed, linesep='\r\n')
        self.assertEqual(sink.getvalue(), original)
2991
Ezio Melottib3aedd42010-11-20 19:04:17 +00002992
class BaseTestBytesGeneratorIdempotent:
    # Mixin with the bytes-based helpers for the idempotency tests.  The
    # concrete class must supply `linesep` (passed to flatten()),
    # `blinesep` and `normalize_linesep_regex` (used to rewrite the line
    # endings of the data files).

    maxDiff = None

    def _msgobj(self, filename):
        # Read the data file as bytes, rewrite its line endings to
        # `blinesep`, and return both the parsed message and those bytes.
        with openfile(filename, 'rb') as fp:
            raw = fp.read()
        normalized = self.normalize_linesep_regex.sub(self.blinesep, raw)
        return email.message_from_bytes(normalized), normalized

    def _idempotent(self, msg, data, unixfrom=False):
        # Flatten `msg` with header wrapping disabled and compare the
        # output with `data`.
        sink = BytesIO()
        generator = email.generator.BytesGenerator(sink, maxheaderlen=0)
        generator.flatten(msg, unixfrom=unixfrom, linesep=self.linesep)
        self.assertByteStringsEqual(data, sink.getvalue())

    def assertByteStringsEqual(self, str1, str2):
        # Not using self.blinesep here is intentional.  This way the output
        # is more useful when the failure results in mixed line endings.
        first_lines = str1.split(b'\n')
        second_lines = str2.split(b'\n')
        self.assertListEqual(first_lines, second_lines)
3014
3015
class TestBytesGeneratorIdempotentNL(BaseTestBytesGeneratorIdempotent,
                                     TestIdempotent):
    # Idempotency tests run with '\n' line endings: every '\r\n' in the
    # data files is rewritten to '\n' before parsing.
    normalize_linesep_regex = re.compile(br'\r\n')
    blinesep = b'\n'
    linesep = '\n'
3021
3022
class TestBytesGeneratorIdempotentCRLF(BaseTestBytesGeneratorIdempotent,
                                       TestIdempotent):
    # Idempotency tests run with '\r\n' line endings: every '\n' that is
    # not already preceded by '\r' is rewritten to '\r\n' before parsing.
    normalize_linesep_regex = re.compile(br'(?<!\r)\n')
    blinesep = b'\r\n'
    linesep = '\r\n'
3028
Ezio Melottib3aedd42010-11-20 19:04:17 +00003029
class TestBase64(unittest.TestCase):
    # Tests for the helpers in email.base64mime.

    def test_len(self):
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        # Every started group of three input characters is expected to
        # cost four output characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
        for size in range(15):
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        check = self.assertEqual
        check(base64mime.decode(''), b'')
        check(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # A newline inside the input is encoded like any other byte.
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        full_line = 'eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg'
        last_line = 'eHh4eCB4eHh4IA=='
        wrapped = [full_line, full_line, full_line, last_line]
        check(encode(b'xxxx ' * 20, maxlinelen=40),
              ''.join(line + '\n' for line in wrapped))
        # Test the eol argument
        check(encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
              ''.join(line + '\r\n' for line in wrapped))

    def test_header_encode(self):
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003080
3081
Ezio Melottib3aedd42010-11-20 19:04:17 +00003082
class TestQuopri(unittest.TestCase):
    # Tests for the helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.  Iterating a bytes object already yields integers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check on the fixture itself.  A TestCase assertion is used
        # because a bare assert statement is removed when Python runs
        # with -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must give back the original
        # character for every code point below 256.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        # The second argument is the end-of-line string used in the output.
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as the default eol.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg.  The expected value is spelled with
        # explicit literals so the leading space of the second line and the
        # '=20' for the trailing space stay visible.
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\n'
           'x xxxx xxxx xxxx xxxx=20')
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           'xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r\n'
           ' xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r\n'
           'x xxxx xxxx xxxx xxxx=20')
        # An empty line between two lines is kept as it is.
        eq(quoprimime.body_encode('one line\n\ntwo line'),
           'one line\n\ntwo line')
3199
3200
Ezio Melottib3aedd42010-11-20 19:04:17 +00003201
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003202# Test the Charset class
3203class TestCharset(unittest.TestCase):
3204 def tearDown(self):
3205 from email import charset as CharsetModule
3206 try:
3207 del CharsetModule.CHARSETS['fake']
3208 except KeyError:
3209 pass
3210
Guido van Rossum9604e662007-08-30 03:46:43 +00003211 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003212 eq = self.assertEqual
3213 # Make sure us-ascii = no Unicode conversion
3214 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00003215 eq(c.header_encode('Hello World!'), 'Hello World!')
3216 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003217 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00003218 self.assertRaises(UnicodeError, c.header_encode, s)
3219 c = Charset('utf-8')
3220 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003221
3222 def test_body_encode(self):
3223 eq = self.assertEqual
3224 # Try a charset with QP body encoding
3225 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003226 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003227 # Try a charset with Base64 body encoding
3228 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00003229 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003230 # Try a charset with None body encoding
3231 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003232 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003233 # Try the convert argument, where input codec != output codec
3234 c = Charset('euc-jp')
3235 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00003236 # XXX FIXME
3237## try:
3238## eq('\x1b$B5FCO;~IW\x1b(B',
3239## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
3240## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
3241## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
3242## except LookupError:
3243## # We probably don't have the Japanese codecs installed
3244## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003245 # Testing SF bug #625509, which we have to fake, since there are no
3246 # built-in encodings where the header encoding is QP but the body
3247 # encoding is not.
3248 from email import charset as CharsetModule
3249 CharsetModule.add_charset('fake', CharsetModule.QP, None)
3250 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00003251 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003252
3253 def test_unicode_charset_name(self):
3254 charset = Charset('us-ascii')
3255 self.assertEqual(str(charset), 'us-ascii')
3256 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
3257
3258
Ezio Melottib3aedd42010-11-20 19:04:17 +00003259
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003260# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Encoding, folding and decoding checks for email.header.Header."""

    def test_simple(self):
        """A chunk appended with its own leading space encodes as written."""
        header = Header('Hello World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!')
        header.append(' Goodbye World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World! Goodbye World!')

    def test_simple_surprise(self):
        """A chunk appended without a leading space still gets one space."""
        header = Header('Hello World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World!')
        header.append('Goodbye World!')
        self.ndiffAssertEqual(header.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        """Plain text decodes to a single chunk with no charset."""
        plain = 'no decoding needed'
        self.assertEqual(decode_header(plain), [(plain, None)])

    def test_long(self):
        """Every piece of a folded long header is at most 76 characters."""
        text = ("I am the very model of a modern Major-General; "
                "I've information vegetable, animal, and mineral; "
                "I know the kings of England, and I quote the fights historical "
                "from Marathon to Waterloo, in order categorical; "
                "I'm very well acquainted, too, with matters mathematical; "
                "I understand equations, both the simple and quadratical; "
                "about binomial theorem I'm teeming with a lot o' news, "
                "with many cheerful facts about the square of the hypotenuse.")
        header = Header(text, maxlinelen=76)
        folded = header.encode(splitchars=' ')
        for piece in folded.split('\n '):
            self.assertTrue(len(piece) <= 76)

    def test_multilingual(self):
        """One header built from three charsets encodes and decodes back."""
        check = self.ndiffAssertEqual
        latin1 = Charset("iso-8859-1")
        latin2 = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        latin1_bytes = (b'Die Mieter treten hier ein werden mit einem '
                        b'Foerderband komfortabel den Korridor entlang, '
                        b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                        b'gegen die rotierenden Klingen bef\xf6rdert. ')
        latin2_bytes = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                        b'd\xf9vtipu.. ')
        utf8_text = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        header = Header(latin1_bytes, latin1)
        header.append(latin2_bytes, latin2)
        header.append(utf8_text, utf8)
        encoded = header.encode(maxlinelen=76)
        check(encoded, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding yields one (bytes, charset) pair per original chunk.
        chunks = decode_header(encoded)
        check(len(chunks), 3)
        check(chunks[0], (latin1_bytes, 'iso-8859-1'))
        check(chunks[1], (latin2_bytes, 'iso-8859-2'))
        check(chunks[2], (utf8_text.encode('utf-8'), 'utf-8'))
        # str() gives the whole header as text; the expected value is
        # spelled out as UTF-8 bytes.
        as_text = str(header)
        check(as_text,
              (b'Die Mieter treten hier ein werden mit einem Foerderband '
               b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
               b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
               b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
               b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
               b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
               b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
               b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
               b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
               b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
               b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
               b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
               b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
               b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
               b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
               b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
               ).decode('utf-8'))
        # Test make_header()
        rebuilt = make_header(decode_header(encoded))
        check(rebuilt, header)

    def test_empty_header_encode(self):
        """A Header built with no arguments encodes to the empty string."""
        self.assertEqual(Header().encode(), '')

    def test_header_ctor_default_args(self):
        """A default-constructed Header compares equal to its text."""
        header = Header()
        self.ndiffAssertEqual(header, '')
        header.append('foo', Charset('iso-8859-1'))
        self.ndiffAssertEqual(header, 'foo')

    def test_explicit_maxlinelen(self):
        """Folding depends on maxlinelen and header_name; str() does not."""
        check = self.ndiffAssertEqual
        text = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default settings.
        header = Header(text)
        check(header.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        check(str(header), text)
        # Naming the header moves the fold point.
        header = Header(text, header_name='Subject')
        check(header.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        check(str(header), text)
        # A large enough maxlinelen leaves the text on one line.
        header = Header(text, maxlinelen=1024, header_name='Subject')
        check(header.encode(), text)
        check(str(header), text)

    def test_quopri_splittable(self):
        """Q-encoded text is split into chunks and decodes back unchanged."""
        check = self.ndiffAssertEqual
        header = Header(charset='iso-8859-1', maxlinelen=20)
        text = 'xxxx ' * 20
        header.append(text)
        encoded = header.encode()
        check(encoded, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        check(text, str(make_header(decode_header(encoded))))
        # Same text with a wider line limit.
        header = Header(charset='iso-8859-1', maxlinelen=40)
        header.append('xxxx ' * 20)
        encoded = header.encode()
        check(encoded, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_base64_splittable(self):
        """Base64-encoded text is split into chunks and decodes back."""
        check = self.ndiffAssertEqual
        header = Header(charset='koi8-r', maxlinelen=20)
        text = 'xxxx ' * 20
        header.append(text)
        encoded = header.encode()
        check(encoded, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        check(text, str(make_header(decode_header(encoded))))
        # Same text with a wider line limit.
        header = Header(charset='koi8-r', maxlinelen=40)
        header.append(text)
        encoded = header.encode()
        check(encoded, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        check(text, str(make_header(decode_header(encoded))))

    def test_us_ascii_header(self):
        """Plain ASCII survives decode_header() and make_header()."""
        text = 'hello'
        chunks = decode_header(text)
        self.assertEqual(chunks, [('hello', None)])
        rebuilt = make_header(chunks)
        self.assertEqual(text, rebuilt.encode())

    def test_string_charset(self):
        """append() accepts the charset as a plain string name."""
        header = Header()
        header.append('hello', 'iso-8859-1')
        self.assertEqual(header, 'hello')

    # Disabled test, kept for reference.
##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        """utf-8 headers come out Q-encoded or base64-encoded per input."""
        latin_word = Header('p\xf6stal', 'utf-8')
        self.assertEqual(latin_word.encode(), '=?utf-8?q?p=C3=B6stal?=')
        cjk_word = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        self.assertEqual(cjk_word.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        """Undecodable bytes raise unless errors='replace' is given."""
        raw = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        self.assertRaises(UnicodeError, Header, raw)
        header = Header()
        self.assertRaises(UnicodeError, header.append, raw)
        # With errors='replace' the result matches a lenient UTF-8 decode.
        lenient = raw.decode('utf-8', 'replace')
        self.assertEqual(str(Header(raw, errors='replace')), lenient)
        header.append(raw, errors='replace')
        self.assertEqual(str(header), lenient)

    def test_encoded_adjacent_nonencoded(self):
        """An encoded chunk followed by a plain one round-trips."""
        header = Header()
        header.append('hello', 'iso-8859-1')
        header.append('world')
        encoded = header.encode()
        self.assertEqual(encoded, '=?iso-8859-1?q?hello?= world')
        rebuilt = make_header(decode_header(encoded))
        self.assertEqual(rebuilt.encode(), encoded)

    def test_whitespace_eater(self):
        """Adjacent encoded words of one charset decode as a single chunk."""
        source = ('Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= '
                  '=?koi8-r?q?=CA?= zz.')
        chunks = decode_header(source)
        self.assertEqual(chunks, [
            (b'Subject:', None),
            (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 '
             b'\xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'),
            (b'zz.', None),
        ])
        rebuilt = make_header(chunks)
        self.assertEqual(
            rebuilt.encode(),
            'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        """A malformed base64 encoded word raises HeaderParseError."""
        source = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3I ?='
        self.assertRaises(errors.HeaderParseError, decode_header, source)
3567
3568
Ezio Melottib3aedd42010-11-20 19:04:17 +00003569
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003570# Test RFC 2231 header parameters (en/de)coding
class TestRFC2231(TestEmailBase):
    """Checks for RFC 2231 style header parameters (name*=, name*0*=, ...)."""

    def test_get_param(self):
        """get_param() returns (charset, language, value) for title*."""
        message = self._msgobj('msg_29.txt')
        self.assertEqual(
            message.get_param('title'),
            ('us-ascii', 'en', "This is even more ***fun*** isn't it!"))
        # unquote=False keeps the surrounding double quotes in the value.
        self.assertEqual(
            message.get_param('title', unquote=False),
            ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))

    def test_set_param(self):
        """set_param() with a charset stores and serializes an encoded value."""
        check = self.ndiffAssertEqual
        title = "This is even more ***fun*** isn't it!"
        message = Message()
        # Without a language the middle element is the empty string.
        message.set_param('title', title, charset='us-ascii')
        check(message.get_param('title'), ('us-ascii', '', title))
        message.set_param('title', title, charset='us-ascii', language='en')
        check(message.get_param('title'), ('us-ascii', 'en', title))
        # The same parameter set on a parsed message and written back out.
        message = self._msgobj('msg_01.txt')
        message.set_param('title', title, charset='us-ascii', language='en')
        check(message.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_del_param(self):
        """del_param() removes one parameter and leaves the others."""
        message = self._msgobj('msg_01.txt')
        message.set_param('foo', 'bar', charset='us-ascii', language='en')
        message.set_param('title', "This is even more ***fun*** isn't it!",
                          charset='us-ascii', language='en')
        message.del_param('foo', header='Content-Type')
        self.ndiffAssertEqual(message.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")

    def test_rfc2231_get_content_charset(self):
        """get_content_charset() on msg_32.txt yields 'us-ascii'."""
        message = self._msgobj('msg_32.txt')
        self.assertEqual(message.get_content_charset(), 'us-ascii')

    def test_rfc2231_no_language_or_charset(self):
        """Plain continuations (NAME*0, NAME*1) join into one string."""
        raw = '''\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm

'''
        message = email.message_from_string(raw)
        value = message.get_param('NAME')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(
            value,
            'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')

    def test_rfc2231_no_language_or_charset_in_filename(self):
        """Encoded filename segments with empty charset/language decode."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
        """Same input and expectation as the preceding filename test."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_partly_encoded(self):
        """Only segments marked with a trailing '*' are percent-decoded."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="''This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(
            message.get_filename(),
            'This%20is%20even%20more%20***fun*** is it not.pdf')

    def test_rfc2231_partly_nonencoded(self):
        """Segments without a trailing '*' are never percent-decoded."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0="This%20is%20even%20more%20";
\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(
            message.get_filename(),
            'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_boundary(self):
        """get_boundary() decodes a segmented, encoded boundary parameter."""
        raw = '''\
Content-Type: multipart/alternative;
\tboundary*0*="''This%20is%20even%20more%20";
\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tboundary*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_boundary(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_no_language_or_charset_in_charset(self):
        """get_content_charset() decodes and lower-cases a segmented value."""
        # This is a nonsensical charset value, but tests the code anyway
        raw = '''\
Content-Type: text/plain;
\tcharset*0*="This%20is%20even%20more%20";
\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tcharset*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_content_charset(),
                         'this is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_filename(self):
        """An unknown charset label does not prevent filename decoding."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2="is it not.pdf"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf')

    def test_rfc2231_bad_encoding_in_charset(self):
        """A charset value with non-ASCII content yields None."""
        raw = """\
Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D

"""
        message = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(message.get_content_charset(), None)

    def test_rfc2231_bad_character_in_charset(self):
        """Same as above, but with a known ('ascii') charset label."""
        raw = """\
Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D

"""
        message = email.message_from_string(raw)
        # This should return None because non-ascii characters in the charset
        # are not allowed.
        self.assertEqual(message.get_content_charset(), None)

    def test_rfc2231_bad_character_in_filename(self):
        """An undecodable byte in the filename becomes U+FFFD."""
        raw = '''\
Content-Disposition: inline;
\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
\tfilename*2*="is it not.pdf%E2"

'''
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(),
                         'This is even more ***fun*** is it not.pdf\ufffd')

    def test_rfc2231_unknown_encoding(self):
        """An unrecognized charset label still yields the filename."""
        raw = """\
Content-Transfer-Encoding: 8bit
Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt

"""
        message = email.message_from_string(raw)
        self.assertEqual(message.get_filename(), 'myfile.txt')

    def test_rfc2231_single_tick_in_filename_extended(self):
        """One apostrophe in an extended value is not a charset delimiter."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"Frank's\"; name*1*=\" Document\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, None)
        self.assertEqual(language, None)
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_single_tick_in_filename(self):
        """Plain continuations with an apostrophe join into one string."""
        raw = """\
Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"

"""
        message = email.message_from_string(raw)
        value = message.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack_extended(self):
        """Only the first two apostrophes delimit charset and language."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, "Frank's Document")

    def test_rfc2231_tick_attack(self):
        """Apostrophes in non-extended segments are left in the value."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"

"""
        message = email.message_from_string(raw)
        value = message.get_param('name')
        self.assertFalse(isinstance(value, tuple))
        self.assertEqual(value, "us-ascii'en-us'Frank's Document")

    def test_rfc2231_no_extended_values(self):
        """An ordinary quoted parameter comes back as a plain string."""
        raw = """\
Content-Type: application/x-foo; name=\"Frank's Document\"

"""
        message = email.message_from_string(raw)
        self.assertEqual(message.get_param('name'), "Frank's Document")

    def test_rfc2231_encoded_then_unencoded_segments(self):
        """Extended first segment followed by a plain one still decodes."""
        raw = """\
Content-Type: application/x-foo;
\tname*0*=\"us-ascii'en-us'My\";
\tname*1=\" Document\";
\tname*2*=\" For You\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')

    def test_rfc2231_unencoded_then_encoded_segments(self):
        """Plain first segment followed by extended ones still decodes."""
        raw = """\
Content-Type: application/x-foo;
\tname*0=\"us-ascii'en-us'My\";
\tname*1*=\" Document\";
\tname*2*=\" For You\"

"""
        message = email.message_from_string(raw)
        charset, language, value = message.get_param('name')
        self.assertEqual(charset, 'us-ascii')
        self.assertEqual(language, 'en-us')
        self.assertEqual(value, 'My Document For You')
3879
3880
Ezio Melottib3aedd42010-11-20 19:04:17 +00003881
R. David Murraya8f480f2010-01-16 18:30:03 +00003882# Tests to ensure that signed parts of an email are completely preserved, as
3883# required by RFC1847 section 2.1. Note that these are incomplete, because the
3884# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a message survives regeneration.

    Each test parses a data file, regenerates the message text, and compares
    the first MIME part of the original with the first MIME part of the
    result.
    """

    # Matches from one "--boundary" line to the next occurrence of the same
    # boundary; group 2 is the text between them.  Compiled once for the
    # class rather than on every comparison.
    _first_part_re = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)

    def _msg_and_obj(self, filename):
        """Return (original text, parsed message) for the named data file."""
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        return original, email.message_from_string(original)

    def _first_part(self, text, label):
        """Return the first MIME part of *text*, failing if there is none.

        *label* names the text in the failure message.
        """
        match = self._first_part_re.search(text)
        # Report a missing part as a test failure instead of letting
        # .group() raise AttributeError on None.
        self.assertIsNotNone(
            match, 'no MIME part found in the %s message text' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert the first MIME part of *result* equals that of *original*."""
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        """as_string() with default settings keeps the first part intact."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        """as_string(maxheaderlen=60) keeps the first part intact."""
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        """Generator.flatten() keeps the first part intact."""
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3917
3918
Ezio Melottib3aedd42010-11-20 19:04:17 +00003919
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'.

    Names come from dir(), so the result follows dir()'s ordering.  No type
    check is applied: any module attribute with a matching name is returned.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
3923
3924
def suite():
    """Return a TestSuite with the tests of every Test* class in this module.

    Uses the default loader's loadTestsFromTestCase(), the documented
    replacement for unittest.makeSuite(), which was deprecated in Python 3.11
    and removed in 3.13.
    """
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase
    # Named all_tests so the local does not shadow this function's own name.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(load_tests_from(testclass))
    return all_tests
3930
3931
def test_main():
    """Run each Test* class of this module through run_unittest(), in turn."""
    test_classes = _testclasses()
    for test_class in test_classes:
        # One run_unittest() call per class, as opposed to one combined run.
        run_unittest(test_class)
3935
3936
Ezio Melottib3aedd42010-11-20 19:04:17 +00003937
if __name__ == "__main__":
    # Executed as a script: let unittest call suite() to collect the tests.
    unittest.main(defaultTest="suite")