blob: 2cf1a0a2649a784160dc0ed9919deb24e3707ea3 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
12
13from io import StringIO
14from itertools import chain
15
16import email
17
18from email.charset import Charset
19from email.header import Header, decode_header, make_header
20from email.parser import Parser, HeaderParser
21from email.generator import Generator, DecodedGenerator
22from email.message import Message
23from email.mime.application import MIMEApplication
24from email.mime.audio import MIMEAudio
25from email.mime.text import MIMEText
26from email.mime.image import MIMEImage
27from email.mime.base import MIMEBase
28from email.mime.message import MIMEMessage
29from email.mime.multipart import MIMEMultipart
30from email import utils
31from email import errors
32from email import encoders
33from email import iterators
34from email import base64mime
35from email import quoprimime
36
Benjamin Petersonee8712c2008-05-20 21:35:26 +000037from test.support import findfile, run_unittest
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from email.test import __file__ as landmark
39
40
41NL = '\n'
42EMPTYSTRING = ''
43SPACE = ' '
44
45
46
def openfile(filename, *args, **kws):
    """Open *filename* from the 'data' directory beside the test package.

    The directory is located relative to ``landmark`` (the test package's
    ``__file__``).  Any extra positional or keyword arguments are handed to
    open() unchanged, and the resulting file object is returned.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
50
51
52
53# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Compare the repr() of each line of the two values' str() forms
            # and report the difference as an ndiff listing.
            left, right = (
                [repr(line) for line in str(value).splitlines()]
                for value in (first, second)
            )
            report = difflib.ndiff(left, right)
            raise self.failureException(NL + NL.join(report))

    def _msgobj(self, filename):
        """Return the message parsed from the test data file *filename*."""
        # NOTE(review): the name is passed through findfile() before
        # openfile() joins it onto the data directory -- confirm that both
        # steps are really needed.
        with openfile(findfile(filename)) as data_file:
            parsed = email.message_from_file(data_file)
        return parsed
68
69
70
71# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of email.message.Message.

    Tests that call self._msgobj() or openfile() read sample messages from
    the test data directory; the others build their messages in memory.
    """

    def test_get_all(self):
        # get_all() returns every value of a header, or the supplied default
        # when the header is absent.
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        # set_charset() also accepts a charset name given as a string.
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        # The charset passed to set_payload() is reported by get_charset().
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # An explicit failobj replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # Read inside a with-block so the data file is always closed.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends a new field rather than replacing
            # the old one, and lookups return the first match.  Drop the
            # previous value so every alias is really exercised (deleting a
            # missing header is a no-op).
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        # DecodedGenerator output for msg_07.txt must match msg_17.txt.
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the output gains a leading 'From ' line; the
        # remainder must still equal the original text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        # A parameter without a value is reported as the empty string.
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # With no header argument and no Content-Type present, None results.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        # Whitespace around '=' in a parameter is tolerated.
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # Semicolons inside a quoted value must not split the parameter.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        # The same header is spelled with two different quoting styles.
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        # Header membership is case-insensitive but matches whole names only.
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                       ('charset', '"iso-2022-jp"'),
                                       ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the remaining ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a '/' is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        # Changing the type keeps the existing parameters.
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        # A Content-Type value without a '/' falls back to 'text'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        # A Content-Type value without a '/' falls back to 'plain'.
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated header only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        # Undecodable base64 is handed back as the raw payload bytes.
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000510
511
512
513# Test the email.encoders module
514class TestEncoders(unittest.TestCase):
515 def test_encode_empty_payload(self):
516 eq = self.assertEqual
517 msg = Message()
518 msg.set_charset('us-ascii')
519 eq(msg['content-transfer-encoding'], '7bit')
520
521 def test_default_cte(self):
522 eq = self.assertEqual
Ezio Melottic303c122010-04-22 11:57:12 +0000523 # 7bit data and the default us-ascii _charset
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000524 msg = MIMEText('hello world')
525 eq(msg['content-transfer-encoding'], '7bit')
Ezio Melottic303c122010-04-22 11:57:12 +0000526 # Similar, but with 8bit data
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000527 msg = MIMEText('hello \xf8 world')
528 eq(msg['content-transfer-encoding'], '8bit')
529 # And now with a different charset
530 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
531 eq(msg['content-transfer-encoding'], 'quoted-printable')
532
533
534
535# Test long header wrapping
536class TestLongHeaders(TestEmailBase):
537 def test_split_long_continuation(self):
538 eq = self.ndiffAssertEqual
539 msg = email.message_from_string("""\
540Subject: bug demonstration
541\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
542\tmore text
543
544test
545""")
546 sfp = StringIO()
547 g = Generator(sfp)
548 g.flatten(msg)
549 eq(sfp.getvalue(), """\
550Subject: bug demonstration
551\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
552\tmore text
553
554test
555""")
556
557 def test_another_long_almost_unsplittable_header(self):
558 eq = self.ndiffAssertEqual
559 hstr = """\
560bug demonstration
561\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
562\tmore text"""
563 h = Header(hstr, continuation_ws='\t')
564 eq(h.encode(), """\
565bug demonstration
566\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
567\tmore text""")
568 h = Header(hstr.replace('\t', ' '))
569 eq(h.encode(), """\
570bug demonstration
571 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
572 more text""")
573
574 def test_long_nonstring(self):
575 eq = self.ndiffAssertEqual
576 g = Charset("iso-8859-1")
577 cz = Charset("iso-8859-2")
578 utf8 = Charset("utf-8")
579 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
580 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
581 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
582 b'bef\xf6rdert. ')
583 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
584 b'd\xf9vtipu.. ')
585 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
586 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
587 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
588 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
589 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
590 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
591 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
592 '\u3044\u307e\u3059\u3002')
593 h = Header(g_head, g, header_name='Subject')
594 h.append(cz_head, cz)
595 h.append(utf8_head, utf8)
596 msg = Message()
597 msg['Subject'] = h
598 sfp = StringIO()
599 g = Generator(sfp)
600 g.flatten(msg)
601 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000602Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
603 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
604 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
605 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
606 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
607 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
608 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
609 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
610 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
611 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
612 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000613
614""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000615 eq(h.encode(maxlinelen=76), """\
616=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
617 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
618 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
619 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
620 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
621 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
622 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
623 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
624 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
625 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
626 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000627
628 def test_long_header_encode(self):
629 eq = self.ndiffAssertEqual
630 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
631 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
632 header_name='X-Foobar-Spoink-Defrobnit')
633 eq(h.encode(), '''\
634wasnipoop; giraffes="very-long-necked-animals";
635 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
636
637 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
638 eq = self.ndiffAssertEqual
639 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
640 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
641 header_name='X-Foobar-Spoink-Defrobnit',
642 continuation_ws='\t')
643 eq(h.encode(), '''\
644wasnipoop; giraffes="very-long-necked-animals";
645 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
646
647 def test_long_header_encode_with_tab_continuation(self):
648 eq = self.ndiffAssertEqual
649 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
650 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
651 header_name='X-Foobar-Spoink-Defrobnit',
652 continuation_ws='\t')
653 eq(h.encode(), '''\
654wasnipoop; giraffes="very-long-necked-animals";
655\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
656
657 def test_header_splitter(self):
658 eq = self.ndiffAssertEqual
659 msg = MIMEText('')
660 # It'd be great if we could use add_header() here, but that doesn't
661 # guarantee an order of the parameters.
662 msg['X-Foobar-Spoink-Defrobnit'] = (
663 'wasnipoop; giraffes="very-long-necked-animals"; '
664 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
665 sfp = StringIO()
666 g = Generator(sfp)
667 g.flatten(msg)
668 eq(sfp.getvalue(), '''\
669Content-Type: text/plain; charset="us-ascii"
670MIME-Version: 1.0
671Content-Transfer-Encoding: 7bit
672X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
673 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
674
675''')
676
677 def test_no_semis_header_splitter(self):
678 eq = self.ndiffAssertEqual
679 msg = Message()
680 msg['From'] = 'test@dom.ain'
681 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
682 msg.set_payload('Test')
683 sfp = StringIO()
684 g = Generator(sfp)
685 g.flatten(msg)
686 eq(sfp.getvalue(), """\
687From: test@dom.ain
688References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
689 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
690
691Test""")
692
693 def test_no_split_long_header(self):
694 eq = self.ndiffAssertEqual
695 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000696 h = Header(hstr)
697 # These come on two lines because Headers are really field value
698 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000699 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000700References:
701 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
702 h = Header('x' * 80)
703 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000704
705 def test_splitting_multiple_long_lines(self):
706 eq = self.ndiffAssertEqual
707 hstr = """\
708from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
709\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
710\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
711"""
712 h = Header(hstr, continuation_ws='\t')
713 eq(h.encode(), """\
714from babylon.socal-raves.org (localhost [127.0.0.1]);
715 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
716 for <mailman-admin@babylon.socal-raves.org>;
717 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
718\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
719 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
720 for <mailman-admin@babylon.socal-raves.org>;
721 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
722\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
723 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
724 for <mailman-admin@babylon.socal-raves.org>;
725 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
726
727 def test_splitting_first_line_only_is_long(self):
728 eq = self.ndiffAssertEqual
729 hstr = """\
730from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
731\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
732\tid 17k4h5-00034i-00
733\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
734 h = Header(hstr, maxlinelen=78, header_name='Received',
735 continuation_ws='\t')
736 eq(h.encode(), """\
737from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
738 helo=cthulhu.gerg.ca)
739\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
740\tid 17k4h5-00034i-00
741\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
742
743 def test_long_8bit_header(self):
744 eq = self.ndiffAssertEqual
745 msg = Message()
746 h = Header('Britische Regierung gibt', 'iso-8859-1',
747 header_name='Subject')
748 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000749 eq(h.encode(maxlinelen=76), """\
750=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
751 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000752 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000753 eq(msg.as_string(maxheaderlen=76), """\
754Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
755 =?iso-8859-1?q?hore-Windkraftprojekte?=
756
757""")
758 eq(msg.as_string(maxheaderlen=0), """\
759Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000760
761""")
762
763 def test_long_8bit_header_no_charset(self):
764 eq = self.ndiffAssertEqual
765 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000766 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
767 'f\xfcr Offshore-Windkraftprojekte '
768 '<a-very-long-address@example.com>')
769 msg['Reply-To'] = header_string
770 self.assertRaises(UnicodeEncodeError, msg.as_string)
771 msg = Message()
772 msg['Reply-To'] = Header(header_string, 'utf-8',
773 header_name='Reply-To')
774 eq(msg.as_string(maxheaderlen=78), """\
775Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
776 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000777
778""")
779
780 def test_long_to_header(self):
781 eq = self.ndiffAssertEqual
782 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
783 '<someone@eecs.umich.edu>,'
784 '"Someone Test #B" <someone@umich.edu>, '
785 '"Someone Test #C" <someone@eecs.umich.edu>, '
786 '"Someone Test #D" <someone@eecs.umich.edu>')
787 msg = Message()
788 msg['To'] = to
789 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000790To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000791 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000792 "Someone Test #C" <someone@eecs.umich.edu>,
793 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000794
795''')
796
797 def test_long_line_after_append(self):
798 eq = self.ndiffAssertEqual
799 s = 'This is an example of string which has almost the limit of header length.'
800 h = Header(s)
801 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000802 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000803This is an example of string which has almost the limit of header length.
804 Add another line.""")
805
806 def test_shorter_line_with_append(self):
807 eq = self.ndiffAssertEqual
808 s = 'This is a shorter line.'
809 h = Header(s)
810 h.append('Add another sentence. (Surprise?)')
811 eq(h.encode(),
812 'This is a shorter line. Add another sentence. (Surprise?)')
813
814 def test_long_field_name(self):
815 eq = self.ndiffAssertEqual
816 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000817 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
818 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
819 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
820 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000821 h = Header(gs, 'iso-8859-1', header_name=fn)
822 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000823 eq(h.encode(maxlinelen=76), """\
824=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
825 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
826 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
827 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000828
829 def test_long_received_header(self):
830 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
831 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
832 'Wed, 05 Mar 2003 18:10:18 -0700')
833 msg = Message()
834 msg['Received-1'] = Header(h, continuation_ws='\t')
835 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000836 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000837 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000838Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
839 Wed, 05 Mar 2003 18:10:18 -0700
840Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
841 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000842
843""")
844
845 def test_string_headerinst_eq(self):
846 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
847 'tu-muenchen.de> (David Bremner\'s message of '
848 '"Thu, 6 Mar 2003 13:58:21 +0100")')
849 msg = Message()
850 msg['Received-1'] = Header(h, header_name='Received-1',
851 continuation_ws='\t')
852 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000853 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000854 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000855Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
856 6 Mar 2003 13:58:21 +0100\")
857Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
858 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000859
860""")
861
862 def test_long_unbreakable_lines_with_continuation(self):
863 eq = self.ndiffAssertEqual
864 msg = Message()
865 t = """\
866iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
867 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
868 msg['Face-1'] = t
869 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000870 # XXX This splitting is all wrong. It the first value line should be
871 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000872 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000873Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000874 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000875 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000876Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000877 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000878 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
879
880""")
881
882 def test_another_long_multiline_header(self):
883 eq = self.ndiffAssertEqual
884 m = ('Received: from siimage.com '
885 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000886 'Microsoft SMTPSVC(5.0.2195.4905); '
887 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000888 msg = email.message_from_string(m)
889 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000890Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
891 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000892
893''')
894
895 def test_long_lines_with_different_header(self):
896 eq = self.ndiffAssertEqual
897 h = ('List-Unsubscribe: '
898 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
899 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
900 '?subject=unsubscribe>')
901 msg = Message()
902 msg['List'] = h
903 msg['List'] = Header(h, header_name='List')
904 eq(msg.as_string(maxheaderlen=78), """\
905List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000906 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000907List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000908 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000909
910""")
911
912
913
# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body starts with "From ", the line a Generator
        # escapes when mangle_from_ is true.
        self.msg = Message()
        self.msg['From'] = 'aaa@bbb.org'
        self.msg.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")

    def _flatten(self, mangle_from_):
        # Flatten self.msg with the given mangle_from_ setting and return
        # the generated text.
        s = StringIO()
        g = Generator(s, mangle_from_=mangle_from_)
        g.flatten(self.msg)
        return s.getvalue()

    def test_mangled_from(self):
        # The body's leading "From " must come out as ">From ".
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        # With mangling disabled the body must be emitted untouched.
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
945
946
947
# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # The subtype is not passed to MIMEAudio in setUp, so 'basic' has to
        # come from the audio data itself.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(payload), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides whatever would have been guessed.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is used as a sentinel so that identity checks prove
        # the failobj itself was returned.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
991
992
993
# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the GIF fixture once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # The subtype is not passed to MIMEImage in setUp, so 'gif' has to
        # come from the image data itself.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(payload), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides whatever would have been guessed.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is used as a sentinel so that identity checks prove
        # the failobj itself was returned.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1031
1032
1033
# Test the basic MIMEApplication class
class TestMIMEApplication(unittest.TestCase):
    def test_headers(self):
        # With no subtype or encoder given, the part is expected to be
        # application/octet-stream with a base64 transfer encoding.
        eq = self.assertEqual
        msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
        eq(msg.get_content_type(), 'application/octet-stream')
        eq(msg['content-transfer-encoding'], 'base64')

    def test_body(self):
        # The stored payload is the base64 text; decode=True must return the
        # original bytes.
        eq = self.assertEqual
        # Named 'data' rather than 'bytes' so the builtin is not shadowed.
        data = b'\xfa\xfb\xfc\xfd\xfe\xff'
        msg = MIMEApplication(data)
        eq(msg.get_payload(), b'+vv8/f7/')
        eq(msg.get_payload(decode=True), data)
1048
1049
1050
# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        # Defaults: text/plain with a us-ascii charset parameter.
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is used as a sentinel so that identity checks prove
        # the failobj itself was returned.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing, header='foobar'),
                      missing)

    def test_payload(self):
        # The text is stored as-is and the part is a leaf, not a container.
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        # An explicit _charset shows up both in the Charset object and in
        # the Content-Type header.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
1075
1076
1077
# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a multipart/mixed container holding a text part and a GIF
        # attachment; the parts are kept on self for identity checks.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        if timetuple.tm_isdst == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone and time.altzone count seconds *west* of UTC, so a
        # positive value means a negative UTC offset.  The offset is
        # rendered as +HHMM/-HHMM from the absolute value; dividing the raw
        # seconds by 36 only gave the right digits for whole-hour zones west
        # of UTC (it produced e.g. ' +-100' east of UTC and '0550' for 5h30).
        if tzsecs > 0:
            sign = '-'
        else:
            sign = '+'
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # Return a bare multipart/mixed container with the Subject, To and
        # From headers (in that order) used by the generator tests below.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        # convenience
        eq = self.assertEqual
        raises = self.assertRaises
        # tests
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        raises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The container must hand back the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening a multipart with one empty part must
        # reproduce the input text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        # No parts attached; preamble and epilogue are left untouched.
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        # No parts attached; preamble and epilogue are both empty strings,
        # which adds a blank line before the first boundary and a newline
        # after the closing one.
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        # An empty-string preamble yields an extra blank line before the
        # first boundary.
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        # A None preamble adds nothing before the first boundary.
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        # A None epilogue adds nothing after the closing boundary.
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        # An empty-string epilogue yields a newline after the closing
        # boundary.
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        # A '\n' epilogue yields a newline plus one blank line after the
        # closing boundary.
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        # Each alternative in the second part of msg_36.txt must be a
        # message/external-body wrapping a single text/plain message.
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a text/html message is not interpreted;
        # the body, boundary lines included, is expected to round-trip
        # unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        # Leading whitespace inside the quoted boundary parameter is part of
        # the boundary and must be preserved when matching delimiters.
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the text, with no
        # newline after it; the part's payload must still be recovered.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
1450
1451
1452
# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        # msg_14.txt is expected to be reported as text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # The payload is expected to stay a plain string, with two defects
        # recorded in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # The payload is expected to be the raw text, boundary lines
        # included, rather than a list of parts.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # The flattened output is expected to contain the blank line that
        # separates headers from body.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        # Two defects are expected, in this order.
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # A message whose first line starts with whitespace: no headers are
        # expected, the offending line is recorded on the defect, and the
        # remaining lines form the body.
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1564
1565
1566
# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # A folded header with two encoded words is decoded into chunks,
        # rebuilt with make_header() and encoded again.
        eq = self.assertEqual
        s = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        dh = decode_header(s)
        eq(dh, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        header = make_header(dh)
        eq(str(header),
           'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(header.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        # An encoded word followed by plain text decodes to two chunks that
        # join with a single space.
        eq = self.assertEqual
        s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        dh = decode_header(s)
        eq(dh, [(b'Andr\xe9', 'iso-8859-1'),
                (b'Pirard <pirard@dom.ain>', None)])
        header = str(make_header(dh))
        eq(header, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Alternating plain and base64-encoded chunks.
        eq = self.assertEqual
        s = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        dh = decode_header(s)
        eq(dh, [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
                (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        hu = str(make_header(dh))
        eq(hu, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not separated from the surrounding text by
        # whitespace are expected to be left undecoded.
        s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(s, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words separated by whitespace are decoded individually.
        s = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        dh = decode_header(s)
        self.assertEqual(dh, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                              (b'rg', None), (b'\xe5', 'iso-8859-1'),
                              (b'sbord', None)])
1615
1616
1617
# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    # Exercises MIMEMessage (message/rfc822 wrappers) together with the
    # parsing and flattening of nested and multipart messages.

    def setUp(self):
        # Keep the raw text of msg_11.txt on the instance.
        with openfile('msg_11.txt') as source:
            self._text = source.read()

    def test_type_error(self):
        # A plain string is not an acceptable payload for MIMEMessage.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        title = 'A sub-message'
        inner = Message()
        inner['Subject'] = title
        wrapper = MIMEMessage(inner)
        self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
        parts = wrapper.get_payload()
        self.assertTrue(isinstance(parts, list))
        self.assertEqual(len(parts), 1)
        # The wrapped message must be the very same object, not a copy.
        only_part = parts[0]
        self.assertTrue(only_part is inner)
        self.assertEqual(only_part['subject'], title)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage must be refused.
        first = Message()
        first['Subject'] = 'subpart 1'
        second = Message()
        second['Subject'] = 'subpart 2'
        wrapper = MIMEMessage(first)
        self.assertRaises(errors.MultipartConversionError,
                          wrapper.attach, second)

    def test_generate(self):
        # First craft the message to be encapsulated
        enclosed = Message()
        enclosed['Subject'] = 'An enclosed message'
        enclosed.set_payload('Here is the body of the message.\n')
        # Then wrap it and flatten the wrapper
        enclosing = MIMEMessage(enclosed)
        enclosing['Subject'] = 'The enclosing message'
        out = StringIO()
        flattener = Generator(out)
        flattener.flatten(enclosing)
        self.assertEqual(out.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        parsed = self._msgobj('msg_11.txt')
        self.assertEqual(parsed.get_content_type(), 'message/rfc822')
        parts = parsed.get_payload()
        self.assertTrue(isinstance(parts, list))
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertTrue(isinstance(enclosed, Message))
        self.assertEqual(enclosed['subject'], 'An enclosed message')
        self.assertEqual(enclosed.get_payload(),
                         'Here is the body of the message.\n')

    def test_dsn(self):
        # msg 16 is a Delivery Status Notification, see RFC 1894
        report = self._msgobj('msg_16.txt')
        self.assertEqual(report.get_content_type(), 'multipart/report')
        self.assertTrue(report.is_multipart())
        self.assertEqual(len(report.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        readable = report.get_payload(0)
        self.assertEqual(readable.get_content_type(), 'text/plain')
        self.assertEqual(readable.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested
        # Message objects.
        status = report.get_payload(1)
        self.assertEqual(status.get_content_type(), 'message/delivery-status')
        self.assertEqual(len(status.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        first_block = status.get_payload(0)
        self.assertTrue(isinstance(first_block, Message))
        self.assertEqual(first_block['original-envelope-id'],
                         '0GK500B4HD0888@cougar.noc.ucla.edu')
        self.assertEqual(
            first_block.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        self.assertEqual(
            first_block.get_param('nsd', header='reporting-mta'), None)
        second_block = status.get_payload(1)
        self.assertTrue(isinstance(second_block, Message))
        self.assertEqual(second_block['action'], 'failed')
        self.assertEqual(
            second_block.get_params(header='original-recipient'),
            [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        self.assertEqual(
            second_block.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        original = report.get_payload(2)
        self.assertEqual(original.get_content_type(), 'message/rfc822')
        parts = original.get_payload()
        self.assertTrue(isinstance(parts, list))
        self.assertEqual(len(parts), 1)
        enclosed = parts[0]
        self.assertTrue(isinstance(enclosed, Message))
        self.assertEqual(enclosed.get_content_type(), 'text/plain')
        self.assertEqual(enclosed['message-id'],
                         '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        # A hand-built multipart with preamble and epilogue must flatten
        # to exactly the text stored in msg_21.txt.
        with openfile('msg_21.txt') as source:
            expected = source.read()
        outer = Message()
        outer['From'] = 'aperson@dom.ain'
        outer['To'] = 'bperson@dom.ain'
        outer['Subject'] = 'Test'
        outer.preamble = 'MIME message'
        outer.epilogue = 'End of MIME message\n'
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        outer.attach(part_one)
        outer.attach(part_two)
        out = StringIO()
        flattener = Generator(out)
        flattener.flatten(outer)
        self.ndiffAssertEqual(out.getvalue(), expected)

    def test_no_nl_preamble(self):
        # Same construction as test_epilogue, but with an empty epilogue
        # and the expected output spelled out inline.
        outer = Message()
        outer['From'] = 'aperson@dom.ain'
        outer['To'] = 'bperson@dom.ain'
        outer['Subject'] = 'Test'
        outer.preamble = 'MIME message'
        outer.epilogue = ''
        part_one = MIMEText('One')
        part_two = MIMEText('Two')
        outer.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        outer.attach(part_one)
        outer.attach(part_two)
        self.ndiffAssertEqual(outer.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        with openfile('msg_30.txt') as source:
            parsed = email.message_from_file(source)
        # Both top-level parts must report message/rfc822 ...
        containers = []
        for index in (0, 1):
            container = parsed.get_payload(index)
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
            containers.append(container)
        # ... and the message inside each of them text/plain.
        for container in containers:
            enclosed = container.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        with openfile('msg_28.txt') as source:
            parsed = email.message_from_file(source)
        # Same expectations as test_default_type, for msg_28.txt.
        containers = []
        for index in (0, 1):
            container = parsed.get_payload(index)
            self.assertEqual(container.get_default_type(), 'message/rfc822')
            self.assertEqual(container.get_content_type(), 'message/rfc822')
            containers.append(container)
        for container in containers:
            enclosed = container.get_payload(0)
            self.assertEqual(enclosed.get_default_type(), 'text/plain')
            self.assertEqual(enclosed.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        # Set up container
        digest = MIMEMultipart('digest', 'BOUNDARY')
        digest.epilogue = ''
        # Set up subparts
        text_one = MIMEText('message 1\n')
        text_two = MIMEText('message 2\n')
        wrapped_one = MIMEMessage(text_one)
        wrapped_two = MIMEMessage(text_two)
        digest.attach(wrapped_one)
        digest.attach(wrapped_two)
        for wrapped in (wrapped_one, wrapped_two):
            self.assertEqual(wrapped.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapped.get_default_type(), 'message/rfc822')
        self.ndiffAssertEqual(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Strip the explicit MIME headers from both wrappers; they must
        # still report message/rfc822 afterwards.
        for wrapped in (wrapped_one, wrapped_two):
            del wrapped['content-type']
            del wrapped['mime-version']
        for wrapped in (wrapped_one, wrapped_two):
            self.assertEqual(wrapped.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapped.get_default_type(), 'message/rfc822')
        self.ndiffAssertEqual(digest.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        # Subparts handed to the constructor become the payload, in order.
        first = MIMEText('')
        second = MIMEText('')
        container = MIMEMultipart(_subparts=(first, second))
        self.assertEqual(len(container.get_payload()), 2)
        self.assertEqual(container.get_payload(0), first)
        self.assertEqual(container.get_payload(1), second)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built without arguments is already multipart.
        container = MIMEMultipart()
        self.assertTrue(container.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001917
1918
# A general test of parser->model->generator idempotency.  In other words,
# read a message in, parse it into a message object tree, then without
# touching the tree, regenerate the plain text.  The original text and the
# transformed text should be identical.  Note that we ignore the Unix-From
# since that may contain a changed date.
class TestIdempotent(TestEmailBase):
    # Each test parses a sample message and checks that flattening the
    # untouched object tree reproduces the original text exactly.

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it was parsed
        # from, so callers can compare the regenerated output against it.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text):
        # Flatten msg with header wrapping disabled (maxheaderlen=0) and
        # require the result to equal text.
        s = StringIO()
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        self.ndiffAssertEqual(text, s.getvalue())

    def test_parse_text_message(self):
        # assertEqual, not the deprecated assertEquals alias (removed in
        # Python 3.12).
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        unless = self.assertTrue
        # Only the parsed message is inspected here; the raw text is unused.
        msg, _text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
        eq(msg.epilogue, '\n')
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda\n')
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda\n')
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        unless(isinstance(msg3, Message))
        payload = msg3.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        msg4 = payload[0]
        unless(isinstance(msg4, Message))
        eq(msg4.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        eq = self.assertEqual
        unless = self.assertTrue
        msg, _text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        msg1 = payload[0]
        unless(isinstance(msg1, Message))
        eq(msg1.get_content_type(), 'text/plain')
        unless(isinstance(msg1.get_payload(), str))
        eq(msg1.get_payload(), '\n')
2070
2071
2072
2073# Test various other bits of the package's functionality
2074class TestMiscellaneous(TestEmailBase):
2075 def test_message_from_string(self):
2076 with openfile('msg_01.txt') as fp:
2077 text = fp.read()
2078 msg = email.message_from_string(text)
2079 s = StringIO()
2080 # Don't wrap/continue long headers since we're trying to test
2081 # idempotency.
2082 g = Generator(s, maxheaderlen=0)
2083 g.flatten(msg)
2084 self.assertEqual(text, s.getvalue())
2085
2086 def test_message_from_file(self):
2087 with openfile('msg_01.txt') as fp:
2088 text = fp.read()
2089 fp.seek(0)
2090 msg = email.message_from_file(fp)
2091 s = StringIO()
2092 # Don't wrap/continue long headers since we're trying to test
2093 # idempotency.
2094 g = Generator(s, maxheaderlen=0)
2095 g.flatten(msg)
2096 self.assertEqual(text, s.getvalue())
2097
2098 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002099 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002100 with openfile('msg_01.txt') as fp:
2101 text = fp.read()
2102
2103 # Create a subclass
2104 class MyMessage(Message):
2105 pass
2106
2107 msg = email.message_from_string(text, MyMessage)
2108 unless(isinstance(msg, MyMessage))
2109 # Try something more complicated
2110 with openfile('msg_02.txt') as fp:
2111 text = fp.read()
2112 msg = email.message_from_string(text, MyMessage)
2113 for subpart in msg.walk():
2114 unless(isinstance(subpart, MyMessage))
2115
2116 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002117 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002118 # Create a subclass
2119 class MyMessage(Message):
2120 pass
2121
2122 with openfile('msg_01.txt') as fp:
2123 msg = email.message_from_file(fp, MyMessage)
2124 unless(isinstance(msg, MyMessage))
2125 # Try something more complicated
2126 with openfile('msg_02.txt') as fp:
2127 msg = email.message_from_file(fp, MyMessage)
2128 for subpart in msg.walk():
2129 unless(isinstance(subpart, MyMessage))
2130
2131 def test__all__(self):
2132 module = __import__('email')
2133 # Can't use sorted() here due to Python 2.3 compatibility
2134 all = module.__all__[:]
2135 all.sort()
2136 self.assertEqual(all, [
2137 'base64mime', 'charset', 'encoders', 'errors', 'generator',
2138 'header', 'iterators', 'message', 'message_from_file',
2139 'message_from_string', 'mime', 'parser',
2140 'quoprimime', 'utils',
2141 ])
2142
2143 def test_formatdate(self):
2144 now = time.time()
2145 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2146 time.gmtime(now)[:6])
2147
2148 def test_formatdate_localtime(self):
2149 now = time.time()
2150 self.assertEqual(
2151 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2152 time.localtime(now)[:6])
2153
2154 def test_formatdate_usegmt(self):
2155 now = time.time()
2156 self.assertEqual(
2157 utils.formatdate(now, localtime=False),
2158 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2159 self.assertEqual(
2160 utils.formatdate(now, localtime=False, usegmt=True),
2161 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2162
2163 def test_parsedate_none(self):
2164 self.assertEqual(utils.parsedate(''), None)
2165
2166 def test_parsedate_compact(self):
2167 # The FWS after the comma is optional
2168 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2169 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2170
2171 def test_parsedate_no_dayofweek(self):
2172 eq = self.assertEqual
2173 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2174 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2175
2176 def test_parsedate_compact_no_dayofweek(self):
2177 eq = self.assertEqual
2178 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2179 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2180
2181 def test_parsedate_acceptable_to_time_functions(self):
2182 eq = self.assertEqual
2183 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2184 t = int(time.mktime(timetup))
2185 eq(time.localtime(t)[:6], timetup[:6])
2186 eq(int(time.strftime('%Y', timetup)), 2003)
2187 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2188 t = int(time.mktime(timetup[:9]))
2189 eq(time.localtime(t)[:6], timetup[:6])
2190 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2191
2192 def test_parseaddr_empty(self):
2193 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2194 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2195
2196 def test_noquote_dump(self):
2197 self.assertEqual(
2198 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2199 'A Silly Person <person@dom.ain>')
2200
2201 def test_escape_dump(self):
2202 self.assertEqual(
2203 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2204 r'"A \(Very\) Silly Person" <person@dom.ain>')
2205 a = r'A \(Special\) Person'
2206 b = 'person@dom.ain'
2207 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2208
2209 def test_escape_backslashes(self):
2210 self.assertEqual(
2211 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2212 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2213 a = r'Arthur \Backslash\ Foobar'
2214 b = 'person@dom.ain'
2215 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2216
2217 def test_name_with_dot(self):
2218 x = 'John X. Doe <jxd@example.com>'
2219 y = '"John X. Doe" <jxd@example.com>'
2220 a, b = ('John X. Doe', 'jxd@example.com')
2221 self.assertEqual(utils.parseaddr(x), (a, b))
2222 self.assertEqual(utils.parseaddr(y), (a, b))
2223 # formataddr() quotes the name if there's a dot in it
2224 self.assertEqual(utils.formataddr((a, b)), y)
2225
2226 def test_multiline_from_comment(self):
2227 x = """\
2228Foo
2229\tBar <foo@example.com>"""
2230 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2231
2232 def test_quote_dump(self):
2233 self.assertEqual(
2234 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2235 r'"A Silly; Person" <person@dom.ain>')
2236
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002237 def test_charset_richcomparisons(self):
2238 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002239 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002240 cset1 = Charset()
2241 cset2 = Charset()
2242 eq(cset1, 'us-ascii')
2243 eq(cset1, 'US-ASCII')
2244 eq(cset1, 'Us-AsCiI')
2245 eq('us-ascii', cset1)
2246 eq('US-ASCII', cset1)
2247 eq('Us-AsCiI', cset1)
2248 ne(cset1, 'usascii')
2249 ne(cset1, 'USASCII')
2250 ne(cset1, 'UsAsCiI')
2251 ne('usascii', cset1)
2252 ne('USASCII', cset1)
2253 ne('UsAsCiI', cset1)
2254 eq(cset1, cset2)
2255 eq(cset2, cset1)
2256
2257 def test_getaddresses(self):
2258 eq = self.assertEqual
2259 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2260 'Bud Person <bperson@dom.ain>']),
2261 [('Al Person', 'aperson@dom.ain'),
2262 ('Bud Person', 'bperson@dom.ain')])
2263
2264 def test_getaddresses_nasty(self):
2265 eq = self.assertEqual
2266 eq(utils.getaddresses(['foo: ;']), [('', '')])
2267 eq(utils.getaddresses(
2268 ['[]*-- =~$']),
2269 [('', ''), ('', ''), ('', '*--')])
2270 eq(utils.getaddresses(
2271 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2272 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2273
2274 def test_getaddresses_embedded_comment(self):
2275 """Test proper handling of a nested comment"""
2276 eq = self.assertEqual
2277 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2278 eq(addrs[0][1], 'foo@bar.com')
2279
2280 def test_utils_quote_unquote(self):
2281 eq = self.assertEqual
2282 msg = Message()
2283 msg.add_header('content-disposition', 'attachment',
2284 filename='foo\\wacky"name')
2285 eq(msg.get_filename(), 'foo\\wacky"name')
2286
2287 def test_get_body_encoding_with_bogus_charset(self):
2288 charset = Charset('not a charset')
2289 self.assertEqual(charset.get_body_encoding(), 'base64')
2290
2291 def test_get_body_encoding_with_uppercase_charset(self):
2292 eq = self.assertEqual
2293 msg = Message()
2294 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2295 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2296 charsets = msg.get_charsets()
2297 eq(len(charsets), 1)
2298 eq(charsets[0], 'utf-8')
2299 charset = Charset(charsets[0])
2300 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002301 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002302 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2303 eq(msg.get_payload(decode=True), b'hello world')
2304 eq(msg['content-transfer-encoding'], 'base64')
2305 # Try another one
2306 msg = Message()
2307 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2308 charsets = msg.get_charsets()
2309 eq(len(charsets), 1)
2310 eq(charsets[0], 'us-ascii')
2311 charset = Charset(charsets[0])
2312 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2313 msg.set_payload('hello world', charset=charset)
2314 eq(msg.get_payload(), 'hello world')
2315 eq(msg['content-transfer-encoding'], '7bit')
2316
2317 def test_charsets_case_insensitive(self):
2318 lc = Charset('us-ascii')
2319 uc = Charset('US-ASCII')
2320 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2321
2322 def test_partial_falls_inside_message_delivery_status(self):
2323 eq = self.ndiffAssertEqual
2324 # The Parser interface provides chunks of data to FeedParser in 8192
2325 # byte gulps. SF bug #1076485 found one of those chunks inside
2326 # message/delivery-status header block, which triggered an
2327 # unreadline() of NeedMoreData.
2328 msg = self._msgobj('msg_43.txt')
2329 sfp = StringIO()
2330 iterators._structure(msg, sfp)
2331 eq(sfp.getvalue(), """\
2332multipart/report
2333 text/plain
2334 message/delivery-status
2335 text/plain
2336 text/plain
2337 text/plain
2338 text/plain
2339 text/plain
2340 text/plain
2341 text/plain
2342 text/plain
2343 text/plain
2344 text/plain
2345 text/plain
2346 text/plain
2347 text/plain
2348 text/plain
2349 text/plain
2350 text/plain
2351 text/plain
2352 text/plain
2353 text/plain
2354 text/plain
2355 text/plain
2356 text/plain
2357 text/plain
2358 text/plain
2359 text/plain
2360 text/plain
2361 text/rfc822-headers
2362""")
2363
2364
2365
2366# Test the iterator/generators
class TestIterators(TestEmailBase):
    # Checks for the helpers in email.iterators.

    def test_body_line_iterator(self):
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(body_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                              simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(body_lines), 43)
        with openfile('msg_19.txt') as reference:
            self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                                  reference.read())

    def test_typed_subpart_iterator(self):
        # Collect the payload of every subpart whose main type is text.
        parsed = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(parsed, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # msg_03.txt must be yielded exactly once when text/plain is
        # requested.
        parsed = self._msgobj('msg_03.txt')
        matches = iterators.typed_subpart_iterator(parsed, 'text', 'plain')
        payloads = [part.get_payload() for part in matches]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")
2420
2421
2422
class TestParsers(TestEmailBase):
    # Parser and HeaderParser behaviour on unusual or borderline input.

    def test_header_parser(self):
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as source:
            parsed = HeaderParser().parse(source)
        self.assertEqual(parsed['from'], 'ppp-request@zzz.org')
        self.assertEqual(parsed['to'], 'ppp@zzz.org')
        self.assertEqual(parsed.get_content_type(), 'multipart/mixed')
        # The payload is expected to stay one string, not a list of parts
        self.assertFalse(parsed.is_multipart())
        self.assertTrue(isinstance(parsed.get_payload(), str))

    def test_whitespace_continuation(self):
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        # Like the previous test, but the subject line is the last
        # header.
        parsed = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        self.assertEqual(parsed['subject'],
                         'the next line has a space on it\n ')
        self.assertEqual(parsed['message-id'], 'spam')
        self.assertEqual(parsed.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        # newline='\n' is passed through to open() for this fixture.
        with openfile('msg_26.txt', newline='\n') as source:
            parsed = Parser().parse(source)
        self.assertEqual(len(parsed.get_payload()), 2)
        text_part = parsed.get_payload(0)
        self.assertEqual(text_part.get_content_type(), 'text/plain')
        self.assertEqual(text_part.get_payload(),
                         'Simple email with attachment.\r\n\r\n')
        attachment = parsed.get_payload(1)
        self.assertEqual(attachment.get_content_type(), 'application/riscos')

    def test_multipart_digest_with_extra_mime_headers(self):
        with openfile('msg_28.txt') as source:
            parsed = email.message_from_file(source)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertEqual(parsed.is_multipart(), 1)
        self.assertEqual(len(parsed.get_payload()), 2)
        # Each message/rfc822 wrapper holds one text/plain message
        for index, body in enumerate(('message 1\n', 'message 2\n')):
            wrapper = parsed.get_payload(index)
            self.assertEqual(wrapper.get_content_type(), 'message/rfc822')
            self.assertEqual(wrapper.is_multipart(), 1)
            self.assertEqual(len(wrapper.get_payload()), 1)
            enclosed = wrapper.get_payload(0)
            self.assertEqual(enclosed.is_multipart(), 0)
            self.assertEqual(enclosed.get_content_type(), 'text/plain')
            self.ndiffAssertEqual(enclosed.get_payload(), body)

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        header_lines = [
            'From: Andrew Person <aperson@dom.ain',
            'Subject: Test',
            'Date: Tue, 20 Aug 2002 16:43:45 +1000',
        ]
        parsed = email.message_from_string(NL.join(header_lines))
        self.assertEqual(parsed['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        first_value = 'text'
        second_value = 'more text'
        raw = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            first_value, second_value)
        parsed = email.message_from_string(raw)
        self.assertEqual(parsed.get('Header'), first_value)
        self.assertEqual(parsed.get('Next-Header'), second_value)

    def test_rfc2822_header_syntax(self):
        # Unusual but printable characters are accepted in field names.
        raw = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(raw)
        self.assertEqual(len(parsed), 3)
        field_names = sorted(name for name in parsed)
        self.assertEqual(field_names, ['!"#QUX;~', '>From', 'From'])
        self.assertEqual(parsed.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        # A first line with a space before any colon yields no headers.
        raw = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        parsed = email.message_from_string(raw)
        self.assertEqual(len(parsed.keys()), 0)

    def test_rfc2822_one_character_header(self):
        raw = 'A: first header\nB: second header\nCC: third header\n\nbody'
        parsed = email.message_from_string(raw)
        field_names = sorted(parsed.keys())
        self.assertEqual(field_names, ['A', 'B', 'CC'])
        self.assertEqual(parsed.get_payload(), 'body')
2553 eq(msg.get_payload(), 'body')
2554
2555
2556
class TestBase64(unittest.TestCase):
    # Unit tests for the helper functions in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        # header_length() must agree with the size of the encoded text.
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        # Expected encoded length for inputs of 0..14 characters: every
        # started group of three characters costs four.
        expected_sizes = (0,
                          4, 4, 4,
                          8, 8, 8,
                          12, 12, 12,
                          16, 16, 16,
                          20, 20)
        for size, bsize in enumerate(expected_sizes):
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        # NOTE(review): empty input is expected back as bytes while every
        # non-empty input below yields str -- confirm this asymmetry is
        # intended.
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        # This assertion used to repeat the default-charset check above
        # verbatim; it now passes the charset it is meant to exercise.
        eq(he('hello\nworld', charset='iso-8859-2'),
           '=?iso-8859-2?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002607
2608
2609
class TestQuopri(unittest.TestCase):
    # Unit tests for the quoted-printable helpers in email.quoprimime.

    def setUp(self):
        # Characters (as byte integers) that don't need to be encoded in
        # headers.  Iterating a bytes object already yields integers, so the
        # punctuation can be chained in directly.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # TestCase assertions instead of bare asserts, so the sanity checks
        # still run under -O and report both values on failure.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Characters (as byte integers) that don't need to be encoded in
        # bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                             'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                             'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        for sample, length in ((b'hello', 5), (b'h@e@l@l@o@', 20)):
            eq(quoprimime.header_length(sample), length)
            # RFC 2047 chrome is not included in header_length():
            # =?xxx?q?...?= means 10 extra characters.
            eq(len(quoprimime.header_encode(sample, charset='xxx')),
               quoprimime.header_length(sample) + 10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # quote() followed by unquote() must round-trip every code point
        # below 256.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        # The second argument replaces line breaks in the decoded text.
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # A CRLF in the input comes out as a bare newline.
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # Blank lines inside the body are preserved.
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
2726
2727
2728
2729# Test the Charset class
2730class TestCharset(unittest.TestCase):
2731 def tearDown(self):
2732 from email import charset as CharsetModule
2733 try:
2734 del CharsetModule.CHARSETS['fake']
2735 except KeyError:
2736 pass
2737
Guido van Rossum9604e662007-08-30 03:46:43 +00002738 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002739 eq = self.assertEqual
2740 # Make sure us-ascii = no Unicode conversion
2741 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00002742 eq(c.header_encode('Hello World!'), 'Hello World!')
2743 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002744 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00002745 self.assertRaises(UnicodeError, c.header_encode, s)
2746 c = Charset('utf-8')
2747 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002748
2749 def test_body_encode(self):
2750 eq = self.assertEqual
2751 # Try a charset with QP body encoding
2752 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002753 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002754 # Try a charset with Base64 body encoding
2755 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002756 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002757 # Try a charset with None body encoding
2758 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002759 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002760 # Try the convert argument, where input codec != output codec
2761 c = Charset('euc-jp')
2762 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00002763 # XXX FIXME
2764## try:
2765## eq('\x1b$B5FCO;~IW\x1b(B',
2766## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
2767## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
2768## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
2769## except LookupError:
2770## # We probably don't have the Japanese codecs installed
2771## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002772 # Testing SF bug #625509, which we have to fake, since there are no
2773 # built-in encodings where the header encoding is QP but the body
2774 # encoding is not.
2775 from email import charset as CharsetModule
2776 CharsetModule.add_charset('fake', CharsetModule.QP, None)
2777 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002778 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002779
2780 def test_unicode_charset_name(self):
2781 charset = Charset('us-ascii')
2782 self.assertEqual(str(charset), 'us-ascii')
2783 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
2784
2785
2786
2787# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    # Tests for email.header: Header, decode_header() and make_header().

    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # NOTE(review): two spaces are expected here -- the separator that
        # test_simple_surprise shows being inserted between chunks, plus the
        # leading space of the appended chunk.  Confirm against upstream.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        # The chunk has no leading space, yet one shows up in the output.
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect maxlinelen; assertLessEqual reports
        # the offending length when it does not.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        # Decoding must give back one (bytes, charset) pair per charset used.
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        # Default line length.
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With a header name the first line is folded one word earlier.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # A large explicit maxlinelen leaves the text on a single line.
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        # Reuse x so the round-trip check below compares against exactly
        # what was appended.
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        eq = self.assertEqual
        h = Header()
        # The charset may be given by name instead of as a Charset instance.
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        eq = self.assertEqual
        # Mostly-ASCII text is expected as a 'q' encoded word...
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        # ...and all-non-ASCII text as a 'b' encoded word.
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        # The stray \x96 byte is rejected by default...
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # ...and accepted with errors='replace', matching a utf-8 decode
        # that uses the same error handler.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        # The encoded form must survive a decode/make_header round trip.
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        # The two adjacent koi8-r encoded words come back as one chunk.
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
        raises(errors.HeaderParseError, decode_header, s)
3094
3095
3096
3097# Test RFC 2231 header parameters (en/de)coding
3098class TestRFC2231(TestEmailBase):
3099 def test_get_param(self):
3100 eq = self.assertEqual
3101 msg = self._msgobj('msg_29.txt')
3102 eq(msg.get_param('title'),
3103 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3104 eq(msg.get_param('title', unquote=False),
3105 ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3106
3107 def test_set_param(self):
3108 eq = self.ndiffAssertEqual
3109 msg = Message()
3110 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3111 charset='us-ascii')
3112 eq(msg.get_param('title'),
3113 ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
3114 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3115 charset='us-ascii', language='en')
3116 eq(msg.get_param('title'),
3117 ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
3118 msg = self._msgobj('msg_01.txt')
3119 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3120 charset='us-ascii', language='en')
3121 eq(msg.as_string(maxheaderlen=78), """\
3122Return-Path: <bbb@zzz.org>
3123Delivered-To: bbb@zzz.org
3124Received: by mail.zzz.org (Postfix, from userid 889)
3125\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3126MIME-Version: 1.0
3127Content-Transfer-Encoding: 7bit
3128Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3129From: bbb@ddd.com (John X. Doe)
3130To: bbb@zzz.org
3131Subject: This is a test message
3132Date: Fri, 4 May 2001 14:05:44 -0400
3133Content-Type: text/plain; charset=us-ascii;
3134 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
3135
3136
3137Hi,
3138
3139Do you like this message?
3140
3141-Me
3142""")
3143
3144 def test_del_param(self):
3145 eq = self.ndiffAssertEqual
3146 msg = self._msgobj('msg_01.txt')
3147 msg.set_param('foo', 'bar', charset='us-ascii', language='en')
3148 msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
3149 charset='us-ascii', language='en')
3150 msg.del_param('foo', header='Content-Type')
3151 eq(msg.as_string(maxheaderlen=78), """\
3152Return-Path: <bbb@zzz.org>
3153Delivered-To: bbb@zzz.org
3154Received: by mail.zzz.org (Postfix, from userid 889)
3155\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
3156MIME-Version: 1.0
3157Content-Transfer-Encoding: 7bit
3158Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
3159From: bbb@ddd.com (John X. Doe)
3160To: bbb@zzz.org
3161Subject: This is a test message
3162Date: Fri, 4 May 2001 14:05:44 -0400
3163Content-Type: text/plain; charset="us-ascii";
3164 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"
3165
3166
3167Hi,
3168
3169Do you like this message?
3170
3171-Me
3172""")
3173
3174 def test_rfc2231_get_content_charset(self):
3175 eq = self.assertEqual
3176 msg = self._msgobj('msg_32.txt')
3177 eq(msg.get_content_charset(), 'us-ascii')
3178
3179 def test_rfc2231_no_language_or_charset(self):
3180 m = '''\
3181Content-Transfer-Encoding: 8bit
3182Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3183Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3184
3185'''
3186 msg = email.message_from_string(m)
3187 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003188 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003189 self.assertEqual(
3190 param,
3191 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3192
3193 def test_rfc2231_no_language_or_charset_in_filename(self):
3194 m = '''\
3195Content-Disposition: inline;
3196\tfilename*0*="''This%20is%20even%20more%20";
3197\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3198\tfilename*2="is it not.pdf"
3199
3200'''
3201 msg = email.message_from_string(m)
3202 self.assertEqual(msg.get_filename(),
3203 'This is even more ***fun*** is it not.pdf')
3204
3205 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3206 m = '''\
3207Content-Disposition: inline;
3208\tfilename*0*="''This%20is%20even%20more%20";
3209\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3210\tfilename*2="is it not.pdf"
3211
3212'''
3213 msg = email.message_from_string(m)
3214 self.assertEqual(msg.get_filename(),
3215 'This is even more ***fun*** is it not.pdf')
3216
3217 def test_rfc2231_partly_encoded(self):
3218 m = '''\
3219Content-Disposition: inline;
3220\tfilename*0="''This%20is%20even%20more%20";
3221\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3222\tfilename*2="is it not.pdf"
3223
3224'''
3225 msg = email.message_from_string(m)
3226 self.assertEqual(
3227 msg.get_filename(),
3228 'This%20is%20even%20more%20***fun*** is it not.pdf')
3229
3230 def test_rfc2231_partly_nonencoded(self):
3231 m = '''\
3232Content-Disposition: inline;
3233\tfilename*0="This%20is%20even%20more%20";
3234\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3235\tfilename*2="is it not.pdf"
3236
3237'''
3238 msg = email.message_from_string(m)
3239 self.assertEqual(
3240 msg.get_filename(),
3241 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3242
3243 def test_rfc2231_no_language_or_charset_in_boundary(self):
3244 m = '''\
3245Content-Type: multipart/alternative;
3246\tboundary*0*="''This%20is%20even%20more%20";
3247\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3248\tboundary*2="is it not.pdf"
3249
3250'''
3251 msg = email.message_from_string(m)
3252 self.assertEqual(msg.get_boundary(),
3253 'This is even more ***fun*** is it not.pdf')
3254
3255 def test_rfc2231_no_language_or_charset_in_charset(self):
3256 # This is a nonsensical charset value, but tests the code anyway
3257 m = '''\
3258Content-Type: text/plain;
3259\tcharset*0*="This%20is%20even%20more%20";
3260\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3261\tcharset*2="is it not.pdf"
3262
3263'''
3264 msg = email.message_from_string(m)
3265 self.assertEqual(msg.get_content_charset(),
3266 'this is even more ***fun*** is it not.pdf')
3267
3268 def test_rfc2231_bad_encoding_in_filename(self):
3269 m = '''\
3270Content-Disposition: inline;
3271\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3272\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3273\tfilename*2="is it not.pdf"
3274
3275'''
3276 msg = email.message_from_string(m)
3277 self.assertEqual(msg.get_filename(),
3278 'This is even more ***fun*** is it not.pdf')
3279
3280 def test_rfc2231_bad_encoding_in_charset(self):
3281 m = """\
3282Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3283
3284"""
3285 msg = email.message_from_string(m)
3286 # This should return None because non-ascii characters in the charset
3287 # are not allowed.
3288 self.assertEqual(msg.get_content_charset(), None)
3289
3290 def test_rfc2231_bad_character_in_charset(self):
3291 m = """\
3292Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3293
3294"""
3295 msg = email.message_from_string(m)
3296 # This should return None because non-ascii characters in the charset
3297 # are not allowed.
3298 self.assertEqual(msg.get_content_charset(), None)
3299
3300 def test_rfc2231_bad_character_in_filename(self):
3301 m = '''\
3302Content-Disposition: inline;
3303\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3304\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3305\tfilename*2*="is it not.pdf%E2"
3306
3307'''
3308 msg = email.message_from_string(m)
3309 self.assertEqual(msg.get_filename(),
3310 'This is even more ***fun*** is it not.pdf\ufffd')
3311
3312 def test_rfc2231_unknown_encoding(self):
3313 m = """\
3314Content-Transfer-Encoding: 8bit
3315Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3316
3317"""
3318 msg = email.message_from_string(m)
3319 self.assertEqual(msg.get_filename(), 'myfile.txt')
3320
3321 def test_rfc2231_single_tick_in_filename_extended(self):
3322 eq = self.assertEqual
3323 m = """\
3324Content-Type: application/x-foo;
3325\tname*0*=\"Frank's\"; name*1*=\" Document\"
3326
3327"""
3328 msg = email.message_from_string(m)
3329 charset, language, s = msg.get_param('name')
3330 eq(charset, None)
3331 eq(language, None)
3332 eq(s, "Frank's Document")
3333
3334 def test_rfc2231_single_tick_in_filename(self):
3335 m = """\
3336Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3337
3338"""
3339 msg = email.message_from_string(m)
3340 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003341 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003342 self.assertEqual(param, "Frank's Document")
3343
3344 def test_rfc2231_tick_attack_extended(self):
3345 eq = self.assertEqual
3346 m = """\
3347Content-Type: application/x-foo;
3348\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3349
3350"""
3351 msg = email.message_from_string(m)
3352 charset, language, s = msg.get_param('name')
3353 eq(charset, 'us-ascii')
3354 eq(language, 'en-us')
3355 eq(s, "Frank's Document")
3356
3357 def test_rfc2231_tick_attack(self):
3358 m = """\
3359Content-Type: application/x-foo;
3360\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3361
3362"""
3363 msg = email.message_from_string(m)
3364 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003365 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003366 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3367
3368 def test_rfc2231_no_extended_values(self):
3369 eq = self.assertEqual
3370 m = """\
3371Content-Type: application/x-foo; name=\"Frank's Document\"
3372
3373"""
3374 msg = email.message_from_string(m)
3375 eq(msg.get_param('name'), "Frank's Document")
3376
3377 def test_rfc2231_encoded_then_unencoded_segments(self):
3378 eq = self.assertEqual
3379 m = """\
3380Content-Type: application/x-foo;
3381\tname*0*=\"us-ascii'en-us'My\";
3382\tname*1=\" Document\";
3383\tname*2*=\" For You\"
3384
3385"""
3386 msg = email.message_from_string(m)
3387 charset, language, s = msg.get_param('name')
3388 eq(charset, 'us-ascii')
3389 eq(language, 'en-us')
3390 eq(s, 'My Document For You')
3391
3392 def test_rfc2231_unencoded_then_encoded_segments(self):
3393 eq = self.assertEqual
3394 m = """\
3395Content-Type: application/x-foo;
3396\tname*0=\"us-ascii'en-us'My\";
3397\tname*1*=\" Document\";
3398\tname*2*=\" For You\"
3399
3400"""
3401 msg = email.message_from_string(m)
3402 charset, language, s = msg.get_param('name')
3403 eq(charset, 'us-ascii')
3404 eq(language, 'en-us')
3405 eq(s, 'My Document For You')
3406
3407
3408
R. David Murraya8f480f2010-01-16 18:30:03 +00003409# Tests to ensure that signed parts of an email are completely preserved, as
3410# required by RFC1847 section 2.1. Note that these are incomplete, because the
3411# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):
    """Check that the first MIME part of a message survives regeneration.

    Each test parses a data file, regenerates the message text through one
    of the generator entry points, and compares the first MIME part of the
    regenerated text with the first MIME part of the original text.
    """

    def _msg_and_obj(self, filename):
        """Return ``(original_text, parsed_message)`` for a test data file."""
        with openfile(findfile(filename)) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text, label):
        """Return the text of the first MIME part found in *text*.

        A part is whatever lies between a ``--<boundary>`` line and the next
        line that starts with the same ``--<boundary>`` text.  *label* only
        names the text in the failure message.  The test is failed explicitly
        when no part is found, instead of letting ``.group()`` blow up on
        ``None`` with an unhelpful AttributeError.
        """
        import re
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        match = repart.search(text)
        if match is None:
            self.fail('no boundary-delimited MIME part found in the %s '
                      'message text' % label)
        return match.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that *result* has the same first MIME part as *original*."""
        # Extract the first mime part of each message
        inpart = self._first_part(original, 'original')
        outpart = self._first_part(result, 'regenerated')
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Regenerate through Message.as_string() with default settings.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        # Regenerate through Message.as_string() with maxheaderlen=60.
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        # Regenerate by flattening into a StringIO with a Generator.
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3444
3445
3446
def _testclasses():
    """Return the module attributes whose names start with 'Test'.

    The attributes of this module are looked up by name and returned as a
    list, in the order in which dir() reports their names.
    """
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
3450
3451
def suite():
    """Build a single test suite covering every Test* class of this module.

    Returns:
        A unittest.TestSuite holding one sub-suite per class returned by
        _testclasses().
    """
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13.  The default loader collects the same 'test'-prefixed methods.
    load_tests_from = unittest.defaultTestLoader.loadTestsFromTestCase
    # Named all_tests rather than suite so the function's own name is not
    # shadowed inside its body.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(load_tests_from(testclass))
    return all_tests
3457
3458
def test_main():
    """Run every Test* class of this module through run_unittest().

    run_unittest() is called once per class, in the order given by
    _testclasses().
    """
    classes = _testclasses()
    for testclass in classes:
        run_unittest(testclass)
3462
3463
3464
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner,
    # which looks up the module-level suite() when no test is named.
    unittest.main(defaultTest='suite')