blob: cd28206913320ab7557f64131c2acc54fd42cffb [file] [log] [blame]
# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org
# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
12
13from io import StringIO
14from itertools import chain
15
16import email
17
18from email.charset import Charset
19from email.header import Header, decode_header, make_header
20from email.parser import Parser, HeaderParser
21from email.generator import Generator, DecodedGenerator
22from email.message import Message
23from email.mime.application import MIMEApplication
24from email.mime.audio import MIMEAudio
25from email.mime.text import MIMEText
26from email.mime.image import MIMEImage
27from email.mime.base import MIMEBase
28from email.mime.message import MIMEMessage
29from email.mime.multipart import MIMEMultipart
30from email import utils
31from email import errors
32from email import encoders
33from email import iterators
34from email import base64mime
35from email import quoprimime
36
Benjamin Petersonee8712c2008-05-20 21:35:26 +000037from test.support import findfile, run_unittest
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from email.test import __file__ as landmark
39
40
41NL = '\n'
42EMPTYSTRING = ''
43SPACE = ' '
44
45
46
def openfile(filename, *args, **kws):
    """Open a fixture file from the ``data`` directory beside the tests.

    *filename* is joined onto the ``data`` directory located next to the
    path held in ``landmark``.  Any remaining positional and keyword
    arguments are handed unchanged to the built-in open(), whose result
    is returned.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
50
51
52
53# Base test class
class TestEmailBase(unittest.TestCase):
    """Base class providing helpers shared by the email test cases."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if not (first != second):
            return
        # Compare the repr() of every line so that whitespace and escape
        # differences are visible in the failure report.
        left = [repr(line) for line in str(first).splitlines()]
        right = [repr(line) for line in str(second).splitlines()]
        report = NL.join(difflib.ndiff(left, right))
        raise self.failureException(NL + report)

    def _msgobj(self, filename):
        """Parse the named fixture file and return the resulting message."""
        path = findfile(filename)
        with openfile(path) as fp:
            msg = email.message_from_file(fp)
        return msg
68
69
70
71# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the header, parameter and payload API of Message objects.

    Tests that call self._msgobj() or openfile() read fixture files from
    the test data directory; the remaining tests build their messages
    in memory.
    """

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied fallback value.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        # The argument replaces None for parts without a charset.
        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # Read the fixture inside a with block so the file is always closed.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends rather than replaces, and lookups
            # return the first match.  Drop the previous value so that each
            # spelling is the one actually consulted when decoding.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        # With unixfrom=True the first line is the envelope header; the
        # rest must match the original text.
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        # No header name given and the message has no Content-Type header.
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_get_param_with_quotes(self):
        msg = email.message_from_string(
            'Content-Type: foo; bar*0="baz\\"foobar"; bar*1="\\"baz"')
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')
        msg = email.message_from_string(
            "Content-Type: foo; bar*0=\"baz\\\"foobar\"; bar*1=\"\\\"baz\"")
        self.assertEqual(msg.get_param('bar'), 'baz"foobar"baz')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        # Membership tests ignore the case of the header name.
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter places it after the existing ones.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        # A type without a slash is rejected.
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With two 'First' headers only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The undecodable payload is expected back as its raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000510
511
512
513# Test the email.encoders module
514class TestEncoders(unittest.TestCase):
515 def test_encode_empty_payload(self):
516 eq = self.assertEqual
517 msg = Message()
518 msg.set_charset('us-ascii')
519 eq(msg['content-transfer-encoding'], '7bit')
520
521 def test_default_cte(self):
522 eq = self.assertEqual
523 msg = MIMEText('hello world')
524 eq(msg['content-transfer-encoding'], '7bit')
525
526 def test_default_cte(self):
527 eq = self.assertEqual
528 # With no explicit _charset its us-ascii, and all are 7-bit
529 msg = MIMEText('hello world')
530 eq(msg['content-transfer-encoding'], '7bit')
531 # Similar, but with 8-bit data
532 msg = MIMEText('hello \xf8 world')
533 eq(msg['content-transfer-encoding'], '8bit')
534 # And now with a different charset
535 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
536 eq(msg['content-transfer-encoding'], 'quoted-printable')
537
538
539
540# Test long header wrapping
541class TestLongHeaders(TestEmailBase):
542 def test_split_long_continuation(self):
543 eq = self.ndiffAssertEqual
544 msg = email.message_from_string("""\
545Subject: bug demonstration
546\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
547\tmore text
548
549test
550""")
551 sfp = StringIO()
552 g = Generator(sfp)
553 g.flatten(msg)
554 eq(sfp.getvalue(), """\
555Subject: bug demonstration
556\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
557\tmore text
558
559test
560""")
561
562 def test_another_long_almost_unsplittable_header(self):
563 eq = self.ndiffAssertEqual
564 hstr = """\
565bug demonstration
566\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
567\tmore text"""
568 h = Header(hstr, continuation_ws='\t')
569 eq(h.encode(), """\
570bug demonstration
571\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
572\tmore text""")
573 h = Header(hstr.replace('\t', ' '))
574 eq(h.encode(), """\
575bug demonstration
576 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
577 more text""")
578
579 def test_long_nonstring(self):
580 eq = self.ndiffAssertEqual
581 g = Charset("iso-8859-1")
582 cz = Charset("iso-8859-2")
583 utf8 = Charset("utf-8")
584 g_head = (b'Die Mieter treten hier ein werden mit einem Foerderband '
585 b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
586 b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
587 b'bef\xf6rdert. ')
588 cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
589 b'd\xf9vtipu.. ')
590 utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
591 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
592 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
593 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
594 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
595 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
596 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
597 '\u3044\u307e\u3059\u3002')
598 h = Header(g_head, g, header_name='Subject')
599 h.append(cz_head, cz)
600 h.append(utf8_head, utf8)
601 msg = Message()
602 msg['Subject'] = h
603 sfp = StringIO()
604 g = Generator(sfp)
605 g.flatten(msg)
606 eq(sfp.getvalue(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000607Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
608 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
609 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
610 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
611 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
612 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
613 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
614 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
615 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
616 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
617 =?utf-8?b?44CC?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000618
619""")
Guido van Rossum9604e662007-08-30 03:46:43 +0000620 eq(h.encode(maxlinelen=76), """\
621=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
622 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
623 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
624 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
625 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
626 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
627 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
628 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
629 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
630 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
631 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000632
633 def test_long_header_encode(self):
634 eq = self.ndiffAssertEqual
635 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
636 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
637 header_name='X-Foobar-Spoink-Defrobnit')
638 eq(h.encode(), '''\
639wasnipoop; giraffes="very-long-necked-animals";
640 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
641
642 def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
643 eq = self.ndiffAssertEqual
644 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
645 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
646 header_name='X-Foobar-Spoink-Defrobnit',
647 continuation_ws='\t')
648 eq(h.encode(), '''\
649wasnipoop; giraffes="very-long-necked-animals";
650 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
651
652 def test_long_header_encode_with_tab_continuation(self):
653 eq = self.ndiffAssertEqual
654 h = Header('wasnipoop; giraffes="very-long-necked-animals";\t'
655 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
656 header_name='X-Foobar-Spoink-Defrobnit',
657 continuation_ws='\t')
658 eq(h.encode(), '''\
659wasnipoop; giraffes="very-long-necked-animals";
660\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
661
662 def test_header_splitter(self):
663 eq = self.ndiffAssertEqual
664 msg = MIMEText('')
665 # It'd be great if we could use add_header() here, but that doesn't
666 # guarantee an order of the parameters.
667 msg['X-Foobar-Spoink-Defrobnit'] = (
668 'wasnipoop; giraffes="very-long-necked-animals"; '
669 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
670 sfp = StringIO()
671 g = Generator(sfp)
672 g.flatten(msg)
673 eq(sfp.getvalue(), '''\
674Content-Type: text/plain; charset="us-ascii"
675MIME-Version: 1.0
676Content-Transfer-Encoding: 7bit
677X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
678 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
679
680''')
681
682 def test_no_semis_header_splitter(self):
683 eq = self.ndiffAssertEqual
684 msg = Message()
685 msg['From'] = 'test@dom.ain'
686 msg['References'] = SPACE.join('<%d@dom.ain>' % i for i in range(10))
687 msg.set_payload('Test')
688 sfp = StringIO()
689 g = Generator(sfp)
690 g.flatten(msg)
691 eq(sfp.getvalue(), """\
692From: test@dom.ain
693References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
694 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>
695
696Test""")
697
698 def test_no_split_long_header(self):
699 eq = self.ndiffAssertEqual
700 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000701 h = Header(hstr)
702 # These come on two lines because Headers are really field value
703 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000704 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000705References:
706 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
707 h = Header('x' * 80)
708 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000709
710 def test_splitting_multiple_long_lines(self):
711 eq = self.ndiffAssertEqual
712 hstr = """\
713from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
714\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
715\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
716"""
717 h = Header(hstr, continuation_ws='\t')
718 eq(h.encode(), """\
719from babylon.socal-raves.org (localhost [127.0.0.1]);
720 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
721 for <mailman-admin@babylon.socal-raves.org>;
722 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
723\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
724 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
725 for <mailman-admin@babylon.socal-raves.org>;
726 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
727\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
728 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
729 for <mailman-admin@babylon.socal-raves.org>;
730 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
731
732 def test_splitting_first_line_only_is_long(self):
733 eq = self.ndiffAssertEqual
734 hstr = """\
735from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
736\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
737\tid 17k4h5-00034i-00
738\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
739 h = Header(hstr, maxlinelen=78, header_name='Received',
740 continuation_ws='\t')
741 eq(h.encode(), """\
742from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
743 helo=cthulhu.gerg.ca)
744\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
745\tid 17k4h5-00034i-00
746\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
747
748 def test_long_8bit_header(self):
749 eq = self.ndiffAssertEqual
750 msg = Message()
751 h = Header('Britische Regierung gibt', 'iso-8859-1',
752 header_name='Subject')
753 h.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
Guido van Rossum9604e662007-08-30 03:46:43 +0000754 eq(h.encode(maxlinelen=76), """\
755=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
756 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000757 msg['Subject'] = h
Guido van Rossum9604e662007-08-30 03:46:43 +0000758 eq(msg.as_string(maxheaderlen=76), """\
759Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
760 =?iso-8859-1?q?hore-Windkraftprojekte?=
761
762""")
763 eq(msg.as_string(maxheaderlen=0), """\
764Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000765
766""")
767
768 def test_long_8bit_header_no_charset(self):
769 eq = self.ndiffAssertEqual
770 msg = Message()
Barry Warsaw8c571042007-08-30 19:17:18 +0000771 header_string = ('Britische Regierung gibt gr\xfcnes Licht '
772 'f\xfcr Offshore-Windkraftprojekte '
773 '<a-very-long-address@example.com>')
774 msg['Reply-To'] = header_string
775 self.assertRaises(UnicodeEncodeError, msg.as_string)
776 msg = Message()
777 msg['Reply-To'] = Header(header_string, 'utf-8',
778 header_name='Reply-To')
779 eq(msg.as_string(maxheaderlen=78), """\
780Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
781 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000782
783""")
784
785 def test_long_to_header(self):
786 eq = self.ndiffAssertEqual
787 to = ('"Someone Test #A" <someone@eecs.umich.edu>,'
788 '<someone@eecs.umich.edu>,'
789 '"Someone Test #B" <someone@umich.edu>, '
790 '"Someone Test #C" <someone@eecs.umich.edu>, '
791 '"Someone Test #D" <someone@eecs.umich.edu>')
792 msg = Message()
793 msg['To'] = to
794 eq(msg.as_string(maxheaderlen=78), '''\
Guido van Rossum9604e662007-08-30 03:46:43 +0000795To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000796 "Someone Test #B" <someone@umich.edu>,
Guido van Rossum9604e662007-08-30 03:46:43 +0000797 "Someone Test #C" <someone@eecs.umich.edu>,
798 "Someone Test #D" <someone@eecs.umich.edu>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000799
800''')
801
802 def test_long_line_after_append(self):
803 eq = self.ndiffAssertEqual
804 s = 'This is an example of string which has almost the limit of header length.'
805 h = Header(s)
806 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000807 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000808This is an example of string which has almost the limit of header length.
809 Add another line.""")
810
811 def test_shorter_line_with_append(self):
812 eq = self.ndiffAssertEqual
813 s = 'This is a shorter line.'
814 h = Header(s)
815 h.append('Add another sentence. (Surprise?)')
816 eq(h.encode(),
817 'This is a shorter line. Add another sentence. (Surprise?)')
818
819 def test_long_field_name(self):
820 eq = self.ndiffAssertEqual
821 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000822 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
823 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
824 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
825 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000826 h = Header(gs, 'iso-8859-1', header_name=fn)
827 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000828 eq(h.encode(maxlinelen=76), """\
829=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
830 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
831 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
832 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000833
834 def test_long_received_header(self):
835 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
836 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
837 'Wed, 05 Mar 2003 18:10:18 -0700')
838 msg = Message()
839 msg['Received-1'] = Header(h, continuation_ws='\t')
840 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000841 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000842 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000843Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
844 Wed, 05 Mar 2003 18:10:18 -0700
845Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
846 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000847
848""")
849
850 def test_string_headerinst_eq(self):
851 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
852 'tu-muenchen.de> (David Bremner\'s message of '
853 '"Thu, 6 Mar 2003 13:58:21 +0100")')
854 msg = Message()
855 msg['Received-1'] = Header(h, header_name='Received-1',
856 continuation_ws='\t')
857 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000858 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000859 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000860Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
861 6 Mar 2003 13:58:21 +0100\")
862Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
863 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000864
865""")
866
867 def test_long_unbreakable_lines_with_continuation(self):
868 eq = self.ndiffAssertEqual
869 msg = Message()
870 t = """\
871iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
872 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
873 msg['Face-1'] = t
874 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000875 # XXX This splitting is all wrong. It the first value line should be
876 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000877 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000878Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000879 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000880 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000881Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000882 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000883 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
884
885""")
886
887 def test_another_long_multiline_header(self):
888 eq = self.ndiffAssertEqual
889 m = ('Received: from siimage.com '
890 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000891 'Microsoft SMTPSVC(5.0.2195.4905); '
892 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000893 msg = email.message_from_string(m)
894 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000895Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
896 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000897
898''')
899
900 def test_long_lines_with_different_header(self):
901 eq = self.ndiffAssertEqual
902 h = ('List-Unsubscribe: '
903 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
904 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
905 '?subject=unsubscribe>')
906 msg = Message()
907 msg['List'] = h
908 msg['List'] = Header(h, header_name='List')
909 eq(msg.as_string(maxheaderlen=78), """\
910List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000911 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000912List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000913 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000914
915""")
916
917
918
919# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # The payload's first line starts with "From ", which is the line
        # the generator may or may not escape.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Flatten self.msg with the given mangle_from_ setting and return
        # the generated text.
        sink = StringIO()
        Generator(sink, mangle_from_=mangle).flatten(self.msg)
        return sink.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
950
951
952
953# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        data_dir = os.path.join(os.path.dirname(landmark), 'data', '')
        audio_path = findfile('audiotest.au', data_dir)
        with open(audio_path, 'rb') as stream:
            self._audiodata = stream.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the stored payload must give back the raw audio bytes.
        encoded = self._au.get_payload()
        self.assertEqual(base64.decodebytes(encoded), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype argument overrides the guessed one.
        audio = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(audio.get_content_type(), 'audio/fish')

    def test_add_header(self):
        audio = self._au
        audio.add_header('Content-Disposition', 'attachment',
                         filename='audiotest.au')
        self.assertEqual(audio['content-disposition'],
                         'attachment; filename="audiotest.au"')
        self.assertEqual(audio.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'audiotest.au')])
        self.assertEqual(
            audio.get_param('filename', header='content-disposition'),
            'audiotest.au')
        # A unique object, so identity checks prove failobj was returned.
        missing = []
        self.assertEqual(
            audio.get_param('attachment', header='content-disposition'), '')
        found = audio.get_param('foo', failobj=missing,
                                header='content-disposition')
        self.assertTrue(found is missing)
        # Try some missing stuff
        self.assertTrue(audio.get_param('foobar', missing) is missing)
        found = audio.get_param('attachment', missing, header='foobar')
        self.assertTrue(found is missing)
996
997
998
999# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the sample GIF from the test data directory and wrap it.
        with openfile('PyBanner048.gif', 'rb') as stream:
            self._imgdata = stream.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the stored payload must give back the raw image bytes.
        encoded = self._im.get_payload()
        self.assertEqual(base64.decodebytes(encoded), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype argument overrides the guessed one.
        image = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(image.get_content_type(), 'image/fish')

    def test_add_header(self):
        image = self._im
        image.add_header('Content-Disposition', 'attachment',
                         filename='dingusfish.gif')
        self.assertEqual(image['content-disposition'],
                         'attachment; filename="dingusfish.gif"')
        self.assertEqual(image.get_params(header='content-disposition'),
                         [('attachment', ''), ('filename', 'dingusfish.gif')])
        self.assertEqual(
            image.get_param('filename', header='content-disposition'),
            'dingusfish.gif')
        # A unique object, so identity checks prove failobj was returned.
        missing = []
        self.assertEqual(
            image.get_param('attachment', header='content-disposition'), '')
        found = image.get_param('foo', failobj=missing,
                                header='content-disposition')
        self.assertTrue(found is missing)
        # Try some missing stuff
        self.assertTrue(image.get_param('foobar', missing) is missing)
        found = image.get_param('attachment', missing, header='foobar')
        self.assertTrue(found is missing)
1036
1037
1038
1039# Test the basic MIMEApplication class
1040class TestMIMEApplication(unittest.TestCase):
1041 def test_headers(self):
1042 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001043 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001044 eq(msg.get_content_type(), 'application/octet-stream')
1045 eq(msg['content-transfer-encoding'], 'base64')
1046
1047 def test_body(self):
1048 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001049 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001050 msg = MIMEApplication(bytes)
Barry Warsaw8c571042007-08-30 19:17:18 +00001051 eq(msg.get_payload(), b'+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001052 eq(msg.get_payload(decode=True), bytes)
1053
1054
1055
1056# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        text = self._msg
        self.assertEqual(text.get_content_type(), 'text/plain')
        self.assertEqual(text.get_param('charset'), 'us-ascii')
        # A unique object, so identity checks prove failobj was returned.
        missing = []
        self.assertTrue(text.get_param('foobar', missing) is missing)
        found = text.get_param('charset', missing, header='foobar')
        self.assertTrue(found is missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        # An explicit _charset shows up both on the Charset object and in
        # the Content-Type header.
        text = MIMEText('hello there', _charset='us-ascii')
        self.assertEqual(text.get_charset().input_charset, 'us-ascii')
        self.assertEqual(text['content-type'],
                         'text/plain; charset="us-ascii"')
1080
1081
1082
1083# Test complicated multipart/* messages
1084class TestMultipart(TestEmailBase):
1085 def setUp(self):
1086 with openfile('PyBanner048.gif', 'rb') as fp:
1087 data = fp.read()
1088 container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
1089 image = MIMEImage(data, name='dingusfish.gif')
1090 image.add_header('content-disposition', 'attachment',
1091 filename='dingusfish.gif')
1092 intro = MIMEText('''\
1093Hi there,
1094
1095This is the dingus fish.
1096''')
1097 container.attach(intro)
1098 container.attach(image)
1099 container['From'] = 'Barry <barry@digicool.com>'
1100 container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
1101 container['Subject'] = 'Here is your dingus fish'
1102
1103 now = 987809702.54848599
1104 timetuple = time.localtime(now)
1105 if timetuple[-1] == 0:
1106 tzsecs = time.timezone
1107 else:
1108 tzsecs = time.altzone
1109 if tzsecs > 0:
1110 sign = '-'
1111 else:
1112 sign = '+'
1113 tzoffset = ' %s%04d' % (sign, tzsecs / 36)
1114 container['Date'] = time.strftime(
1115 '%a, %d %b %Y %H:%M:%S',
1116 time.localtime(now)) + tzoffset
1117 self._msg = container
1118 self._im = image
1119 self._txt = intro
1120
1121 def test_hierarchy(self):
1122 # convenience
1123 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001124 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001125 raises = self.assertRaises
1126 # tests
1127 m = self._msg
1128 unless(m.is_multipart())
1129 eq(m.get_content_type(), 'multipart/mixed')
1130 eq(len(m.get_payload()), 2)
1131 raises(IndexError, m.get_payload, 2)
1132 m0 = m.get_payload(0)
1133 m1 = m.get_payload(1)
1134 unless(m0 is self._txt)
1135 unless(m1 is self._im)
1136 eq(m.get_payload(), [m0, m1])
1137 unless(not m0.is_multipart())
1138 unless(not m1.is_multipart())
1139
1140 def test_empty_multipart_idempotent(self):
1141 text = """\
1142Content-Type: multipart/mixed; boundary="BOUNDARY"
1143MIME-Version: 1.0
1144Subject: A subject
1145To: aperson@dom.ain
1146From: bperson@dom.ain
1147
1148
1149--BOUNDARY
1150
1151
1152--BOUNDARY--
1153"""
1154 msg = Parser().parsestr(text)
1155 self.ndiffAssertEqual(text, msg.as_string())
1156
1157 def test_no_parts_in_a_multipart_with_none_epilogue(self):
1158 outer = MIMEBase('multipart', 'mixed')
1159 outer['Subject'] = 'A subject'
1160 outer['To'] = 'aperson@dom.ain'
1161 outer['From'] = 'bperson@dom.ain'
1162 outer.set_boundary('BOUNDARY')
1163 self.ndiffAssertEqual(outer.as_string(), '''\
1164Content-Type: multipart/mixed; boundary="BOUNDARY"
1165MIME-Version: 1.0
1166Subject: A subject
1167To: aperson@dom.ain
1168From: bperson@dom.ain
1169
1170--BOUNDARY
1171
1172--BOUNDARY--''')
1173
1174 def test_no_parts_in_a_multipart_with_empty_epilogue(self):
1175 outer = MIMEBase('multipart', 'mixed')
1176 outer['Subject'] = 'A subject'
1177 outer['To'] = 'aperson@dom.ain'
1178 outer['From'] = 'bperson@dom.ain'
1179 outer.preamble = ''
1180 outer.epilogue = ''
1181 outer.set_boundary('BOUNDARY')
1182 self.ndiffAssertEqual(outer.as_string(), '''\
1183Content-Type: multipart/mixed; boundary="BOUNDARY"
1184MIME-Version: 1.0
1185Subject: A subject
1186To: aperson@dom.ain
1187From: bperson@dom.ain
1188
1189
1190--BOUNDARY
1191
1192--BOUNDARY--
1193''')
1194
1195 def test_one_part_in_a_multipart(self):
1196 eq = self.ndiffAssertEqual
1197 outer = MIMEBase('multipart', 'mixed')
1198 outer['Subject'] = 'A subject'
1199 outer['To'] = 'aperson@dom.ain'
1200 outer['From'] = 'bperson@dom.ain'
1201 outer.set_boundary('BOUNDARY')
1202 msg = MIMEText('hello world')
1203 outer.attach(msg)
1204 eq(outer.as_string(), '''\
1205Content-Type: multipart/mixed; boundary="BOUNDARY"
1206MIME-Version: 1.0
1207Subject: A subject
1208To: aperson@dom.ain
1209From: bperson@dom.ain
1210
1211--BOUNDARY
1212Content-Type: text/plain; charset="us-ascii"
1213MIME-Version: 1.0
1214Content-Transfer-Encoding: 7bit
1215
1216hello world
1217--BOUNDARY--''')
1218
1219 def test_seq_parts_in_a_multipart_with_empty_preamble(self):
1220 eq = self.ndiffAssertEqual
1221 outer = MIMEBase('multipart', 'mixed')
1222 outer['Subject'] = 'A subject'
1223 outer['To'] = 'aperson@dom.ain'
1224 outer['From'] = 'bperson@dom.ain'
1225 outer.preamble = ''
1226 msg = MIMEText('hello world')
1227 outer.attach(msg)
1228 outer.set_boundary('BOUNDARY')
1229 eq(outer.as_string(), '''\
1230Content-Type: multipart/mixed; boundary="BOUNDARY"
1231MIME-Version: 1.0
1232Subject: A subject
1233To: aperson@dom.ain
1234From: bperson@dom.ain
1235
1236
1237--BOUNDARY
1238Content-Type: text/plain; charset="us-ascii"
1239MIME-Version: 1.0
1240Content-Transfer-Encoding: 7bit
1241
1242hello world
1243--BOUNDARY--''')
1244
1245
1246 def test_seq_parts_in_a_multipart_with_none_preamble(self):
1247 eq = self.ndiffAssertEqual
1248 outer = MIMEBase('multipart', 'mixed')
1249 outer['Subject'] = 'A subject'
1250 outer['To'] = 'aperson@dom.ain'
1251 outer['From'] = 'bperson@dom.ain'
1252 outer.preamble = None
1253 msg = MIMEText('hello world')
1254 outer.attach(msg)
1255 outer.set_boundary('BOUNDARY')
1256 eq(outer.as_string(), '''\
1257Content-Type: multipart/mixed; boundary="BOUNDARY"
1258MIME-Version: 1.0
1259Subject: A subject
1260To: aperson@dom.ain
1261From: bperson@dom.ain
1262
1263--BOUNDARY
1264Content-Type: text/plain; charset="us-ascii"
1265MIME-Version: 1.0
1266Content-Transfer-Encoding: 7bit
1267
1268hello world
1269--BOUNDARY--''')
1270
1271
1272 def test_seq_parts_in_a_multipart_with_none_epilogue(self):
1273 eq = self.ndiffAssertEqual
1274 outer = MIMEBase('multipart', 'mixed')
1275 outer['Subject'] = 'A subject'
1276 outer['To'] = 'aperson@dom.ain'
1277 outer['From'] = 'bperson@dom.ain'
1278 outer.epilogue = None
1279 msg = MIMEText('hello world')
1280 outer.attach(msg)
1281 outer.set_boundary('BOUNDARY')
1282 eq(outer.as_string(), '''\
1283Content-Type: multipart/mixed; boundary="BOUNDARY"
1284MIME-Version: 1.0
1285Subject: A subject
1286To: aperson@dom.ain
1287From: bperson@dom.ain
1288
1289--BOUNDARY
1290Content-Type: text/plain; charset="us-ascii"
1291MIME-Version: 1.0
1292Content-Transfer-Encoding: 7bit
1293
1294hello world
1295--BOUNDARY--''')
1296
1297
1298 def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
1299 eq = self.ndiffAssertEqual
1300 outer = MIMEBase('multipart', 'mixed')
1301 outer['Subject'] = 'A subject'
1302 outer['To'] = 'aperson@dom.ain'
1303 outer['From'] = 'bperson@dom.ain'
1304 outer.epilogue = ''
1305 msg = MIMEText('hello world')
1306 outer.attach(msg)
1307 outer.set_boundary('BOUNDARY')
1308 eq(outer.as_string(), '''\
1309Content-Type: multipart/mixed; boundary="BOUNDARY"
1310MIME-Version: 1.0
1311Subject: A subject
1312To: aperson@dom.ain
1313From: bperson@dom.ain
1314
1315--BOUNDARY
1316Content-Type: text/plain; charset="us-ascii"
1317MIME-Version: 1.0
1318Content-Transfer-Encoding: 7bit
1319
1320hello world
1321--BOUNDARY--
1322''')
1323
1324
1325 def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
1326 eq = self.ndiffAssertEqual
1327 outer = MIMEBase('multipart', 'mixed')
1328 outer['Subject'] = 'A subject'
1329 outer['To'] = 'aperson@dom.ain'
1330 outer['From'] = 'bperson@dom.ain'
1331 outer.epilogue = '\n'
1332 msg = MIMEText('hello world')
1333 outer.attach(msg)
1334 outer.set_boundary('BOUNDARY')
1335 eq(outer.as_string(), '''\
1336Content-Type: multipart/mixed; boundary="BOUNDARY"
1337MIME-Version: 1.0
1338Subject: A subject
1339To: aperson@dom.ain
1340From: bperson@dom.ain
1341
1342--BOUNDARY
1343Content-Type: text/plain; charset="us-ascii"
1344MIME-Version: 1.0
1345Content-Transfer-Encoding: 7bit
1346
1347hello world
1348--BOUNDARY--
1349
1350''')
1351
1352 def test_message_external_body(self):
1353 eq = self.assertEqual
1354 msg = self._msgobj('msg_36.txt')
1355 eq(len(msg.get_payload()), 2)
1356 msg1 = msg.get_payload(1)
1357 eq(msg1.get_content_type(), 'multipart/alternative')
1358 eq(len(msg1.get_payload()), 2)
1359 for subpart in msg1.get_payload():
1360 eq(subpart.get_content_type(), 'message/external-body')
1361 eq(len(subpart.get_payload()), 1)
1362 subsubpart = subpart.get_payload(0)
1363 eq(subsubpart.get_content_type(), 'text/plain')
1364
1365 def test_double_boundary(self):
1366 # msg_37.txt is a multipart that contains two dash-boundary's in a
1367 # row. Our interpretation of RFC 2046 calls for ignoring the second
1368 # and subsequent boundaries.
1369 msg = self._msgobj('msg_37.txt')
1370 self.assertEqual(len(msg.get_payload()), 3)
1371
1372 def test_nested_inner_contains_outer_boundary(self):
1373 eq = self.ndiffAssertEqual
1374 # msg_38.txt has an inner part that contains outer boundaries. My
1375 # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) says
1376 # these are illegal and should be interpreted as unterminated inner
1377 # parts. The text compared below is what iterators._structure() writes for the parsed message.
1378 msg = self._msgobj('msg_38.txt')
1379 sfp = StringIO()
1380 iterators._structure(msg, sfp)
1381 eq(sfp.getvalue(), """\
1382multipart/mixed
1383 multipart/mixed
1384 multipart/alternative
1385 text/plain
1386 text/plain
1387 text/plain
1388 text/plain
1389""")
1390
1391 def test_nested_with_same_boundary(self):
1392 eq = self.ndiffAssertEqual
1393 # msg_39.txt is similarly evil in that it's got inner parts that use
1394 # the same boundary as outer parts. Again, I believe the way this is
1395 # parsed is closest to the spirit of RFC 2046. The text compared below is what iterators._structure() writes.
1396 msg = self._msgobj('msg_39.txt')
1397 sfp = StringIO()
1398 iterators._structure(msg, sfp)
1399 eq(sfp.getvalue(), """\
1400multipart/mixed
1401 multipart/mixed
1402 multipart/alternative
1403 application/octet-stream
1404 application/octet-stream
1405 text/plain
1406""")
1407
1408 def test_boundary_in_non_multipart(self):
1409 msg = self._msgobj('msg_40.txt')
1410 self.assertEqual(msg.as_string(), '''\
1411MIME-Version: 1.0
1412Content-Type: text/html; boundary="--961284236552522269"
1413
1414----961284236552522269
1415Content-Type: text/html;
1416Content-Transfer-Encoding: 7Bit
1417
1418<html></html>
1419
1420----961284236552522269--
1421''')
1422
1423 def test_boundary_with_leading_space(self):
1424 eq = self.assertEqual
1425 msg = email.message_from_string('''\
1426MIME-Version: 1.0
1427Content-Type: multipart/mixed; boundary=" XXXX"
1428
1429-- XXXX
1430Content-Type: text/plain
1431
1432
1433-- XXXX
1434Content-Type: text/plain
1435
1436-- XXXX--
1437''')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00001438 self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001439 eq(msg.get_boundary(), ' XXXX')
1440 eq(len(msg.get_payload()), 2)
1441
1442 def test_boundary_without_trailing_newline(self):
1443 m = Parser().parsestr("""\
1444Content-Type: multipart/mixed; boundary="===============0012394164=="
1445MIME-Version: 1.0
1446
1447--===============0012394164==
1448Content-Type: image/file1.jpg
1449MIME-Version: 1.0
1450Content-Transfer-Encoding: base64
1451
1452YXNkZg==
1453--===============0012394164==--""")
1454 self.assertEquals(m.get_payload(0).get_payload(), 'YXNkZg==')
1455
1456
1457
1458# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        # msg_14.txt is expected to be reported as text/plain.
        parsed = self._msgobj('msg_14.txt')
        self.assertEqual(parsed.get_content_type(), 'text/plain')
        self.assertEqual(parsed.get_content_maintype(), 'text')
        self.assertEqual(parsed.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        parsed = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = parsed.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        only_defect = inner.defects[0]
        self.assertTrue(
            isinstance(only_defect, errors.StartBoundaryNotFoundDefect))

    def test_multipart_no_boundary(self):
        # msg_25.txt: the payload stays a plain string and two defects are
        # recorded, in this order.
        parsed = self._msgobj('msg_25.txt')
        self.assertTrue(isinstance(parsed.get_payload(), str))
        self.assertEqual(len(parsed.defects), 2)
        first, second = parsed.defects[0], parsed.defects[1]
        self.assertTrue(
            isinstance(first, errors.NoBoundaryInMultipartDefect))
        self.assertTrue(
            isinstance(second, errors.MultipartInvariantViolationDefect))

    def test_invalid_content_type(self):
        message = Message()

        def assert_reported_as_text_plain():
            self.assertEqual(message.get_content_maintype(), 'text')
            self.assertEqual(message.get_content_subtype(), 'plain')
            self.assertEqual(message.get_content_type(), 'text/plain')

        # RFC 2045, $5.2 says invalid yields text/plain
        message['Content-Type'] = 'text'
        assert_reported_as_text_plain()
        # Clear the old value and try something /really/ invalid
        del message['content-type']
        message['Content-Type'] = 'foo'
        assert_reported_as_text_plain()
        # Still, make sure that the message is idempotently generated
        sink = StringIO()
        Generator(sink).flatten(message)
        self.ndiffAssertEqual(sink.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # msg_31.txt: the whole body, boundary lines included, is expected
        # back as the string payload.
        parsed = self._msgobj('msg_31.txt')
        self.ndiffAssertEqual(parsed.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        # msg_35.txt: the flattened message is expected to have a blank line
        # between the headers and the body text.
        parsed = self._msgobj('msg_35.txt')
        self.ndiffAssertEqual(parsed.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        # msg_41.txt: two defects are expected, in this order.
        parsed = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(parsed, 'defects'))
        self.assertEqual(len(parsed.defects), 2)
        first, second = parsed.defects[0], parsed.defects[1]
        self.assertTrue(
            isinstance(first, errors.NoBoundaryInMultipartDefect))
        self.assertTrue(
            isinstance(second, errors.MultipartInvariantViolationDefect))

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        rfc822_part = outer.get_payload(1)
        bad = rfc822_part.get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertTrue(isinstance(bad.defects[0],
                                   errors.StartBoundaryNotFoundDefect))

    def test_first_line_is_continuation_header(self):
        # The text starts with a continuation-style line: no headers are
        # expected, the remaining lines become the payload, and one defect
        # carrying the offending line is recorded.
        source = ' Line 1\nLine 2\nLine 3'
        parsed = email.message_from_string(source)
        self.assertEqual(parsed.keys(), [])
        self.assertEqual(parsed.get_payload(), 'Line 2\nLine 3')
        self.assertEqual(len(parsed.defects), 1)
        defect = parsed.defects[0]
        self.assertTrue(
            isinstance(defect, errors.FirstHeaderLineIsContinuationDefect))
        self.assertEqual(parsed.defects[0].line, ' Line 1\n')
1569
1570
1571
1572# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # Decode a folded header with two mac-iceland encoded words, rebuild
        # a Header from the decoded chunks, and re-encode it.
        source = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        encoded_word = (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')
        decoded = decode_header(source)
        self.assertEqual(decoded, [
            (b'Re:', None),
            encoded_word,
            (b'baz foo bar', None),
            encoded_word])
        rebuilt = make_header(decoded)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        source = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        decoded = decode_header(source)
        self.assertEqual(decoded, [(b'Andr\xe9', 'iso-8859-1'),
                                   (b'Pirard <pirard@dom.ain>', None)])
        text = str(make_header(decoded))
        self.assertEqual(text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        source = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        decoded = decode_header(source)
        self.assertEqual(decoded, [
            (b'The', None),
            (b'quick brown fox', 'iso-8859-1'),
            (b'jumped over the', None),
            (b'lazy dog', 'iso-8859-1')])
        text = str(make_header(decoded))
        self.assertEqual(text, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not set off by whitespace are expected to
        # come back undecoded, as the original string.
        source = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        decoded = decode_header(source)
        self.assertEqual(decoded, [(source, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words separated by whitespace are decoded individually.
        source = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        decoded = decode_header(source)
        self.assertEqual(decoded, [
            (b'Sm', None),
            (b'\xf6', 'iso-8859-1'),
            (b'rg', None),
            (b'\xe5', 'iso-8859-1'),
            (b'sbord', None)])
1620
1621
1622
1623# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* parts of parsed messages."""

    def setUp(self):
        # Keep the raw text of the msg_11.txt sample on the instance.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # Passing a plain string instead of a Message must raise TypeError.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        unless = self.assertTrue
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        # The wrapped message is exposed as a one-element payload list and
        # must be the very same object that was passed in.
        payload = r.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subpart = payload[0]
        unless(subpart is m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # Attaching a second message to a MIMEMessage that already wraps
        # one must raise MultipartConversionError.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        unless = self.assertTrue
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        submsg = payload[0]
        self.assertTrue(isinstance(submsg, Message))
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        unless = self.assertTrue
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        unless(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the quoted header and recipient lines below are
        # assumed to be indented by two spaces -- confirm against
        # msg_16.txt.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        unless(isinstance(dsn1, Message))
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        unless(isinstance(dsn2, Message))
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subsubpart = payload[0]
        unless(isinstance(subsubpart, Message))
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        # The message built below must flatten to the text of msg_21.txt.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        # The preamble has no trailing newline and the epilogue is empty.
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        # Both containers report message/rfc822 as default and actual type.
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        # The messages they enclose report text/plain.
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # With the explicit headers removed, the reported types must stay
        # message/rfc822 and the part headers vanish from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(0), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        text1 = MIMEText('')
        text2 = MIMEText('')
        # Subparts handed to the constructor become the payload, in order.
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001922
1923
# A general test of parser->model->generator idempotency.  IOW, read a message
# in, parse it into a message object tree, then without touching the tree,
# regenerate the plain text.  The original text and the transformed text
# should be identical.  Note that we ignore the Unix-From since that may
# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Check that parsing a message and flattening it reproduces the text."""

    def _msgobj(self, filename):
        """Return a (message, text) pair for the named sample data file."""
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text):
        """Assert that flattening msg yields exactly text."""
        eq = self.ndiffAssertEqual
        s = StringIO()
        # maxheaderlen=0: don't wrap/continue long headers when regenerating.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        unless = self.assertTrue
        # Get a message object and reset the seek pointer for other tests
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = dict(msg.get_params())
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
        eq(msg.epilogue, '\n')
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda\n')
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda\n')
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        self.assertTrue(isinstance(msg3, Message))
        payload = msg3.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        msg4 = payload[0]
        unless(isinstance(msg4, Message))
        eq(msg4.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        eq = self.assertEqual
        unless = self.assertTrue
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        msg1 = payload[0]
        self.assertTrue(isinstance(msg1, Message))
        eq(msg1.get_content_type(), 'text/plain')
        self.assertTrue(isinstance(msg1.get_payload(), str))
        eq(msg1.get_payload(), '\n')
2075
2076
2077
2078# Test various other bits of the package's functionality
2079class TestMiscellaneous(TestEmailBase):
2080 def test_message_from_string(self):
2081 with openfile('msg_01.txt') as fp:
2082 text = fp.read()
2083 msg = email.message_from_string(text)
2084 s = StringIO()
2085 # Don't wrap/continue long headers since we're trying to test
2086 # idempotency.
2087 g = Generator(s, maxheaderlen=0)
2088 g.flatten(msg)
2089 self.assertEqual(text, s.getvalue())
2090
2091 def test_message_from_file(self):
2092 with openfile('msg_01.txt') as fp:
2093 text = fp.read()
2094 fp.seek(0)
2095 msg = email.message_from_file(fp)
2096 s = StringIO()
2097 # Don't wrap/continue long headers since we're trying to test
2098 # idempotency.
2099 g = Generator(s, maxheaderlen=0)
2100 g.flatten(msg)
2101 self.assertEqual(text, s.getvalue())
2102
2103 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002104 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002105 with openfile('msg_01.txt') as fp:
2106 text = fp.read()
2107
2108 # Create a subclass
2109 class MyMessage(Message):
2110 pass
2111
2112 msg = email.message_from_string(text, MyMessage)
2113 unless(isinstance(msg, MyMessage))
2114 # Try something more complicated
2115 with openfile('msg_02.txt') as fp:
2116 text = fp.read()
2117 msg = email.message_from_string(text, MyMessage)
2118 for subpart in msg.walk():
2119 unless(isinstance(subpart, MyMessage))
2120
2121 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002122 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002123 # Create a subclass
2124 class MyMessage(Message):
2125 pass
2126
2127 with openfile('msg_01.txt') as fp:
2128 msg = email.message_from_file(fp, MyMessage)
2129 unless(isinstance(msg, MyMessage))
2130 # Try something more complicated
2131 with openfile('msg_02.txt') as fp:
2132 msg = email.message_from_file(fp, MyMessage)
2133 for subpart in msg.walk():
2134 unless(isinstance(subpart, MyMessage))
2135
2136 def test__all__(self):
2137 module = __import__('email')
2138 # Can't use sorted() here due to Python 2.3 compatibility
2139 all = module.__all__[:]
2140 all.sort()
2141 self.assertEqual(all, [
2142 'base64mime', 'charset', 'encoders', 'errors', 'generator',
2143 'header', 'iterators', 'message', 'message_from_file',
2144 'message_from_string', 'mime', 'parser',
2145 'quoprimime', 'utils',
2146 ])
2147
2148 def test_formatdate(self):
2149 now = time.time()
2150 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2151 time.gmtime(now)[:6])
2152
2153 def test_formatdate_localtime(self):
2154 now = time.time()
2155 self.assertEqual(
2156 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2157 time.localtime(now)[:6])
2158
2159 def test_formatdate_usegmt(self):
2160 now = time.time()
2161 self.assertEqual(
2162 utils.formatdate(now, localtime=False),
2163 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2164 self.assertEqual(
2165 utils.formatdate(now, localtime=False, usegmt=True),
2166 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2167
2168 def test_parsedate_none(self):
2169 self.assertEqual(utils.parsedate(''), None)
2170
2171 def test_parsedate_compact(self):
2172 # The FWS after the comma is optional
2173 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2174 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2175
2176 def test_parsedate_no_dayofweek(self):
2177 eq = self.assertEqual
2178 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2179 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2180
2181 def test_parsedate_compact_no_dayofweek(self):
2182 eq = self.assertEqual
2183 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2184 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2185
2186 def test_parsedate_acceptable_to_time_functions(self):
2187 eq = self.assertEqual
2188 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2189 t = int(time.mktime(timetup))
2190 eq(time.localtime(t)[:6], timetup[:6])
2191 eq(int(time.strftime('%Y', timetup)), 2003)
2192 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2193 t = int(time.mktime(timetup[:9]))
2194 eq(time.localtime(t)[:6], timetup[:6])
2195 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2196
2197 def test_parseaddr_empty(self):
2198 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2199 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2200
2201 def test_noquote_dump(self):
2202 self.assertEqual(
2203 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2204 'A Silly Person <person@dom.ain>')
2205
2206 def test_escape_dump(self):
2207 self.assertEqual(
2208 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2209 r'"A \(Very\) Silly Person" <person@dom.ain>')
2210 a = r'A \(Special\) Person'
2211 b = 'person@dom.ain'
2212 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2213
2214 def test_escape_backslashes(self):
2215 self.assertEqual(
2216 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2217 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2218 a = r'Arthur \Backslash\ Foobar'
2219 b = 'person@dom.ain'
2220 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2221
2222 def test_name_with_dot(self):
2223 x = 'John X. Doe <jxd@example.com>'
2224 y = '"John X. Doe" <jxd@example.com>'
2225 a, b = ('John X. Doe', 'jxd@example.com')
2226 self.assertEqual(utils.parseaddr(x), (a, b))
2227 self.assertEqual(utils.parseaddr(y), (a, b))
2228 # formataddr() quotes the name if there's a dot in it
2229 self.assertEqual(utils.formataddr((a, b)), y)
2230
2231 def test_multiline_from_comment(self):
2232 x = """\
2233Foo
2234\tBar <foo@example.com>"""
2235 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2236
2237 def test_quote_dump(self):
2238 self.assertEqual(
2239 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2240 r'"A Silly; Person" <person@dom.ain>')
2241
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002242 def test_charset_richcomparisons(self):
2243 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002244 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002245 cset1 = Charset()
2246 cset2 = Charset()
2247 eq(cset1, 'us-ascii')
2248 eq(cset1, 'US-ASCII')
2249 eq(cset1, 'Us-AsCiI')
2250 eq('us-ascii', cset1)
2251 eq('US-ASCII', cset1)
2252 eq('Us-AsCiI', cset1)
2253 ne(cset1, 'usascii')
2254 ne(cset1, 'USASCII')
2255 ne(cset1, 'UsAsCiI')
2256 ne('usascii', cset1)
2257 ne('USASCII', cset1)
2258 ne('UsAsCiI', cset1)
2259 eq(cset1, cset2)
2260 eq(cset2, cset1)
2261
2262 def test_getaddresses(self):
2263 eq = self.assertEqual
2264 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2265 'Bud Person <bperson@dom.ain>']),
2266 [('Al Person', 'aperson@dom.ain'),
2267 ('Bud Person', 'bperson@dom.ain')])
2268
2269 def test_getaddresses_nasty(self):
2270 eq = self.assertEqual
2271 eq(utils.getaddresses(['foo: ;']), [('', '')])
2272 eq(utils.getaddresses(
2273 ['[]*-- =~$']),
2274 [('', ''), ('', ''), ('', '*--')])
2275 eq(utils.getaddresses(
2276 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2277 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2278
2279 def test_getaddresses_embedded_comment(self):
2280 """Test proper handling of a nested comment"""
2281 eq = self.assertEqual
2282 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2283 eq(addrs[0][1], 'foo@bar.com')
2284
2285 def test_utils_quote_unquote(self):
2286 eq = self.assertEqual
2287 msg = Message()
2288 msg.add_header('content-disposition', 'attachment',
2289 filename='foo\\wacky"name')
2290 eq(msg.get_filename(), 'foo\\wacky"name')
2291
2292 def test_get_body_encoding_with_bogus_charset(self):
2293 charset = Charset('not a charset')
2294 self.assertEqual(charset.get_body_encoding(), 'base64')
2295
2296 def test_get_body_encoding_with_uppercase_charset(self):
2297 eq = self.assertEqual
2298 msg = Message()
2299 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2300 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2301 charsets = msg.get_charsets()
2302 eq(len(charsets), 1)
2303 eq(charsets[0], 'utf-8')
2304 charset = Charset(charsets[0])
2305 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002306 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002307 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2308 eq(msg.get_payload(decode=True), b'hello world')
2309 eq(msg['content-transfer-encoding'], 'base64')
2310 # Try another one
2311 msg = Message()
2312 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2313 charsets = msg.get_charsets()
2314 eq(len(charsets), 1)
2315 eq(charsets[0], 'us-ascii')
2316 charset = Charset(charsets[0])
2317 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2318 msg.set_payload('hello world', charset=charset)
2319 eq(msg.get_payload(), 'hello world')
2320 eq(msg['content-transfer-encoding'], '7bit')
2321
2322 def test_charsets_case_insensitive(self):
2323 lc = Charset('us-ascii')
2324 uc = Charset('US-ASCII')
2325 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2326
2327 def test_partial_falls_inside_message_delivery_status(self):
2328 eq = self.ndiffAssertEqual
2329 # The Parser interface provides chunks of data to FeedParser in 8192
2330 # byte gulps. SF bug #1076485 found one of those chunks inside
2331 # message/delivery-status header block, which triggered an
2332 # unreadline() of NeedMoreData.
2333 msg = self._msgobj('msg_43.txt')
2334 sfp = StringIO()
2335 iterators._structure(msg, sfp)
2336 eq(sfp.getvalue(), """\
2337multipart/report
2338 text/plain
2339 message/delivery-status
2340 text/plain
2341 text/plain
2342 text/plain
2343 text/plain
2344 text/plain
2345 text/plain
2346 text/plain
2347 text/plain
2348 text/plain
2349 text/plain
2350 text/plain
2351 text/plain
2352 text/plain
2353 text/plain
2354 text/plain
2355 text/plain
2356 text/plain
2357 text/plain
2358 text/plain
2359 text/plain
2360 text/plain
2361 text/plain
2362 text/plain
2363 text/plain
2364 text/plain
2365 text/plain
2366 text/rfc822-headers
2367""")
2368
2369
2370
2371# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the helpers in email.iterators."""

    def test_body_line_iterator(self):
        # A simple non-multipart message first: its body lines joined back
        # together must equal the payload.
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        self.assertEqual(len(body_lines), 6)
        self.ndiffAssertEqual(EMPTYSTRING.join(body_lines),
                              simple.get_payload())
        # Then a more complicated multipart, compared with the contents of
        # msg_19.txt.
        multipart = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multipart))
        self.assertEqual(len(body_lines), 43)
        with openfile('msg_19.txt') as fp:
            self.ndiffAssertEqual(EMPTYSTRING.join(body_lines), fp.read())

    def test_typed_subpart_iterator(self):
        # Collect the payload of every subpart yielded for maintype 'text'.
        msg = self._msgobj('msg_04.txt')
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        self.assertEqual(len(payloads), 2)
        self.assertEqual(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        # Same collection, this time filtering on both maintype and subtype.
        msg = self._msgobj('msg_03.txt')
        matching = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matching]
        self.assertEqual(len(payloads), 1)
        self.assertEqual(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")
2425
2426
2427
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_* helpers."""

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left as one string rather than split into subparts.
        self.assertFalse(msg.is_multipart())
        self.assertTrue(isinstance(msg.get_payload(), str))

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' is passed to open() so the file's \r\n line endings
        # reach the parser unchanged.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        eq(msg.is_multipart(), 1)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        eq(part1.is_multipart(), 1)
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        eq(part1a.is_multipart(), 0)
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        eq(part2.is_multipart(), 1)
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        eq(part2a.is_multipart(), 0)
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        # The first line is not a valid header, so no headers are parsed.
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() copies, so this works whatever iterable keys() returns.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')
2559
2560
2561
class TestBase64(unittest.TestCase):
    """Checks for the helper functions in email.base64mime."""

    def test_len(self):
        """header_length() matches the size of real base64 output."""
        check = self.assertEqual
        check(base64mime.header_length('hello'),
              len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Each started group of three input characters costs four
            # output characters: 0 -> 0, 1..3 -> 4, 4..6 -> 8, ...
            expected = 4 * ((size + 2) // 3)
            check(base64mime.header_length('x' * size), expected)

    def test_decode(self):
        """decode() returns bytes, including for the empty string."""
        decode = base64mime.decode
        self.assertEqual(decode(''), b'')
        self.assertEqual(decode('aGVsbG8='), b'hello')

    def test_encode(self):
        """body_encode() honours the maxlinelen and eol arguments."""
        check = self.assertEqual
        encode = base64mime.body_encode
        check(encode(b''), b'')
        check(encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte.
        check(encode(b'hello\n'), 'aGVsbG8K\n')
        payload = b'xxxx ' * 20
        # maxlinelen limits the width of every output line.
        check(encode(payload, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # eol replaces the line terminator of every output line.
        check(encode(payload, maxlinelen=40, eol='\r\n'),
              """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        """header_encode() wraps the base64 text in RFC 2047 markers."""
        check = self.assertEqual
        encode = base64mime.header_encode
        check(encode('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        check(encode('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # An explicit charset shows up in the encoded-word prefix.
        check(encode('hello', charset='iso-8859-2'),
              '=?iso-8859-2?b?aGVsbG8=?=')
        check(encode('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002612
2613
2614
class TestQuopri(unittest.TestCase):
    """Checks for the helper functions in email.quoprimime."""

    def setUp(self):
        """Build the 'literal' and 'must encode' byte tables."""
        # Byte values that may appear unencoded in a header.
        header_safe = []
        for first, last in (('a', 'z'), ('A', 'Z'), ('0', '9')):
            header_safe.extend(range(ord(first), ord(last) + 1))
        header_safe.extend(b'!*+-/')
        self.hlit = header_safe
        # Every remaining byte value has to be encoded in a header.
        self.hnon = [octet for octet in range(256)
                     if octet not in self.hlit]
        assert len(self.hlit) + len(self.hnon) == 256
        # Byte values that may appear unencoded in a body: space through
        # '~' without '=', followed by tab.
        body_safe = [octet for octet in range(ord(' '), ord('~') + 1)
                     if octet != ord('=')]
        body_safe.append(ord('\t'))
        self.blit = body_safe
        # Every remaining byte value has to be encoded in a body.
        self.bnon = [octet for octet in range(256)
                     if octet not in self.blit]
        assert len(self.blit) + len(self.bnon) == 256

    def test_quopri_header_check(self):
        """header_check() is false for literals and true for the rest."""
        check = quoprimime.header_check
        for octet in self.hlit:
            self.assertFalse(
                check(octet),
                'Should not be header quopri encoded: %s' % chr(octet))
        for octet in self.hnon:
            self.assertTrue(
                check(octet),
                'Should be header quopri encoded: %s' % chr(octet))

    def test_quopri_body_check(self):
        """body_check() is false for literals and true for the rest."""
        check = quoprimime.body_check
        for octet in self.blit:
            self.assertFalse(
                check(octet),
                'Should not be body quopri encoded: %s' % chr(octet))
        for octet in self.bnon:
            self.assertTrue(
                check(octet),
                'Should be body quopri encoded: %s' % chr(octet))

    def test_header_quopri_len(self):
        """header_length() counts encoded characters, without the chrome."""
        check = self.assertEqual
        measure = quoprimime.header_length
        # RFC 2047 chrome is not included in header_length():
        # =?xxx?q?...?= means 10 extra characters
        chrome = 10
        for raw, payload_len in ((b'hello', 5), (b'h@e@l@l@o@', 20)):
            check(measure(raw), payload_len)
            check(len(quoprimime.header_encode(raw, charset='xxx')),
                  measure(raw) + chrome)
        for octet in self.hlit:
            check(measure(bytes([octet])), 1,
                  'expected length 1 for %r' % chr(octet))
        for octet in self.hnon:
            # Space is special; it's encoded to _
            if octet == ord(' '):
                continue
            check(measure(bytes([octet])), 3,
                  'expected length 3 for %r' % chr(octet))
        check(measure(b' '), 1)

    def test_body_quopri_len(self):
        """body_length() is 1 for literal bytes and 3 for encoded ones."""
        measure = quoprimime.body_length
        for octet in self.blit:
            self.assertEqual(measure(bytes([octet])), 1)
        for octet in self.bnon:
            self.assertEqual(measure(bytes([octet])), 3)

    def test_quote_unquote_idempotent(self):
        """unquote(quote(c)) gives back c for every code point below 256."""
        for code in range(256):
            char = chr(code)
            round_tripped = quoprimime.unquote(quoprimime.quote(char))
            self.assertEqual(round_tripped, char)

    def test_header_encode(self):
        """header_encode() produces RFC 2047 'q' encoded words."""
        check = self.assertEqual
        encode = quoprimime.header_encode
        check(encode(b'hello'), '=?iso-8859-1?q?hello?=')
        check(encode(b'hello', charset='iso-8859-2'),
              '=?iso-8859-2?q?hello?=')
        check(encode(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # A byte outside ASCII is written as =XX.
        check(encode(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        """decode() passes plain text through and applies the eol argument."""
        check = self.assertEqual
        decode = quoprimime.decode
        check(decode(''), '')
        check(decode('hello'), 'hello')
        check(decode('hello', 'X'), 'hello')
        check(decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        """body_encode() wraps long lines and honours the eol argument."""
        check = self.assertEqual
        encode = quoprimime.body_encode
        check(encode(''), '')
        check(encode('hello'), 'hello')
        # CRLF in the input comes back as a bare LF.
        check(encode('hello\r\nworld'), 'hello\nworld')
        text = 'xxxx ' * 20
        # maxlinelen limits the width of every output line.
        check(encode(text, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # eol is used to terminate the wrapped lines.
        check(encode(text, maxlinelen=40, eol='\r\n'),
              """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        # An empty line in the input is kept.
        check(encode("""\
one line

two line"""), """\
one line

two line""")
2731
2732
2733
2734# Test the Charset class
2735class TestCharset(unittest.TestCase):
2736 def tearDown(self):
2737 from email import charset as CharsetModule
2738 try:
2739 del CharsetModule.CHARSETS['fake']
2740 except KeyError:
2741 pass
2742
Guido van Rossum9604e662007-08-30 03:46:43 +00002743 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002744 eq = self.assertEqual
2745 # Make sure us-ascii = no Unicode conversion
2746 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00002747 eq(c.header_encode('Hello World!'), 'Hello World!')
2748 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002749 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00002750 self.assertRaises(UnicodeError, c.header_encode, s)
2751 c = Charset('utf-8')
2752 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002753
2754 def test_body_encode(self):
2755 eq = self.assertEqual
2756 # Try a charset with QP body encoding
2757 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002758 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002759 # Try a charset with Base64 body encoding
2760 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002761 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002762 # Try a charset with None body encoding
2763 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002764 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002765 # Try the convert argument, where input codec != output codec
2766 c = Charset('euc-jp')
2767 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00002768 # XXX FIXME
2769## try:
2770## eq('\x1b$B5FCO;~IW\x1b(B',
2771## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
2772## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
2773## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
2774## except LookupError:
2775## # We probably don't have the Japanese codecs installed
2776## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002777 # Testing SF bug #625509, which we have to fake, since there are no
2778 # built-in encodings where the header encoding is QP but the body
2779 # encoding is not.
2780 from email import charset as CharsetModule
2781 CharsetModule.add_charset('fake', CharsetModule.QP, None)
2782 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002783 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002784
2785 def test_unicode_charset_name(self):
2786 charset = Charset('us-ascii')
2787 self.assertEqual(str(charset), 'us-ascii')
2788 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
2789
2790
2791
2792# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    """Tests for Header objects and decode_header() / make_header()."""

    def test_simple(self):
        """An appended chunk keeps its own leading space."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        # Two spaces: the one that joins the chunks (see
        # test_simple_surprise) plus the one the appended chunk starts with.
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        """Chunks are joined with a space even when none was supplied."""
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        """decode_header() returns plain text unchanged with charset None."""
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        """Every folded line of a long header fits within maxlinelen."""
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        for line in h.encode(splitchars=' ').split('\n '):
            # assertLessEqual reports the offending length on failure.
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        """Encode, decode and rebuild a header mixing three charsets."""
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        """An empty Header encodes to the empty string."""
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        """A Header built without arguments starts empty and can be grown."""
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        """Folding depends on header_name and on an explicit maxlinelen."""
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        """A 'q' encoded header is split to fit a small maxlinelen."""
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        """A 'b' encoded header is split to fit a small maxlinelen."""
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        """A plain ASCII header survives decode_header() / make_header()."""
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        """append() accepts the charset as a plain string."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        """utf-8 headers come out as 'q' or 'b' encoded words."""
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        """Undecodable bytes raise unless errors='replace' is given."""
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        """An encoded word followed by plain text round-trips unchanged."""
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        """Adjacent encoded words of one charset are merged when decoded."""
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        """decode_header() raises HeaderParseError for this base64 word."""
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
        raises(errors.HeaderParseError, decode_header, s)
3099
3100
3101
3102# Test RFC 2231 header parameters (en/de)coding
3103class TestRFC2231(TestEmailBase):
def test_get_param(self):
    """get_param('title') on msg_29.txt gives (charset, language, value)."""
    check = self.assertEqual
    parsed = self._msgobj('msg_29.txt')
    unquoted = parsed.get_param('title')
    check(unquoted,
          ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # With unquote=False the surrounding double quotes are kept.
    quoted = parsed.get_param('title', unquote=False)
    check(quoted,
          ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3111
def test_set_param(self):
    """set_param() with a charset stores an RFC 2231 encoded parameter."""
    check = self.ndiffAssertEqual
    title = 'This is even more ***fun*** isn\'t it!'
    fresh = Message()
    # Without a language the middle element of the triple is empty.
    fresh.set_param('title', title, charset='us-ascii')
    check(fresh.get_param('title'), ('us-ascii', '', title))
    fresh.set_param('title', title, charset='us-ascii', language='en')
    check(fresh.get_param('title'), ('us-ascii', 'en', title))
    # Now on a parsed message, checking the flattened result.
    # NOTE(review): the expected text has to match the msg_01.txt fixture
    # byte for byte -- confirm its whitespace against the data file.
    parsed = self._msgobj('msg_01.txt')
    parsed.set_param('title', title, charset='us-ascii', language='en')
    flattened = parsed.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")
3148
def test_del_param(self):
    """del_param() removes one parameter and leaves the others in place."""
    check = self.ndiffAssertEqual
    parsed = self._msgobj('msg_01.txt')
    parsed.set_param('foo', 'bar', charset='us-ascii', language='en')
    parsed.set_param('title', 'This is even more ***fun*** isn\'t it!',
                     charset='us-ascii', language='en')
    parsed.del_param('foo', header='Content-Type')
    # NOTE(review): the expected text has to match the msg_01.txt fixture
    # byte for byte -- confirm its whitespace against the data file.
    flattened = parsed.as_string(maxheaderlen=78)
    check(flattened, """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri, 4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")
3178
def test_rfc2231_get_content_charset(self):
    """get_content_charset() on msg_32.txt returns 'us-ascii'."""
    parsed = self._msgobj('msg_32.txt')
    charset = parsed.get_content_charset()
    self.assertEqual(charset, 'us-ascii')
3183
3184 def test_rfc2231_no_language_or_charset(self):
3185 m = '''\
3186Content-Transfer-Encoding: 8bit
3187Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3188Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3189
3190'''
3191 msg = email.message_from_string(m)
3192 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003193 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003194 self.assertEqual(
3195 param,
3196 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3197
3198 def test_rfc2231_no_language_or_charset_in_filename(self):
3199 m = '''\
3200Content-Disposition: inline;
3201\tfilename*0*="''This%20is%20even%20more%20";
3202\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3203\tfilename*2="is it not.pdf"
3204
3205'''
3206 msg = email.message_from_string(m)
3207 self.assertEqual(msg.get_filename(),
3208 'This is even more ***fun*** is it not.pdf')
3209
3210 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3211 m = '''\
3212Content-Disposition: inline;
3213\tfilename*0*="''This%20is%20even%20more%20";
3214\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3215\tfilename*2="is it not.pdf"
3216
3217'''
3218 msg = email.message_from_string(m)
3219 self.assertEqual(msg.get_filename(),
3220 'This is even more ***fun*** is it not.pdf')
3221
3222 def test_rfc2231_partly_encoded(self):
3223 m = '''\
3224Content-Disposition: inline;
3225\tfilename*0="''This%20is%20even%20more%20";
3226\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3227\tfilename*2="is it not.pdf"
3228
3229'''
3230 msg = email.message_from_string(m)
3231 self.assertEqual(
3232 msg.get_filename(),
3233 'This%20is%20even%20more%20***fun*** is it not.pdf')
3234
3235 def test_rfc2231_partly_nonencoded(self):
3236 m = '''\
3237Content-Disposition: inline;
3238\tfilename*0="This%20is%20even%20more%20";
3239\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3240\tfilename*2="is it not.pdf"
3241
3242'''
3243 msg = email.message_from_string(m)
3244 self.assertEqual(
3245 msg.get_filename(),
3246 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3247
3248 def test_rfc2231_no_language_or_charset_in_boundary(self):
3249 m = '''\
3250Content-Type: multipart/alternative;
3251\tboundary*0*="''This%20is%20even%20more%20";
3252\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3253\tboundary*2="is it not.pdf"
3254
3255'''
3256 msg = email.message_from_string(m)
3257 self.assertEqual(msg.get_boundary(),
3258 'This is even more ***fun*** is it not.pdf')
3259
3260 def test_rfc2231_no_language_or_charset_in_charset(self):
3261 # This is a nonsensical charset value, but tests the code anyway
3262 m = '''\
3263Content-Type: text/plain;
3264\tcharset*0*="This%20is%20even%20more%20";
3265\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3266\tcharset*2="is it not.pdf"
3267
3268'''
3269 msg = email.message_from_string(m)
3270 self.assertEqual(msg.get_content_charset(),
3271 'this is even more ***fun*** is it not.pdf')
3272
3273 def test_rfc2231_bad_encoding_in_filename(self):
3274 m = '''\
3275Content-Disposition: inline;
3276\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3277\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3278\tfilename*2="is it not.pdf"
3279
3280'''
3281 msg = email.message_from_string(m)
3282 self.assertEqual(msg.get_filename(),
3283 'This is even more ***fun*** is it not.pdf')
3284
3285 def test_rfc2231_bad_encoding_in_charset(self):
3286 m = """\
3287Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3288
3289"""
3290 msg = email.message_from_string(m)
3291 # This should return None because non-ascii characters in the charset
3292 # are not allowed.
3293 self.assertEqual(msg.get_content_charset(), None)
3294
3295 def test_rfc2231_bad_character_in_charset(self):
3296 m = """\
3297Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3298
3299"""
3300 msg = email.message_from_string(m)
3301 # This should return None because non-ascii characters in the charset
3302 # are not allowed.
3303 self.assertEqual(msg.get_content_charset(), None)
3304
3305 def test_rfc2231_bad_character_in_filename(self):
3306 m = '''\
3307Content-Disposition: inline;
3308\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3309\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3310\tfilename*2*="is it not.pdf%E2"
3311
3312'''
3313 msg = email.message_from_string(m)
3314 self.assertEqual(msg.get_filename(),
3315 'This is even more ***fun*** is it not.pdf\ufffd')
3316
3317 def test_rfc2231_unknown_encoding(self):
3318 m = """\
3319Content-Transfer-Encoding: 8bit
3320Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3321
3322"""
3323 msg = email.message_from_string(m)
3324 self.assertEqual(msg.get_filename(), 'myfile.txt')
3325
3326 def test_rfc2231_single_tick_in_filename_extended(self):
3327 eq = self.assertEqual
3328 m = """\
3329Content-Type: application/x-foo;
3330\tname*0*=\"Frank's\"; name*1*=\" Document\"
3331
3332"""
3333 msg = email.message_from_string(m)
3334 charset, language, s = msg.get_param('name')
3335 eq(charset, None)
3336 eq(language, None)
3337 eq(s, "Frank's Document")
3338
3339 def test_rfc2231_single_tick_in_filename(self):
3340 m = """\
3341Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3342
3343"""
3344 msg = email.message_from_string(m)
3345 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003346 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003347 self.assertEqual(param, "Frank's Document")
3348
3349 def test_rfc2231_tick_attack_extended(self):
3350 eq = self.assertEqual
3351 m = """\
3352Content-Type: application/x-foo;
3353\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3354
3355"""
3356 msg = email.message_from_string(m)
3357 charset, language, s = msg.get_param('name')
3358 eq(charset, 'us-ascii')
3359 eq(language, 'en-us')
3360 eq(s, "Frank's Document")
3361
3362 def test_rfc2231_tick_attack(self):
3363 m = """\
3364Content-Type: application/x-foo;
3365\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3366
3367"""
3368 msg = email.message_from_string(m)
3369 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003370 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003371 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3372
3373 def test_rfc2231_no_extended_values(self):
3374 eq = self.assertEqual
3375 m = """\
3376Content-Type: application/x-foo; name=\"Frank's Document\"
3377
3378"""
3379 msg = email.message_from_string(m)
3380 eq(msg.get_param('name'), "Frank's Document")
3381
3382 def test_rfc2231_encoded_then_unencoded_segments(self):
3383 eq = self.assertEqual
3384 m = """\
3385Content-Type: application/x-foo;
3386\tname*0*=\"us-ascii'en-us'My\";
3387\tname*1=\" Document\";
3388\tname*2*=\" For You\"
3389
3390"""
3391 msg = email.message_from_string(m)
3392 charset, language, s = msg.get_param('name')
3393 eq(charset, 'us-ascii')
3394 eq(language, 'en-us')
3395 eq(s, 'My Document For You')
3396
3397 def test_rfc2231_unencoded_then_encoded_segments(self):
3398 eq = self.assertEqual
3399 m = """\
3400Content-Type: application/x-foo;
3401\tname*0=\"us-ascii'en-us'My\";
3402\tname*1*=\" Document\";
3403\tname*2*=\" For You\"
3404
3405"""
3406 msg = email.message_from_string(m)
3407 charset, language, s = msg.get_param('name')
3408 eq(charset, 'us-ascii')
3409 eq(language, 'en-us')
3410 eq(s, 'My Document For You')
3411
3412
3413
R. David Murraya8f480f2010-01-16 18:30:03 +00003414# Tests to ensure that signed parts of an email are completely preserved, as
3415# required by RFC1847 section 2.1. Note that these are incomplete, because the
3416# email package does not currently always preserve the body. See issue 1670765.
class TestSigned(TestEmailBase):

    def _msg_and_obj(self, filename):
        """Return ``(text, message)`` for the named test data file.

        ``text`` is the file content exactly as read, and ``message`` is the
        object obtained by parsing that same text.
        """
        with openfile(findfile(filename)) as stream:
            text = stream.read()
        return text, email.message_from_string(text)

    def _signed_parts_eq(self, original, result):
        """Assert that the first MIME part is the same in both texts.

        The part is whatever lies between the first ``--boundary`` line and
        the next line that starts with the same ``--boundary`` marker.
        """
        import re
        first_part = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        # Extract from the original first, then from the regenerated text.
        inpart, outpart = (
            first_part.search(text).group(2) for text in (original, result)
        )
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        # Round trip through as_string() with its default arguments.
        text, parsed = self._msg_and_obj('msg_45.txt')
        self._signed_parts_eq(text, parsed.as_string())

    def test_long_headers_as_string_maxheaderlen(self):
        # Round trip through as_string() with an explicit header length
        # limit of 60.
        text, parsed = self._msg_and_obj('msg_45.txt')
        regenerated = parsed.as_string(maxheaderlen=60)
        self._signed_parts_eq(text, regenerated)

    def test_long_headers_flatten(self):
        # Round trip through a Generator writing into an in-memory buffer.
        text, parsed = self._msg_and_obj('msg_45.txt')
        buffer = StringIO()
        generator = Generator(buffer)
        generator.flatten(parsed)
        self._signed_parts_eq(text, buffer.getvalue())
3449
3450
3451
def _testclasses():
    """Return the module attributes whose names start with 'Test'.

    The attributes are looked up on this very module and returned in the
    order in which ``dir()`` lists their names.
    """
    this_module = sys.modules[__name__]
    collected = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            collected.append(getattr(this_module, attr_name))
    return collected
3455
3456
def suite():
    """Build one TestSuite holding the tests of every ``Test*`` class.

    The classes come from ``_testclasses()``.  Each one is loaded with a
    default ``unittest.TestLoader`` instead of ``unittest.makeSuite``:
    ``makeSuite`` was deprecated in Python 3.11 and removed in 3.13, whereas
    ``loadTestsFromTestCase`` is available everywhere and yields the same
    per-class suite (``test`` method prefix, default sort order).

    Returns:
        A ``unittest.TestSuite`` with one nested suite per test class.
    """
    loader = unittest.TestLoader()
    # Named differently from the function so the local does not shadow it.
    all_tests = unittest.TestSuite()
    for testclass in _testclasses():
        all_tests.addTest(loader.loadTestsFromTestCase(testclass))
    return all_tests
3462
3463
def test_main():
    """Run each class returned by ``_testclasses()`` through ``run_unittest``.

    Every class gets its own ``run_unittest`` call, in the order in which
    ``_testclasses()`` returns them.
    """
    test_classes = _testclasses()
    for test_class in test_classes:
        run_unittest(test_class)
3467
3468
3469
# When this file is executed as a script, hand control to unittest and let it
# resolve the module-level ``suite`` callable as the default set of tests.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')