blob: 5e4d9ba149f66cab0557b82849bcc72ae9068727 [file] [log] [blame]
Benjamin Peterson46a99002010-01-09 18:45:30 +00001# Copyright (C) 2001-2010 Python Software Foundation
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002# Contact: email-sig@python.org
3# email package unit tests
4
5import os
6import sys
7import time
8import base64
9import difflib
10import unittest
11import warnings
12
13from io import StringIO
14from itertools import chain
15
16import email
17
18from email.charset import Charset
19from email.header import Header, decode_header, make_header
20from email.parser import Parser, HeaderParser
21from email.generator import Generator, DecodedGenerator
22from email.message import Message
23from email.mime.application import MIMEApplication
24from email.mime.audio import MIMEAudio
25from email.mime.text import MIMEText
26from email.mime.image import MIMEImage
27from email.mime.base import MIMEBase
28from email.mime.message import MIMEMessage
29from email.mime.multipart import MIMEMultipart
30from email import utils
31from email import errors
32from email import encoders
33from email import iterators
34from email import base64mime
35from email import quoprimime
36
Benjamin Petersonee8712c2008-05-20 21:35:26 +000037from test.support import findfile, run_unittest
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from email.test import __file__ as landmark
39
40
41NL = '\n'
42EMPTYSTRING = ''
43SPACE = ' '
44
45
46
def openfile(filename, *args, **kws):
    """Open *filename* from the ``data`` directory next to ``landmark``.

    ``landmark`` is the ``__file__`` of the ``email.test`` package; any extra
    positional or keyword arguments are handed straight to ``open()``.
    """
    data_dir = os.path.join(os.path.dirname(landmark), 'data')
    return open(os.path.join(data_dir, filename), *args, **kws)
50
51
52
53# Base test class
class TestEmailBase(unittest.TestCase):
    """Common helpers shared by the email test case classes."""

    def ndiffAssertEqual(self, first, second):
        """Like assertEqual except use ndiff for readable output."""
        if first != second:
            # Diff the repr() of every line so that whitespace and escape
            # differences are visible in the failure message.
            shown = [
                [repr(line) for line in str(value).splitlines()]
                for value in (first, second)
            ]
            delta = difflib.ndiff(*shown)
            raise self.failureException('\n' + '\n'.join(delta))

    def _msgobj(self, filename):
        """Parse the named data file and return the resulting message."""
        with openfile(findfile(filename)) as stream:
            message = email.message_from_file(stream)
        return message
68
69
70
71# Test various aspects of the Message class's API
class TestMessageAPI(TestEmailBase):
    """Exercise the public API of the Message class."""

    def test_get_all(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_20.txt')
        eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
        # A missing header yields the supplied failobj.
        eq(msg.get_all('xx', 'n/a'), 'n/a')

    def test_getset_charset(self):
        eq = self.assertEqual
        msg = Message()
        eq(msg.get_charset(), None)
        charset = Charset('iso-8859-1')
        msg.set_charset(charset)
        eq(msg['mime-version'], '1.0')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
        eq(msg.get_param('charset'), 'iso-8859-1')
        eq(msg['content-transfer-encoding'], 'quoted-printable')
        eq(msg.get_charset().input_charset, 'iso-8859-1')
        # Remove the charset
        msg.set_charset(None)
        eq(msg.get_charset(), None)
        eq(msg['content-type'], 'text/plain')
        # Try adding a charset when there's already MIME headers present
        msg = Message()
        msg['MIME-Version'] = '2.0'
        msg['Content-Type'] = 'text/x-weird'
        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
        msg.set_charset(charset)
        eq(msg['mime-version'], '2.0')
        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
        eq(msg['content-transfer-encoding'], 'quinted-puntable')

    def test_set_charset_from_string(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_charset('us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')

    def test_set_payload_with_charset(self):
        msg = Message()
        charset = Charset('iso-8859-1')
        msg.set_payload('This is a string payload', charset)
        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')

    def test_get_charsets(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_08.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', 'iso-8859-2', 'koi8-r'])

        msg = self._msgobj('msg_09.txt')
        charsets = msg.get_charsets('dingbat')
        eq(charsets, ['dingbat', 'us-ascii', 'iso-8859-1', 'dingbat',
                      'koi8-r'])

        msg = self._msgobj('msg_12.txt')
        charsets = msg.get_charsets()
        eq(charsets, [None, 'us-ascii', 'iso-8859-1', None, 'iso-8859-2',
                      'iso-8859-3', 'us-ascii', 'koi8-r'])

    def test_get_filename(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_04.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

        msg = self._msgobj('msg_07.txt')
        subpart = msg.get_payload(1)
        eq(subpart.get_filename(), 'dingusfish.gif')

    def test_get_filename_with_name_parameter(self):
        eq = self.assertEqual

        msg = self._msgobj('msg_44.txt')
        filenames = [p.get_filename() for p in msg.get_payload()]
        eq(filenames, ['msg.txt', 'msg.txt'])

    def test_get_boundary(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        # No quotes!
        eq(msg.get_boundary(), 'BOUNDARY')

    def test_set_boundary(self):
        eq = self.assertEqual
        # This one has no existing boundary parameter, but the Content-Type:
        # header appears fifth.
        msg = self._msgobj('msg_01.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'text/plain; charset="us-ascii"; boundary="BOUNDARY"')
        # This one has a Content-Type: header, with a boundary, stuck in the
        # middle of its headers.  Make sure the order is preserved; it should
        # be fifth.
        msg = self._msgobj('msg_04.txt')
        msg.set_boundary('BOUNDARY')
        header, value = msg.items()[4]
        eq(header.lower(), 'content-type')
        eq(value, 'multipart/mixed; boundary="BOUNDARY"')
        # And this one has no Content-Type: header at all.
        msg = self._msgobj('msg_03.txt')
        self.assertRaises(errors.HeaderParseError,
                          msg.set_boundary, 'BOUNDARY')

    def test_message_rfc822_only(self):
        # Issue 7970: message/rfc822 not in multipart parsed by
        # HeaderParser caused an exception when flattened.
        # Read the fixture inside a with-block so the file is always closed.
        with openfile(findfile('msg_46.txt')) as fp:
            msgdata = fp.read()
        parser = HeaderParser()
        msg = parser.parsestr(msgdata)
        out = StringIO()
        gen = Generator(out, True, 0)
        gen.flatten(msg, False)
        self.assertEqual(out.getvalue(), msgdata)

    def test_get_decoded_payload(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_10.txt')
        # The outer message is a multipart
        eq(msg.get_payload(decode=True), None)
        # Subpart 1 is 7bit encoded
        eq(msg.get_payload(0).get_payload(decode=True),
           b'This is a 7bit encoded message.\n')
        # Subpart 2 is quopri
        eq(msg.get_payload(1).get_payload(decode=True),
           b'\xa1This is a Quoted Printable encoded message!\n')
        # Subpart 3 is base64
        eq(msg.get_payload(2).get_payload(decode=True),
           b'This is a Base64 encoded message.')
        # Subpart 4 is base64 with a trailing newline, which
        # used to be stripped (issue 7143).
        eq(msg.get_payload(3).get_payload(decode=True),
           b'This is a Base64 encoded message.\n')
        # Subpart 5 has no Content-Transfer-Encoding: header.
        eq(msg.get_payload(4).get_payload(decode=True),
           b'This has no Content-Transfer-Encoding: header.\n')

    def test_get_decoded_uu_payload(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_payload('begin 666 -\n+:&5L;&\\@=V]R;&0 \n \nend\n')
        for cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
            # Assigning a header appends instead of replacing, and header
            # lookup returns the first match.  Drop the previous value so
            # that every spelling is really the one being decoded.
            del msg['content-transfer-encoding']
            msg['content-transfer-encoding'] = cte
            eq(msg.get_payload(decode=True), b'hello world')
        # Now try some bogus data
        msg.set_payload('foo')
        eq(msg.get_payload(decode=True), b'foo')

    def test_decoded_generator(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_07.txt')
        with openfile('msg_17.txt') as fp:
            text = fp.read()
        s = StringIO()
        g = DecodedGenerator(s)
        g.flatten(msg)
        eq(s.getvalue(), text)

    def test__contains__(self):
        msg = Message()
        msg['From'] = 'Me'
        msg['to'] = 'You'
        # Check for case insensitivity
        self.assertIn('from', msg)
        self.assertIn('From', msg)
        self.assertIn('FROM', msg)
        self.assertIn('to', msg)
        self.assertIn('To', msg)
        self.assertIn('TO', msg)

    def test_as_string(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_01.txt')
        with openfile('msg_01.txt') as fp:
            text = fp.read()
        eq(text, str(msg))
        fullrepr = msg.as_string(unixfrom=True)
        lines = fullrepr.split('\n')
        # With unixfrom=True the first line is the envelope "From " line;
        # everything after it must match the original text.
        self.assertTrue(lines[0].startswith('From '))
        eq(text, NL.join(lines[1:]))

    def test_bad_param(self):
        msg = email.message_from_string("Content-Type: blarg; baz; boo\n")
        self.assertEqual(msg.get_param('baz'), '')

    def test_missing_filename(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_filename(), None)

    def test_bogus_filename(self):
        msg = email.message_from_string(
            "Content-Disposition: blarg; filename\n")
        self.assertEqual(msg.get_filename(), '')

    def test_missing_boundary(self):
        msg = email.message_from_string("From: foo\n")
        self.assertEqual(msg.get_boundary(), None)

    def test_get_params(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            'X-Header: foo=one; bar=two; baz=three\n')
        eq(msg.get_params(header='x-header'),
           [('foo', 'one'), ('bar', 'two'), ('baz', 'three')])
        msg = email.message_from_string(
            'X-Header: foo; bar=one; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])
        eq(msg.get_params(), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_params(header='x-header'),
           [('foo', ''), ('bar', 'one'), ('baz', 'two')])

    def test_get_param_liberal(self):
        msg = Message()
        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')

    def test_get_param(self):
        eq = self.assertEqual
        msg = email.message_from_string(
            "X-Header: foo=one; bar=two; baz=three\n")
        eq(msg.get_param('bar', header='x-header'), 'two')
        eq(msg.get_param('quuz', header='x-header'), None)
        eq(msg.get_param('quuz'), None)
        msg = email.message_from_string(
            'X-Header: foo; bar="one"; baz=two\n')
        eq(msg.get_param('foo', header='x-header'), '')
        eq(msg.get_param('bar', header='x-header'), 'one')
        eq(msg.get_param('baz', header='x-header'), 'two')
        # XXX: We are not RFC-2045 compliant!  We cannot parse:
        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
        # msg.get_param("weird")
        # yet.

    def test_get_param_funky_continuation_lines(self):
        msg = self._msgobj('msg_22.txt')
        self.assertEqual(msg.get_payload(1).get_param('name'), 'wibble.JPG')

    def test_get_param_with_semis_in_quotes(self):
        # The semicolons inside the quoted value must not be treated as
        # parameter separators.
        msg = email.message_from_string(
            'Content-Type: image/pjpeg; name="Jim&amp;&amp;Jill"\n')
        self.assertEqual(msg.get_param('name'), 'Jim&amp;&amp;Jill')
        self.assertEqual(msg.get_param('name', unquote=False),
                         '"Jim&amp;&amp;Jill"')

    def test_field_containment(self):
        msg = email.message_from_string('Header: exists')
        self.assertIn('header', msg)
        self.assertIn('Header', msg)
        self.assertIn('HEADER', msg)
        self.assertNotIn('headerx', msg)

    def test_set_param(self):
        eq = self.assertEqual
        msg = Message()
        msg.set_param('charset', 'iso-2022-jp')
        eq(msg.get_param('charset'), 'iso-2022-jp')
        msg.set_param('importance', 'high value')
        eq(msg.get_param('importance'), 'high value')
        eq(msg.get_param('importance', unquote=False), '"high value"')
        eq(msg.get_params(), [('text/plain', ''),
                              ('charset', 'iso-2022-jp'),
                              ('importance', 'high value')])
        eq(msg.get_params(unquote=False), [('text/plain', ''),
                                           ('charset', '"iso-2022-jp"'),
                                           ('importance', '"high value"')])
        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')

    def test_del_param(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_05.txt')
        eq(msg.get_params(),
           [('multipart/report', ''), ('report-type', 'delivery-status'),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        old_val = msg.get_param("report-type")
        msg.del_param("report-type")
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
        # Re-adding the parameter puts it at the end of the list.
        msg.set_param("report-type", old_val)
        eq(msg.get_params(),
           [('multipart/report', ''),
            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
            ('report-type', old_val)])

    def test_del_param_on_other_header(self):
        msg = Message()
        msg.add_header('Content-Disposition', 'attachment', filename='bud.gif')
        msg.del_param('filename', 'content-disposition')
        self.assertEqual(msg['content-disposition'], 'attachment')

    def test_set_type(self):
        eq = self.assertEqual
        msg = Message()
        self.assertRaises(ValueError, msg.set_type, 'text')
        msg.set_type('text/plain')
        eq(msg['content-type'], 'text/plain')
        msg.set_param('charset', 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
        msg.set_type('text/html')
        eq(msg['content-type'], 'text/html; charset="us-ascii"')

    def test_set_type_on_other_header(self):
        msg = Message()
        msg['X-Content-Type'] = 'text/plain'
        msg.set_type('application/octet-stream', 'X-Content-Type')
        self.assertEqual(msg['x-content-type'], 'application/octet-stream')

    def test_get_content_type_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_type(), 'message/rfc822')

    def test_get_content_type_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_type(),
                         'message/rfc822')

    def test_get_content_type_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_type_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_type(), 'text/plain')

    def test_get_content_maintype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_maintype(), 'message')

    def test_get_content_maintype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_maintype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_missing(self):
        msg = Message()
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_missing_with_default_type(self):
        msg = Message()
        msg.set_default_type('message/rfc822')
        self.assertEqual(msg.get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_implicit(self):
        msg = self._msgobj('msg_30.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_explicit(self):
        msg = self._msgobj('msg_28.txt')
        self.assertEqual(msg.get_payload(0).get_content_subtype(), 'rfc822')

    def test_get_content_subtype_from_message_text_plain_implicit(self):
        msg = self._msgobj('msg_03.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_subtype_from_message_text_plain_explicit(self):
        msg = self._msgobj('msg_01.txt')
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_get_content_maintype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_maintype(), 'text')

    def test_get_content_subtype_error(self):
        msg = Message()
        msg['Content-Type'] = 'no-slash-in-this-string'
        self.assertEqual(msg.get_content_subtype(), 'plain')

    def test_replace_header(self):
        eq = self.assertEqual
        msg = Message()
        msg.add_header('First', 'One')
        msg.add_header('Second', 'Two')
        msg.add_header('Third', 'Three')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Two', 'Three'])
        msg.replace_header('Second', 'Twenty')
        eq(msg.keys(), ['First', 'Second', 'Third'])
        eq(msg.values(), ['One', 'Twenty', 'Three'])
        # With a duplicated field name only the first occurrence is replaced.
        msg.add_header('First', 'Eleven')
        msg.replace_header('First', 'One Hundred')
        eq(msg.keys(), ['First', 'Second', 'Third', 'First'])
        eq(msg.values(), ['One Hundred', 'Twenty', 'Three', 'Eleven'])
        self.assertRaises(KeyError, msg.replace_header, 'Fourth', 'Missing')

    def test_broken_base64_payload(self):
        x = 'AwDp0P7//y6LwKEAcPa/6Q=9'
        msg = Message()
        msg['content-type'] = 'audio/x-midi'
        msg['content-transfer-encoding'] = 'base64'
        msg.set_payload(x)
        # The undecodable payload is expected back as the raw bytes.
        self.assertEqual(msg.get_payload(decode=True),
                         bytes(x, 'raw-unicode-escape'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000502
503
504
505# Test the email.encoders module
506class TestEncoders(unittest.TestCase):
507 def test_encode_empty_payload(self):
508 eq = self.assertEqual
509 msg = Message()
510 msg.set_charset('us-ascii')
511 eq(msg['content-transfer-encoding'], '7bit')
512
513 def test_default_cte(self):
514 eq = self.assertEqual
515 msg = MIMEText('hello world')
516 eq(msg['content-transfer-encoding'], '7bit')
517
518 def test_default_cte(self):
519 eq = self.assertEqual
520 # With no explicit _charset its us-ascii, and all are 7-bit
521 msg = MIMEText('hello world')
522 eq(msg['content-transfer-encoding'], '7bit')
523 # Similar, but with 8-bit data
524 msg = MIMEText('hello \xf8 world')
525 eq(msg['content-transfer-encoding'], '8bit')
526 # And now with a different charset
527 msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
528 eq(msg['content-transfer-encoding'], 'quoted-printable')
529
530
531
532# Test long header wrapping
533class TestLongHeaders(TestEmailBase):
def test_split_long_continuation(self):
    # A message whose Subject has an over-long continuation line must come
    # back out of the Generator exactly as it went into the parser.
    raw = """\
Subject: bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text

test
"""
    parsed = email.message_from_string(raw)
    buf = StringIO()
    Generator(buf).flatten(parsed)
    self.ndiffAssertEqual(buf.getvalue(), raw)
553
def test_another_long_almost_unsplittable_header(self):
    check = self.ndiffAssertEqual
    text = """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text"""
    # First with tab-led continuation lines ...
    tabbed = Header(text, continuation_ws='\t')
    check(tabbed.encode(), """\
bug demonstration
\t12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
\tmore text""")
    # ... then with every tab swapped for a space.
    spaced = Header(text.replace('\t', ' '))
    check(spaced.encode(), """\
bug demonstration
 12345678911234567892123456789312345678941234567895123456789612345678971234567898112345678911234567892123456789112345678911234567892123456789
 more text""")
570
def test_long_nonstring(self):
    check = self.ndiffAssertEqual
    latin1 = Charset("iso-8859-1")
    latin2 = Charset("iso-8859-2")
    utf8 = Charset("utf-8")
    latin1_text = (b'Die Mieter treten hier ein werden mit einem Foerderband '
                   b'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
                   b'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
                   b'bef\xf6rdert. ')
    latin2_text = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
    utf8_text = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                 '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                 '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                 '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                 '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                 'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                 'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                 '\u3044\u307e\u3059\u3002')
    # One header built from three chunks, each with its own charset.
    subject = Header(latin1_text, latin1, header_name='Subject')
    subject.append(latin2_text, latin2)
    subject.append(utf8_text, utf8)
    msg = Message()
    msg['Subject'] = subject
    out = StringIO()
    Generator(out).flatten(msg)
    check(out.getvalue(), """\
Subject: =?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderb?=
 =?iso-8859-1?q?and_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen?=
 =?iso-8859-1?q?_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef?=
 =?iso-8859-1?q?=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hrouti?=
 =?iso-8859-2?q?ly_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
 =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn44Gf44KJ?=
 =?utf-8?b?44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFzIE51bnN0dWNr?=
 =?utf-8?b?IGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5kIGRhcyBPZGVyIGRp?=
 =?utf-8?b?ZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIDjgaPjgabjgYTjgb7jgZk=?=
 =?utf-8?b?44CC?=

""")
    # Encoding the header value on its own, without the field name.
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerde?=
 =?iso-8859-1?q?rband_komfortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndis?=
 =?iso-8859-1?q?chen_Wandgem=E4lden_vorbei=2C_gegen_die_rotierenden_Klinge?=
 =?iso-8859-1?q?n_bef=F6rdert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se?=
 =?iso-8859-2?q?_hroutily_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
 =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb?=
 =?utf-8?b?44KT44CC5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go?=
 =?utf-8?b?44Gv44Gn44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBp?=
 =?utf-8?b?c3QgZGFzIE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWlo?=
 =?utf-8?b?ZXJodW5kIGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI0=?=
 =?utf-8?b?44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000624
625 def test_long_header_encode(self):
626 eq = self.ndiffAssertEqual
627 h = Header('wasnipoop; giraffes="very-long-necked-animals"; '
628 'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"',
629 header_name='X-Foobar-Spoink-Defrobnit')
630 eq(h.encode(), '''\
631wasnipoop; giraffes="very-long-necked-animals";
632 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
633
def test_long_header_encode_with_tab_continuation_is_just_a_hint(self):
    # continuation_ws='\t' is requested, yet the expected fold still
    # starts with a space because the input separates with spaces.
    value = ('wasnipoop; giraffes="very-long-necked-animals"; '
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
643
def test_long_header_encode_with_tab_continuation(self):
    # Here the input itself has a tab after the first ';', and the
    # expected continuation line starts with that tab.
    value = ('wasnipoop; giraffes="very-long-necked-animals";\t'
             'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    header = Header(value,
                    header_name='X-Foobar-Spoink-Defrobnit',
                    continuation_ws='\t')
    self.ndiffAssertEqual(header.encode(), '''\
wasnipoop; giraffes="very-long-necked-animals";
\tspooge="yummy"; hippos="gargantuan"; marshmallows="gooey"''')
653
def test_header_splitter(self):
    message = MIMEText('')
    # It'd be great if we could use add_header() here, but that doesn't
    # guarantee an order of the parameters.
    message['X-Foobar-Spoink-Defrobnit'] = (
        'wasnipoop; giraffes="very-long-necked-animals"; '
        'spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"')
    out = StringIO()
    Generator(out).flatten(message)
    flattened = out.getvalue()
    self.ndiffAssertEqual(flattened, '''\
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
 spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"

''')
673
def test_no_semis_header_splitter(self):
    # A long header with no semicolons: the expected output folds the ten
    # message-ids between two of them.
    message = Message()
    message['From'] = 'test@dom.ain'
    message_ids = ['<%d@dom.ain>' % i for i in range(10)]
    message['References'] = SPACE.join(message_ids)
    message.set_payload('Test')
    out = StringIO()
    Generator(out).flatten(message)
    self.ndiffAssertEqual(out.getvalue(), """\
From: test@dom.ain
References: <0@dom.ain> <1@dom.ain> <2@dom.ain> <3@dom.ain> <4@dom.ain>
 <5@dom.ain> <6@dom.ain> <7@dom.ain> <8@dom.ain> <9@dom.ain>

Test""")
689
690 def test_no_split_long_header(self):
691 eq = self.ndiffAssertEqual
692 hstr = 'References: ' + 'x' * 80
Guido van Rossum9604e662007-08-30 03:46:43 +0000693 h = Header(hstr)
694 # These come on two lines because Headers are really field value
695 # classes and don't really know about their field names.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000696 eq(h.encode(), """\
Guido van Rossum9604e662007-08-30 03:46:43 +0000697References:
698 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx""")
699 h = Header('x' * 80)
700 eq(h.encode(), 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000701
def test_splitting_multiple_long_lines(self):
    # Three long tab-separated lines; each one is expected to be folded
    # at its ';' separators.
    text = """\
from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
"""
    header = Header(text, continuation_ws='\t')
    encoded = header.encode()
    self.ndiffAssertEqual(encoded, """\
from babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)
\tfrom babylon.socal-raves.org (localhost [127.0.0.1]);
 by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
 for <mailman-admin@babylon.socal-raves.org>;
 Sat, 2 Feb 2002 17:00:06 -0800 (PST)""")
723
def test_splitting_first_line_only_is_long(self):
    # Only the first line is over the limit, so only it is expected to be
    # folded; the tab-led lines after it stay as they are.
    text = """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93] helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400"""
    header = Header(text, maxlinelen=78, header_name='Received',
                    continuation_ws='\t')
    encoded = header.encode()
    self.ndiffAssertEqual(encoded, """\
from modemcable093.139-201-24.que.mc.videotron.ca ([24.201.139.93]
 helo=cthulhu.gerg.ca)
\tby kronos.mems-exchange.org with esmtp (Exim 4.05)
\tid 17k4h5-00034i-00
\tfor test@mems-exchange.org; Wed, 28 Aug 2002 11:25:20 -0400""")
739
def test_long_8bit_header(self):
    check = self.ndiffAssertEqual
    subject = Header('Britische Regierung gibt', 'iso-8859-1',
                     header_name='Subject')
    subject.append('gr\xfcnes Licht f\xfcr Offshore-Windkraftprojekte')
    # The bare header value, wrapped at 76 columns.
    check(subject.encode(maxlinelen=76), """\
=?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=""")
    message = Message()
    message['Subject'] = subject
    # The same header inside a message, wrapped at 76 columns ...
    check(message.as_string(maxheaderlen=76), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offs?=
 =?iso-8859-1?q?hore-Windkraftprojekte?=

""")
    # ... and with maxheaderlen=0, where it stays on one line.
    check(message.as_string(maxheaderlen=0), """\
Subject: =?iso-8859-1?q?Britische_Regierung_gibt_gr=FCnes_Licht_f=FCr_Offshore-Windkraftprojekte?=

""")
759
def test_long_8bit_header_no_charset(self):
    text = ('Britische Regierung gibt gr\xfcnes Licht '
            'f\xfcr Offshore-Windkraftprojekte '
            '<a-very-long-address@example.com>')
    # Set as a plain string, the non-ASCII value cannot be serialised.
    plain = Message()
    plain['Reply-To'] = text
    self.assertRaises(UnicodeEncodeError, plain.as_string)
    # Wrapped in a Header with an explicit charset it can be.
    wrapped = Message()
    wrapped['Reply-To'] = Header(text, 'utf-8',
                                 header_name='Reply-To')
    self.ndiffAssertEqual(wrapped.as_string(maxheaderlen=78), """\
Reply-To: =?utf-8?q?Britische_Regierung_gibt_gr=C3=BCnes_Licht_f=C3=BCr_Offs?=
 =?utf-8?q?hore-Windkraftprojekte_=3Ca-very-long-address=40example=2Ecom=3E?=

""")
776
def test_long_to_header(self):
    # A long address list is expected to fold after commas.
    recipients = ('"Someone Test #A" <someone@eecs.umich.edu>,'
                  '<someone@eecs.umich.edu>,'
                  '"Someone Test #B" <someone@umich.edu>, '
                  '"Someone Test #C" <someone@eecs.umich.edu>, '
                  '"Someone Test #D" <someone@eecs.umich.edu>')
    message = Message()
    message['To'] = recipients
    rendered = message.as_string(maxheaderlen=78)
    self.ndiffAssertEqual(rendered, '''\
To: "Someone Test #A" <someone@eecs.umich.edu>,<someone@eecs.umich.edu>,
 "Someone Test #B" <someone@umich.edu>,
 "Someone Test #C" <someone@eecs.umich.edu>,
 "Someone Test #D" <someone@eecs.umich.edu>

''')
793
794 def test_long_line_after_append(self):
795 eq = self.ndiffAssertEqual
796 s = 'This is an example of string which has almost the limit of header length.'
797 h = Header(s)
798 h.append('Add another line.')
Guido van Rossum9604e662007-08-30 03:46:43 +0000799 eq(h.encode(maxlinelen=76), """\
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000800This is an example of string which has almost the limit of header length.
801 Add another line.""")
802
803 def test_shorter_line_with_append(self):
804 eq = self.ndiffAssertEqual
805 s = 'This is a shorter line.'
806 h = Header(s)
807 h.append('Add another sentence. (Surprise?)')
808 eq(h.encode(),
809 'This is a shorter line. Add another sentence. (Surprise?)')
810
811 def test_long_field_name(self):
812 eq = self.ndiffAssertEqual
813 fn = 'X-Very-Very-Very-Long-Header-Name'
Guido van Rossum9604e662007-08-30 03:46:43 +0000814 gs = ('Die Mieter treten hier ein werden mit einem Foerderband '
815 'komfortabel den Korridor entlang, an s\xfcdl\xfcndischen '
816 'Wandgem\xe4lden vorbei, gegen die rotierenden Klingen '
817 'bef\xf6rdert. ')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000818 h = Header(gs, 'iso-8859-1', header_name=fn)
819 # BAW: this seems broken because the first line is too long
Guido van Rossum9604e662007-08-30 03:46:43 +0000820 eq(h.encode(maxlinelen=76), """\
821=?iso-8859-1?q?Die_Mieter_treten_hier_e?=
822 =?iso-8859-1?q?in_werden_mit_einem_Foerderband_komfortabel_den_Korridor_e?=
823 =?iso-8859-1?q?ntlang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei=2C_ge?=
824 =?iso-8859-1?q?gen_die_rotierenden_Klingen_bef=F6rdert=2E_?=""")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000825
826 def test_long_received_header(self):
827 h = ('from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) '
828 'by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP; '
829 'Wed, 05 Mar 2003 18:10:18 -0700')
830 msg = Message()
831 msg['Received-1'] = Header(h, continuation_ws='\t')
832 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000833 # This should be splitting on spaces not semicolons.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000834 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000835Received-1: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
836 Wed, 05 Mar 2003 18:10:18 -0700
837Received-2: from FOO.TLD (vizworld.acl.foo.tld [123.452.678.9]) by hrothgar.la.mastaler.com (tmda-ofmipd) with ESMTP;
838 Wed, 05 Mar 2003 18:10:18 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000839
840""")
841
842 def test_string_headerinst_eq(self):
843 h = ('<15975.17901.207240.414604@sgigritzmann1.mathematik.'
844 'tu-muenchen.de> (David Bremner\'s message of '
845 '"Thu, 6 Mar 2003 13:58:21 +0100")')
846 msg = Message()
847 msg['Received-1'] = Header(h, header_name='Received-1',
848 continuation_ws='\t')
849 msg['Received-2'] = h
Barry Warsawbef9d212007-08-31 10:55:37 +0000850 # XXX This should be splitting on spaces not commas.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000851 self.ndiffAssertEqual(msg.as_string(maxheaderlen=78), """\
Barry Warsawbef9d212007-08-31 10:55:37 +0000852Received-1: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
853 6 Mar 2003 13:58:21 +0100\")
854Received-2: <15975.17901.207240.414604@sgigritzmann1.mathematik.tu-muenchen.de> (David Bremner's message of \"Thu,
855 6 Mar 2003 13:58:21 +0100\")
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000856
857""")
858
859 def test_long_unbreakable_lines_with_continuation(self):
860 eq = self.ndiffAssertEqual
861 msg = Message()
862 t = """\
863iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
864 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp"""
865 msg['Face-1'] = t
866 msg['Face-2'] = Header(t, header_name='Face-2')
Barry Warsawbef9d212007-08-31 10:55:37 +0000867 # XXX This splitting is all wrong. It the first value line should be
868 # snug against the field name.
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000869 eq(msg.as_string(maxheaderlen=78), """\
Barry Warsawc5a6a302007-08-31 11:19:21 +0000870Face-1:\x20
Barry Warsaw70d61ce2009-03-30 23:12:30 +0000871 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000872 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
Barry Warsawc5a6a302007-08-31 11:19:21 +0000873Face-2:\x20
Barry Warsawbef9d212007-08-31 10:55:37 +0000874 iVBORw0KGgoAAAANSUhEUgAAADAAAAAwBAMAAAClLOS0AAAAGFBMVEUAAAAkHiJeRUIcGBi9
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000875 locQDQ4zJykFBAXJfWDjAAACYUlEQVR4nF2TQY/jIAyFc6lydlG5x8Nyp1Y69wj1PN2I5gzp
876
877""")
878
879 def test_another_long_multiline_header(self):
880 eq = self.ndiffAssertEqual
881 m = ('Received: from siimage.com '
882 '([172.25.1.3]) by zima.siliconimage.com with '
Guido van Rossum9604e662007-08-30 03:46:43 +0000883 'Microsoft SMTPSVC(5.0.2195.4905); '
884 'Wed, 16 Oct 2002 07:41:11 -0700')
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000885 msg = email.message_from_string(m)
886 eq(msg.as_string(maxheaderlen=78), '''\
Barry Warsawbef9d212007-08-31 10:55:37 +0000887Received: from siimage.com ([172.25.1.3]) by zima.siliconimage.com with Microsoft SMTPSVC(5.0.2195.4905);
888 Wed, 16 Oct 2002 07:41:11 -0700
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000889
890''')
891
892 def test_long_lines_with_different_header(self):
893 eq = self.ndiffAssertEqual
894 h = ('List-Unsubscribe: '
895 '<http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,'
896 ' <mailto:spamassassin-talk-request@lists.sourceforge.net'
897 '?subject=unsubscribe>')
898 msg = Message()
899 msg['List'] = h
900 msg['List'] = Header(h, header_name='List')
901 eq(msg.as_string(maxheaderlen=78), """\
902List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000903 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000904List: List-Unsubscribe: <http://lists.sourceforge.net/lists/listinfo/spamassassin-talk>,
Barry Warsawbef9d212007-08-31 10:55:37 +0000905 <mailto:spamassassin-talk-request@lists.sourceforge.net?subject=unsubscribe>
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000906
907""")
908
909
910
911# Test mangling of "From " lines in the body of a message
class TestFromMangling(unittest.TestCase):
    def setUp(self):
        # A message whose body starts with "From ", the line that the
        # generator's mangle_from_ option is meant to escape.
        message = Message()
        message['From'] = 'aaa@bbb.org'
        message.set_payload("""\
From the desk of A.A.A.:
Blah blah blah
""")
        self.msg = message

    def _flatten(self, mangle):
        # Flatten self.msg through a Generator built with the given
        # mangle_from_ setting and return the generated text.
        buffer = StringIO()
        Generator(buffer, mangle_from_=mangle).flatten(self.msg)
        return buffer.getvalue()

    def test_mangled_from(self):
        self.assertEqual(self._flatten(True), """\
From: aaa@bbb.org

>From the desk of A.A.A.:
Blah blah blah
""")

    def test_dont_mangle_from(self):
        self.assertEqual(self._flatten(False), """\
From: aaa@bbb.org

From the desk of A.A.A.:
Blah blah blah
""")
942
943
944
945# Test the basic MIMEAudio class
class TestMIMEAudio(unittest.TestCase):
    def setUp(self):
        # Make sure we pick up the audiotest.au that lives in email/test/data.
        # In Python, there's an audiotest.au living in Lib/test but that isn't
        # included in some binary distros that don't include the test
        # package.  The trailing empty string on the .join() is significant
        # since findfile() will do a dirname().
        datadir = os.path.join(os.path.dirname(landmark), 'data', '')
        with open(findfile('audiotest.au', datadir), 'rb') as fp:
            self._audiodata = fp.read()
        self._au = MIMEAudio(self._audiodata)

    def test_guess_minor_type(self):
        # The subtype is not passed to MIMEAudio in setUp, so 'basic' is
        # what the class worked out from the data.
        self.assertEqual(self._au.get_content_type(), 'audio/basic')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._au.get_payload()
        self.assertEqual(base64.decodebytes(payload), self._audiodata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides the guessed one.
        au = MIMEAudio(self._audiodata, 'fish')
        self.assertEqual(au.get_content_type(), 'audio/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._au.add_header('Content-Disposition', 'attachment',
                            filename='audiotest.au')
        eq(self._au['content-disposition'],
           'attachment; filename="audiotest.au"')
        eq(self._au.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'audiotest.au')])
        eq(self._au.get_param('filename', header='content-disposition'),
           'audiotest.au')
        # A fresh list is a sentinel that cannot be confused with any real
        # parameter value; assertIs reports both operands on failure.
        missing = []
        eq(self._au.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._au.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._au.get_param('foobar', missing), missing)
        self.assertIs(self._au.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
988
989
990
991# Test the basic MIMEImage class
class TestMIMEImage(unittest.TestCase):
    def setUp(self):
        # Load the sample GIF once per test and wrap it in a MIMEImage.
        with openfile('PyBanner048.gif', 'rb') as fp:
            self._imgdata = fp.read()
        self._im = MIMEImage(self._imgdata)

    def test_guess_minor_type(self):
        # The subtype is not passed to MIMEImage in setUp, so 'gif' is
        # what the class worked out from the data.
        self.assertEqual(self._im.get_content_type(), 'image/gif')

    def test_encoding(self):
        # Decoding the base64 payload must give back the original bytes.
        payload = self._im.get_payload()
        self.assertEqual(base64.decodebytes(payload), self._imgdata)

    def test_checkSetMinor(self):
        # An explicit subtype overrides the guessed one.
        im = MIMEImage(self._imgdata, 'fish')
        self.assertEqual(im.get_content_type(), 'image/fish')

    def test_add_header(self):
        eq = self.assertEqual
        self._im.add_header('Content-Disposition', 'attachment',
                            filename='dingusfish.gif')
        eq(self._im['content-disposition'],
           'attachment; filename="dingusfish.gif"')
        eq(self._im.get_params(header='content-disposition'),
           [('attachment', ''), ('filename', 'dingusfish.gif')])
        eq(self._im.get_param('filename', header='content-disposition'),
           'dingusfish.gif')
        # A fresh list is a sentinel that cannot be confused with any real
        # parameter value; assertIs reports both operands on failure.
        missing = []
        eq(self._im.get_param('attachment', header='content-disposition'), '')
        self.assertIs(self._im.get_param('foo', failobj=missing,
                                         header='content-disposition'),
                      missing)
        # Try some missing stuff
        self.assertIs(self._im.get_param('foobar', missing), missing)
        self.assertIs(self._im.get_param('attachment', missing,
                                         header='foobar'),
                      missing)
1028
1029
1030
1031# Test the basic MIMEApplication class
1032class TestMIMEApplication(unittest.TestCase):
1033 def test_headers(self):
1034 eq = self.assertEqual
Barry Warsaw8b2af272007-08-31 03:04:26 +00001035 msg = MIMEApplication(b'\xfa\xfb\xfc\xfd\xfe\xff')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001036 eq(msg.get_content_type(), 'application/octet-stream')
1037 eq(msg['content-transfer-encoding'], 'base64')
1038
1039 def test_body(self):
1040 eq = self.assertEqual
Barry Warsaw8c571042007-08-30 19:17:18 +00001041 bytes = b'\xfa\xfb\xfc\xfd\xfe\xff'
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001042 msg = MIMEApplication(bytes)
Barry Warsaw8c571042007-08-30 19:17:18 +00001043 eq(msg.get_payload(), b'+vv8/f7/')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001044 eq(msg.get_payload(decode=True), bytes)
1045
1046
1047
1048# Test the basic MIMEText class
class TestMIMEText(unittest.TestCase):
    def setUp(self):
        self._msg = MIMEText('hello there')

    def test_types(self):
        # Default content type and charset, plus failobj handling for a
        # parameter and for a header that do not exist.
        eq = self.assertEqual
        eq(self._msg.get_content_type(), 'text/plain')
        eq(self._msg.get_param('charset'), 'us-ascii')
        # A fresh list is a sentinel that cannot be confused with any real
        # parameter value; assertIs reports both operands on failure.
        missing = []
        self.assertIs(self._msg.get_param('foobar', missing), missing)
        self.assertIs(self._msg.get_param('charset', missing,
                                          header='foobar'),
                      missing)

    def test_payload(self):
        self.assertEqual(self._msg.get_payload(), 'hello there')
        self.assertFalse(self._msg.is_multipart())

    def test_charset(self):
        # An explicit _charset shows up on the Charset object and in the
        # Content-Type header.
        eq = self.assertEqual
        msg = MIMEText('hello there', _charset='us-ascii')
        eq(msg.get_charset().input_charset, 'us-ascii')
        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
1072
1073
1074
1075# Test complicated multipart/* messages
class TestMultipart(TestEmailBase):
    def setUp(self):
        # Build a two-part multipart/mixed message (text intro + GIF
        # attachment) and keep references to the container and both parts.
        with openfile('PyBanner048.gif', 'rb') as fp:
            data = fp.read()
        container = MIMEBase('multipart', 'mixed', boundary='BOUNDARY')
        image = MIMEImage(data, name='dingusfish.gif')
        image.add_header('content-disposition', 'attachment',
                         filename='dingusfish.gif')
        intro = MIMEText('''\
Hi there,

This is the dingus fish.
''')
        container.attach(intro)
        container.attach(image)
        container['From'] = 'Barry <barry@digicool.com>'
        container['To'] = 'Dingus Lovers <cravindogs@cravindogs.com>'
        container['Subject'] = 'Here is your dingus fish'

        now = 987809702.54848599
        timetuple = time.localtime(now)
        # The last struct_time field is tm_isdst; 0 means standard time.
        if timetuple[-1] == 0:
            tzsecs = time.timezone
        else:
            tzsecs = time.altzone
        # time.timezone/altzone are seconds WEST of UTC, so a positive
        # value is a negative RFC 2822 zone offset.
        sign = '-' if tzsecs > 0 else '+'
        # Format the magnitude as HHMM.  The previous '%04d' % (tzsecs / 36)
        # produced '+-100' east of UTC and '0550' for a 5h30 offset.
        hours, minutes = divmod(abs(tzsecs) // 60, 60)
        tzoffset = ' %s%02d%02d' % (sign, hours, minutes)
        container['Date'] = time.strftime(
            '%a, %d %b %Y %H:%M:%S', timetuple) + tzoffset
        self._msg = container
        self._im = image
        self._txt = intro

    def _make_outer(self):
        # Bare multipart/mixed container shared by the *_in_a_multipart*
        # tests.  Headers are added in the order the expected output lists
        # them.
        outer = MIMEBase('multipart', 'mixed')
        outer['Subject'] = 'A subject'
        outer['To'] = 'aperson@dom.ain'
        outer['From'] = 'bperson@dom.ain'
        return outer

    def test_hierarchy(self):
        eq = self.assertEqual
        m = self._msg
        self.assertTrue(m.is_multipart())
        eq(m.get_content_type(), 'multipart/mixed')
        eq(len(m.get_payload()), 2)
        self.assertRaises(IndexError, m.get_payload, 2)
        m0 = m.get_payload(0)
        m1 = m.get_payload(1)
        # The payload must hold the very objects that were attached.
        self.assertIs(m0, self._txt)
        self.assertIs(m1, self._im)
        eq(m.get_payload(), [m0, m1])
        self.assertFalse(m0.is_multipart())
        self.assertFalse(m1.is_multipart())

    def test_empty_multipart_idempotent(self):
        # Parsing and re-flattening must reproduce the text exactly.
        text = """\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY


--BOUNDARY--
"""
        msg = Parser().parsestr(text)
        self.ndiffAssertEqual(text, msg.as_string())

    def test_no_parts_in_a_multipart_with_none_epilogue(self):
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY

--BOUNDARY--''')

    def test_no_parts_in_a_multipart_with_empty_epilogue(self):
        outer = self._make_outer()
        outer.preamble = ''
        outer.epilogue = ''
        outer.set_boundary('BOUNDARY')
        self.ndiffAssertEqual(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY

--BOUNDARY--
''')

    def test_one_part_in_a_multipart(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.set_boundary('BOUNDARY')
        msg = MIMEText('hello world')
        outer.attach(msg)
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain


--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_preamble(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.preamble = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_none_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = None
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--''')

    def test_seq_parts_in_a_multipart_with_empty_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = ''
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--
''')

    def test_seq_parts_in_a_multipart_with_nl_epilogue(self):
        eq = self.ndiffAssertEqual
        outer = self._make_outer()
        outer.epilogue = '\n'
        msg = MIMEText('hello world')
        outer.attach(msg)
        outer.set_boundary('BOUNDARY')
        eq(outer.as_string(), '''\
Content-Type: multipart/mixed; boundary="BOUNDARY"
MIME-Version: 1.0
Subject: A subject
To: aperson@dom.ain
From: bperson@dom.ain

--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

hello world
--BOUNDARY--

''')

    def test_message_external_body(self):
        eq = self.assertEqual
        msg = self._msgobj('msg_36.txt')
        eq(len(msg.get_payload()), 2)
        msg1 = msg.get_payload(1)
        eq(msg1.get_content_type(), 'multipart/alternative')
        eq(len(msg1.get_payload()), 2)
        for subpart in msg1.get_payload():
            eq(subpart.get_content_type(), 'message/external-body')
            eq(len(subpart.get_payload()), 1)
            subsubpart = subpart.get_payload(0)
            eq(subsubpart.get_content_type(), 'text/plain')

    def test_double_boundary(self):
        # msg_37.txt is a multipart that contains two dash-boundary's in a
        # row.  Our interpretation of RFC 2046 calls for ignoring the second
        # and subsequent boundaries.
        msg = self._msgobj('msg_37.txt')
        self.assertEqual(len(msg.get_payload()), 3)

    def test_nested_inner_contains_outer_boundary(self):
        eq = self.ndiffAssertEqual
        # msg_38.txt has an inner part that contains outer boundaries.  My
        # interpretation of RFC 2046 (based on sections 5.1 and 5.1.2) say
        # these are illegal and should be interpreted as unterminated inner
        # parts.
        msg = self._msgobj('msg_38.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below assumes four spaces per level as
        # written by iterators._structure -- confirm against msg_38.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
            text/plain
        text/plain
    text/plain
    text/plain
""")

    def test_nested_with_same_boundary(self):
        eq = self.ndiffAssertEqual
        # msg 39.txt is similarly evil in that it's got inner parts that use
        # the same boundary as outer parts.  Again, I believe the way this is
        # parsed is closest to the spirit of RFC 2046
        msg = self._msgobj('msg_39.txt')
        sfp = StringIO()
        iterators._structure(msg, sfp)
        # NOTE(review): nesting below assumes four spaces per level as
        # written by iterators._structure -- confirm against msg_39.txt.
        eq(sfp.getvalue(), """\
multipart/mixed
    multipart/mixed
        multipart/alternative
        application/octet-stream
        application/octet-stream
    text/plain
""")

    def test_boundary_in_non_multipart(self):
        # A boundary parameter on a non-multipart type must not make the
        # parser split the body; the text round-trips unchanged.
        msg = self._msgobj('msg_40.txt')
        self.assertEqual(msg.as_string(), '''\
MIME-Version: 1.0
Content-Type: text/html; boundary="--961284236552522269"

----961284236552522269
Content-Type: text/html;
Content-Transfer-Encoding: 7Bit

<html></html>

----961284236552522269--
''')

    def test_boundary_with_leading_space(self):
        eq = self.assertEqual
        msg = email.message_from_string('''\
MIME-Version: 1.0
Content-Type: multipart/mixed; boundary=" XXXX"

-- XXXX
Content-Type: text/plain


-- XXXX
Content-Type: text/plain

-- XXXX--
''')
        self.assertTrue(msg.is_multipart())
        eq(msg.get_boundary(), ' XXXX')
        eq(len(msg.get_payload()), 2)

    def test_boundary_without_trailing_newline(self):
        # The closing boundary is the last thing in the text, with no
        # newline after it.
        m = Parser().parsestr("""\
Content-Type: multipart/mixed; boundary="===============0012394164=="
MIME-Version: 1.0

--===============0012394164==
Content-Type: image/file1.jpg
MIME-Version: 1.0
Content-Transfer-Encoding: base64

YXNkZg==
--===============0012394164==--""")
        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(m.get_payload(0).get_payload(), 'YXNkZg==')
1447
1448
1449
1450# Test some badly formatted messages
class TestNonConformant(TestEmailBase):
    def test_parse_missing_minor_type(self):
        # A Content-Type with no subtype falls back to text/plain.
        eq = self.assertEqual
        msg = self._msgobj('msg_14.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')

    def test_same_boundary_inner_outer(self):
        msg = self._msgobj('msg_15.txt')
        # XXX We can probably eventually do better
        inner = msg.get_payload(0)
        self.assertTrue(hasattr(inner, 'defects'))
        self.assertEqual(len(inner.defects), 1)
        self.assertIsInstance(inner.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_multipart_no_boundary(self):
        # With no boundary parameter the payload stays a plain string and
        # two defects are recorded, in this order.
        msg = self._msgobj('msg_25.txt')
        self.assertIsInstance(msg.get_payload(), str)
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_invalid_content_type(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        msg = Message()
        # RFC 2045, $5.2 says invalid yields text/plain
        msg['Content-Type'] = 'text'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Clear the old value and try something /really/ invalid
        del msg['content-type']
        msg['Content-Type'] = 'foo'
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_content_type(), 'text/plain')
        # Still, make sure that the message is idempotently generated
        s = StringIO()
        g = Generator(s)
        g.flatten(msg)
        neq(s.getvalue(), 'Content-Type: foo\n\n')

    def test_no_start_boundary(self):
        # Without a start boundary the whole body is kept as one string.
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_31.txt')
        eq(msg.get_payload(), """\
--BOUNDARY
Content-Type: text/plain

message 1

--BOUNDARY
Content-Type: text/plain

message 2

--BOUNDARY--
""")

    def test_no_separating_blank_line(self):
        eq = self.ndiffAssertEqual
        msg = self._msgobj('msg_35.txt')
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: here's something interesting

counter to RFC 2822, there's no separating newline here
""")

    def test_lying_multipart(self):
        msg = self._msgobj('msg_41.txt')
        self.assertTrue(hasattr(msg, 'defects'))
        self.assertEqual(len(msg.defects), 2)
        self.assertIsInstance(msg.defects[0],
                              errors.NoBoundaryInMultipartDefect)
        self.assertIsInstance(msg.defects[1],
                              errors.MultipartInvariantViolationDefect)

    def test_missing_start_boundary(self):
        outer = self._msgobj('msg_42.txt')
        # The message structure is:
        #
        # multipart/mixed
        #    text/plain
        #    message/rfc822
        #        multipart/mixed [*]
        #
        # [*] This message is missing its start boundary
        bad = outer.get_payload(1).get_payload(0)
        self.assertEqual(len(bad.defects), 1)
        self.assertIsInstance(bad.defects[0],
                              errors.StartBoundaryNotFoundDefect)

    def test_first_line_is_continuation_header(self):
        # A message whose first line starts with whitespace: that line is
        # dropped from the headers and reported as a defect.
        eq = self.assertEqual
        m = ' Line 1\nLine 2\nLine 3'
        msg = email.message_from_string(m)
        eq(msg.keys(), [])
        eq(msg.get_payload(), 'Line 2\nLine 3')
        eq(len(msg.defects), 1)
        self.assertIsInstance(msg.defects[0],
                              errors.FirstHeaderLineIsContinuationDefect)
        eq(msg.defects[0].line, ' Line 1\n')
1561
1562
1563
1564# Test RFC 2047 header encoding and decoding
class TestRFC2047(TestEmailBase):
    def test_rfc2047_multiline(self):
        # Decode a folded header that mixes plain text with mac-iceland
        # encoded words, rebuild it with make_header(), and re-encode it.
        raw = """Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz
 foo bar =?mac-iceland?q?r=8Aksm=9Arg=8Cs?="""
        chunks = decode_header(raw)
        self.assertEqual(chunks, [
            (b'Re:', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland'),
            (b'baz foo bar', None),
            (b'r\x8aksm\x9arg\x8cs', 'mac-iceland')])
        rebuilt = make_header(chunks)
        self.assertEqual(
            str(rebuilt),
            'Re: r\xe4ksm\xf6rg\xe5s baz foo bar r\xe4ksm\xf6rg\xe5s')
        self.ndiffAssertEqual(rebuilt.encode(maxlinelen=76), """\
Re: =?mac-iceland?q?r=8Aksm=9Arg=8Cs?= baz foo bar =?mac-iceland?q?r=8Aksm?=
 =?mac-iceland?q?=9Arg=8Cs?=""")

    def test_whitespace_eater_unicode(self):
        # An encoded word followed by plain text: the two chunks are
        # expected to be joined by one space in the str() of the header.
        raw = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard@dom.ain>'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Andr\xe9', 'iso-8859-1'),
                                  (b'Pirard <pirard@dom.ain>', None)])
        text = str(make_header(chunks))
        self.assertEqual(text, 'Andr\xe9 Pirard <pirard@dom.ain>')

    def test_whitespace_eater_unicode_2(self):
        # Plain text and base64 encoded words alternating.
        raw = 'The =?iso-8859-1?b?cXVpY2sgYnJvd24gZm94?= jumped over the =?iso-8859-1?b?bGF6eSBkb2c=?='
        chunks = decode_header(raw)
        self.assertEqual(
            chunks,
            [(b'The', None), (b'quick brown fox', 'iso-8859-1'),
             (b'jumped over the', None), (b'lazy dog', 'iso-8859-1')])
        text = str(make_header(chunks))
        self.assertEqual(text, 'The quick brown fox jumped over the lazy dog')

    def test_rfc2047_missing_whitespace(self):
        # Encoded words that are not set off by whitespace are expected to
        # come back undecoded, as one chunk with no charset.
        raw = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(raw, None)])

    def test_rfc2047_with_whitespace(self):
        # The same words separated by spaces are decoded individually.
        raw = 'Sm =?ISO-8859-1?B?9g==?= rg =?ISO-8859-1?B?5Q==?= sbord'
        chunks = decode_header(raw)
        self.assertEqual(chunks, [(b'Sm', None), (b'\xf6', 'iso-8859-1'),
                                  (b'rg', None), (b'\xe5', 'iso-8859-1'),
                                  (b'sbord', None)])
1612
1613
1614
1615# Test the MIMEMessage class
class TestMIMEMessage(TestEmailBase):
    """Tests for MIMEMessage and for message/* and multipart containers.

    Sample messages are read from the test data directory, either directly
    through openfile() or through self._msgobj(), which this class does not
    define itself and therefore expects from its base class.
    """

    def setUp(self):
        # Keep the raw text of the message/rfc822 sample around.
        with openfile('msg_11.txt') as fp:
            self._text = fp.read()

    def test_type_error(self):
        # Only Message instances may be wrapped; a plain string is rejected.
        self.assertRaises(TypeError, MIMEMessage, 'a plain string')

    def test_valid_argument(self):
        eq = self.assertEqual
        unless = self.assertTrue
        subject = 'A sub-message'
        m = Message()
        m['Subject'] = subject
        r = MIMEMessage(m)
        eq(r.get_content_type(), 'message/rfc822')
        # The wrapped message is stored as the single element of a list
        # payload, and it is the very same object that was passed in.
        payload = r.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subpart = payload[0]
        unless(subpart is m)
        eq(subpart['subject'], subject)

    def test_bad_multipart(self):
        # A message/rfc822 container already holding one message must refuse
        # a second attachment.
        msg1 = Message()
        msg1['Subject'] = 'subpart 1'
        msg2 = Message()
        msg2['Subject'] = 'subpart 2'
        r = MIMEMessage(msg1)
        self.assertRaises(errors.MultipartConversionError, r.attach, msg2)

    def test_generate(self):
        # First craft the message to be encapsulated
        m = Message()
        m['Subject'] = 'An enclosed message'
        m.set_payload('Here is the body of the message.\n')
        r = MIMEMessage(m)
        r['Subject'] = 'The enclosing message'
        s = StringIO()
        g = Generator(s)
        g.flatten(r)
        self.assertEqual(s.getvalue(), """\
Content-Type: message/rfc822
MIME-Version: 1.0
Subject: The enclosing message

Subject: An enclosed message

Here is the body of the message.
""")

    def test_parse_message_rfc822(self):
        eq = self.assertEqual
        unless = self.assertTrue
        msg = self._msgobj('msg_11.txt')
        eq(msg.get_content_type(), 'message/rfc822')
        payload = msg.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        submsg = payload[0]
        unless(isinstance(submsg, Message))
        eq(submsg['subject'], 'An enclosed message')
        eq(submsg.get_payload(), 'Here is the body of the message.\n')

    def test_dsn(self):
        eq = self.assertEqual
        unless = self.assertTrue
        # msg 16 is a Delivery Status Notification, see RFC 1894
        msg = self._msgobj('msg_16.txt')
        eq(msg.get_content_type(), 'multipart/report')
        unless(msg.is_multipart())
        eq(len(msg.get_payload()), 3)
        # Subpart 1 is a text/plain, human readable section
        subpart = msg.get_payload(0)
        eq(subpart.get_content_type(), 'text/plain')
        # NOTE(review): the leading whitespace inside this literal has to
        # match msg_16.txt byte for byte -- confirm against the data file.
        eq(subpart.get_payload(), """\
This report relates to a message you sent with the following header fields:

  Message-id: <002001c144a6$8752e060$56104586@oxy.edu>
  Date: Sun, 23 Sep 2001 20:10:55 -0700
  From: "Ian T. Henry" <henryi@oxy.edu>
  To: SoCal Raves <scr@socal-raves.org>
  Subject: [scr] yeah for Ians!!

Your message cannot be delivered to the following recipients:

  Recipient address: jangel1@cougar.noc.ucla.edu
  Reason: recipient reached disk quota

""")
        # Subpart 2 contains the machine parsable DSN information.  It
        # consists of two blocks of headers, represented by two nested Message
        # objects.
        subpart = msg.get_payload(1)
        eq(subpart.get_content_type(), 'message/delivery-status')
        eq(len(subpart.get_payload()), 2)
        # message/delivery-status should treat each block as a bunch of
        # headers, i.e. a bunch of Message objects.
        dsn1 = subpart.get_payload(0)
        unless(isinstance(dsn1, Message))
        eq(dsn1['original-envelope-id'], '0GK500B4HD0888@cougar.noc.ucla.edu')
        eq(dsn1.get_param('dns', header='reporting-mta'), '')
        # Try a missing one <wink>
        eq(dsn1.get_param('nsd', header='reporting-mta'), None)
        dsn2 = subpart.get_payload(1)
        unless(isinstance(dsn2, Message))
        eq(dsn2['action'], 'failed')
        eq(dsn2.get_params(header='original-recipient'),
           [('rfc822', ''), ('jangel1@cougar.noc.ucla.edu', '')])
        eq(dsn2.get_param('rfc822', header='final-recipient'), '')
        # Subpart 3 is the original message
        subpart = msg.get_payload(2)
        eq(subpart.get_content_type(), 'message/rfc822')
        payload = subpart.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        subsubpart = payload[0]
        unless(isinstance(subsubpart, Message))
        eq(subsubpart.get_content_type(), 'text/plain')
        eq(subsubpart['message-id'],
           '<002001c144a6$8752e060$56104586@oxy.edu>')

    def test_epilogue(self):
        eq = self.ndiffAssertEqual
        # Building the message by hand and flattening it must reproduce the
        # stored sample exactly, preamble and epilogue included.
        with openfile('msg_21.txt') as fp:
            text = fp.read()
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = 'End of MIME message\n'
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        sfp = StringIO()
        g = Generator(sfp)
        g.flatten(msg)
        eq(sfp.getvalue(), text)

    def test_no_nl_preamble(self):
        eq = self.ndiffAssertEqual
        # The preamble has no trailing newline and the epilogue is empty;
        # the expected text below shows where line breaks still appear.
        msg = Message()
        msg['From'] = 'aperson@dom.ain'
        msg['To'] = 'bperson@dom.ain'
        msg['Subject'] = 'Test'
        msg.preamble = 'MIME message'
        msg.epilogue = ''
        msg1 = MIMEText('One')
        msg2 = MIMEText('Two')
        msg.add_header('Content-Type', 'multipart/mixed', boundary='BOUNDARY')
        msg.attach(msg1)
        msg.attach(msg2)
        eq(msg.as_string(), """\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: Test
Content-Type: multipart/mixed; boundary="BOUNDARY"

MIME message
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

One
--BOUNDARY
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

Two
--BOUNDARY--
""")

    def test_default_type(self):
        eq = self.assertEqual
        with openfile('msg_30.txt') as fp:
            msg = email.message_from_file(fp)
        # Both containers default to message/rfc822 and the messages they
        # enclose default to text/plain.
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_with_explicit_container_type(self):
        eq = self.assertEqual
        # Same expectations as test_default_type, against msg_28.txt.
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        container1 = msg.get_payload(0)
        eq(container1.get_default_type(), 'message/rfc822')
        eq(container1.get_content_type(), 'message/rfc822')
        container2 = msg.get_payload(1)
        eq(container2.get_default_type(), 'message/rfc822')
        eq(container2.get_content_type(), 'message/rfc822')
        container1a = container1.get_payload(0)
        eq(container1a.get_default_type(), 'text/plain')
        eq(container1a.get_content_type(), 'text/plain')
        container2a = container2.get_payload(0)
        eq(container2a.get_default_type(), 'text/plain')
        eq(container2a.get_content_type(), 'text/plain')

    def test_default_type_non_parsed(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        # Set up container
        container = MIMEMultipart('digest', 'BOUNDARY')
        container.epilogue = ''
        # Set up subparts
        subpart1a = MIMEText('message 1\n')
        subpart2a = MIMEText('message 2\n')
        subpart1 = MIMEMessage(subpart1a)
        subpart2 = MIMEMessage(subpart2a)
        container.attach(subpart1)
        container.attach(subpart2)
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY
Content-Type: message/rfc822
MIME-Version: 1.0

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')
        # Drop the explicit MIME headers from the wrappers; the reported
        # types must not change and the headers vanish from the output.
        del subpart1['content-type']
        del subpart1['mime-version']
        del subpart2['content-type']
        del subpart2['mime-version']
        eq(subpart1.get_content_type(), 'message/rfc822')
        eq(subpart1.get_default_type(), 'message/rfc822')
        eq(subpart2.get_content_type(), 'message/rfc822')
        eq(subpart2.get_default_type(), 'message/rfc822')
        neq(container.as_string(unixfrom=False), '''\
Content-Type: multipart/digest; boundary="BOUNDARY"
MIME-Version: 1.0

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 1

--BOUNDARY

Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit

message 2

--BOUNDARY--
''')

    def test_mime_attachments_in_constructor(self):
        eq = self.assertEqual
        # Parts given through _subparts become the payload, in order.
        text1 = MIMEText('')
        text2 = MIMEText('')
        msg = MIMEMultipart(_subparts=(text1, text2))
        eq(len(msg.get_payload()), 2)
        eq(msg.get_payload(0), text1)
        eq(msg.get_payload(1), text2)

    def test_default_multipart_constructor(self):
        # A MIMEMultipart built without arguments is already multipart.
        msg = MIMEMultipart()
        self.assertTrue(msg.is_multipart())
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001914
1915
1916# A general test of parser->model->generator idempotency. IOW, read a message
1917# in, parse it into a message object tree, then without touching the tree,
1918# regenerate the plain text. The original text and the transformed text
1919# should be identical. Note: that we ignore the Unix-From since that may
1920# contain a changed date.
class TestIdempotent(TestEmailBase):
    """Check that parsing a message and regenerating it is lossless.

    Each test reads a sample file, parses it into a message tree and then
    flattens the untouched tree again; the output has to equal the input.
    """

    def _msgobj(self, filename):
        # Return both the parsed message and the raw text it came from, so
        # callers can compare the regenerated output with the original.
        with openfile(filename) as fp:
            data = fp.read()
        msg = email.message_from_string(data)
        return msg, data

    def _idempotent(self, msg, text):
        eq = self.ndiffAssertEqual
        s = StringIO()
        # maxheaderlen=0 is passed so the generator is not asked to wrap
        # long headers.
        g = Generator(s, maxheaderlen=0)
        g.flatten(msg)
        eq(text, s.getvalue())

    def test_parse_text_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_01.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_content_maintype(), 'text')
        eq(msg.get_content_subtype(), 'plain')
        eq(msg.get_params()[1], ('charset', 'us-ascii'))
        eq(msg.get_param('charset'), 'us-ascii')
        eq(msg.preamble, None)
        eq(msg.epilogue, None)
        self._idempotent(msg, text)

    def test_parse_untyped_message(self):
        eq = self.assertEqual
        msg, text = self._msgobj('msg_03.txt')
        eq(msg.get_content_type(), 'text/plain')
        eq(msg.get_params(), None)
        eq(msg.get_param('charset'), None)
        self._idempotent(msg, text)

    def test_simple_multipart(self):
        msg, text = self._msgobj('msg_04.txt')
        self._idempotent(msg, text)

    def test_MIME_digest(self):
        msg, text = self._msgobj('msg_02.txt')
        self._idempotent(msg, text)

    def test_long_header(self):
        msg, text = self._msgobj('msg_27.txt')
        self._idempotent(msg, text)

    def test_MIME_digest_with_part_headers(self):
        msg, text = self._msgobj('msg_28.txt')
        self._idempotent(msg, text)

    def test_mixed_with_image(self):
        msg, text = self._msgobj('msg_06.txt')
        self._idempotent(msg, text)

    def test_multipart_report(self):
        msg, text = self._msgobj('msg_05.txt')
        self._idempotent(msg, text)

    def test_dsn(self):
        msg, text = self._msgobj('msg_16.txt')
        self._idempotent(msg, text)

    def test_preamble_epilogue(self):
        msg, text = self._msgobj('msg_21.txt')
        self._idempotent(msg, text)

    def test_multipart_one_part(self):
        msg, text = self._msgobj('msg_23.txt')
        self._idempotent(msg, text)

    def test_multipart_no_parts(self):
        msg, text = self._msgobj('msg_24.txt')
        self._idempotent(msg, text)

    def test_no_start_boundary(self):
        msg, text = self._msgobj('msg_31.txt')
        self._idempotent(msg, text)

    def test_rfc2231_charset(self):
        msg, text = self._msgobj('msg_32.txt')
        self._idempotent(msg, text)

    def test_more_rfc2231_parameters(self):
        msg, text = self._msgobj('msg_33.txt')
        self._idempotent(msg, text)

    def test_text_plain_in_a_multipart_digest(self):
        msg, text = self._msgobj('msg_34.txt')
        self._idempotent(msg, text)

    def test_nested_multipart_mixeds(self):
        msg, text = self._msgobj('msg_12a.txt')
        self._idempotent(msg, text)

    def test_message_external_body_idempotent(self):
        msg, text = self._msgobj('msg_36.txt')
        self._idempotent(msg, text)

    def test_content_type(self):
        eq = self.assertEqual
        unless = self.assertTrue
        # Get a message object and reset the seek pointer for other tests
        msg, text = self._msgobj('msg_05.txt')
        eq(msg.get_content_type(), 'multipart/report')
        # Test the Content-Type: parameters
        params = {}
        for pk, pv in msg.get_params():
            params[pk] = pv
        eq(params['report-type'], 'delivery-status')
        eq(params['boundary'], 'D1690A7AC1.996856090/mail.example.com')
        eq(msg.preamble, 'This is a MIME-encapsulated message.\n')
        eq(msg.epilogue, '\n')
        eq(len(msg.get_payload()), 3)
        # Make sure the subparts are what we expect
        msg1 = msg.get_payload(0)
        eq(msg1.get_content_type(), 'text/plain')
        eq(msg1.get_payload(), 'Yadda yadda yadda\n')
        msg2 = msg.get_payload(1)
        eq(msg2.get_content_type(), 'text/plain')
        eq(msg2.get_payload(), 'Yadda yadda yadda\n')
        msg3 = msg.get_payload(2)
        eq(msg3.get_content_type(), 'message/rfc822')
        unless(isinstance(msg3, Message))
        payload = msg3.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        msg4 = payload[0]
        unless(isinstance(msg4, Message))
        eq(msg4.get_payload(), 'Yadda yadda yadda\n')

    def test_parser(self):
        eq = self.assertEqual
        unless = self.assertTrue
        msg, text = self._msgobj('msg_06.txt')
        # Check some of the outer headers
        eq(msg.get_content_type(), 'message/rfc822')
        # Make sure the payload is a list of exactly one sub-Message, and that
        # that submessage has a type of text/plain
        payload = msg.get_payload()
        unless(isinstance(payload, list))
        eq(len(payload), 1)
        msg1 = payload[0]
        unless(isinstance(msg1, Message))
        eq(msg1.get_content_type(), 'text/plain')
        unless(isinstance(msg1.get_payload(), str))
        eq(msg1.get_payload(), '\n')
2067
2068
2069
2070# Test various other bits of the package's functionality
2071class TestMiscellaneous(TestEmailBase):
2072 def test_message_from_string(self):
2073 with openfile('msg_01.txt') as fp:
2074 text = fp.read()
2075 msg = email.message_from_string(text)
2076 s = StringIO()
2077 # Don't wrap/continue long headers since we're trying to test
2078 # idempotency.
2079 g = Generator(s, maxheaderlen=0)
2080 g.flatten(msg)
2081 self.assertEqual(text, s.getvalue())
2082
2083 def test_message_from_file(self):
2084 with openfile('msg_01.txt') as fp:
2085 text = fp.read()
2086 fp.seek(0)
2087 msg = email.message_from_file(fp)
2088 s = StringIO()
2089 # Don't wrap/continue long headers since we're trying to test
2090 # idempotency.
2091 g = Generator(s, maxheaderlen=0)
2092 g.flatten(msg)
2093 self.assertEqual(text, s.getvalue())
2094
2095 def test_message_from_string_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002096 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002097 with openfile('msg_01.txt') as fp:
2098 text = fp.read()
2099
2100 # Create a subclass
2101 class MyMessage(Message):
2102 pass
2103
2104 msg = email.message_from_string(text, MyMessage)
2105 unless(isinstance(msg, MyMessage))
2106 # Try something more complicated
2107 with openfile('msg_02.txt') as fp:
2108 text = fp.read()
2109 msg = email.message_from_string(text, MyMessage)
2110 for subpart in msg.walk():
2111 unless(isinstance(subpart, MyMessage))
2112
2113 def test_message_from_file_with_class(self):
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002114 unless = self.assertTrue
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002115 # Create a subclass
2116 class MyMessage(Message):
2117 pass
2118
2119 with openfile('msg_01.txt') as fp:
2120 msg = email.message_from_file(fp, MyMessage)
2121 unless(isinstance(msg, MyMessage))
2122 # Try something more complicated
2123 with openfile('msg_02.txt') as fp:
2124 msg = email.message_from_file(fp, MyMessage)
2125 for subpart in msg.walk():
2126 unless(isinstance(subpart, MyMessage))
2127
2128 def test__all__(self):
2129 module = __import__('email')
2130 # Can't use sorted() here due to Python 2.3 compatibility
2131 all = module.__all__[:]
2132 all.sort()
2133 self.assertEqual(all, [
2134 'base64mime', 'charset', 'encoders', 'errors', 'generator',
2135 'header', 'iterators', 'message', 'message_from_file',
2136 'message_from_string', 'mime', 'parser',
2137 'quoprimime', 'utils',
2138 ])
2139
2140 def test_formatdate(self):
2141 now = time.time()
2142 self.assertEqual(utils.parsedate(utils.formatdate(now))[:6],
2143 time.gmtime(now)[:6])
2144
2145 def test_formatdate_localtime(self):
2146 now = time.time()
2147 self.assertEqual(
2148 utils.parsedate(utils.formatdate(now, localtime=True))[:6],
2149 time.localtime(now)[:6])
2150
2151 def test_formatdate_usegmt(self):
2152 now = time.time()
2153 self.assertEqual(
2154 utils.formatdate(now, localtime=False),
2155 time.strftime('%a, %d %b %Y %H:%M:%S -0000', time.gmtime(now)))
2156 self.assertEqual(
2157 utils.formatdate(now, localtime=False, usegmt=True),
2158 time.strftime('%a, %d %b %Y %H:%M:%S GMT', time.gmtime(now)))
2159
2160 def test_parsedate_none(self):
2161 self.assertEqual(utils.parsedate(''), None)
2162
2163 def test_parsedate_compact(self):
2164 # The FWS after the comma is optional
2165 self.assertEqual(utils.parsedate('Wed,3 Apr 2002 14:58:26 +0800'),
2166 utils.parsedate('Wed, 3 Apr 2002 14:58:26 +0800'))
2167
2168 def test_parsedate_no_dayofweek(self):
2169 eq = self.assertEqual
2170 eq(utils.parsedate_tz('25 Feb 2003 13:47:26 -0800'),
2171 (2003, 2, 25, 13, 47, 26, 0, 1, -1, -28800))
2172
2173 def test_parsedate_compact_no_dayofweek(self):
2174 eq = self.assertEqual
2175 eq(utils.parsedate_tz('5 Feb 2003 13:47:26 -0800'),
2176 (2003, 2, 5, 13, 47, 26, 0, 1, -1, -28800))
2177
2178 def test_parsedate_acceptable_to_time_functions(self):
2179 eq = self.assertEqual
2180 timetup = utils.parsedate('5 Feb 2003 13:47:26 -0800')
2181 t = int(time.mktime(timetup))
2182 eq(time.localtime(t)[:6], timetup[:6])
2183 eq(int(time.strftime('%Y', timetup)), 2003)
2184 timetup = utils.parsedate_tz('5 Feb 2003 13:47:26 -0800')
2185 t = int(time.mktime(timetup[:9]))
2186 eq(time.localtime(t)[:6], timetup[:6])
2187 eq(int(time.strftime('%Y', timetup[:9])), 2003)
2188
2189 def test_parseaddr_empty(self):
2190 self.assertEqual(utils.parseaddr('<>'), ('', ''))
2191 self.assertEqual(utils.formataddr(utils.parseaddr('<>')), '')
2192
2193 def test_noquote_dump(self):
2194 self.assertEqual(
2195 utils.formataddr(('A Silly Person', 'person@dom.ain')),
2196 'A Silly Person <person@dom.ain>')
2197
2198 def test_escape_dump(self):
2199 self.assertEqual(
2200 utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
2201 r'"A \(Very\) Silly Person" <person@dom.ain>')
2202 a = r'A \(Special\) Person'
2203 b = 'person@dom.ain'
2204 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2205
2206 def test_escape_backslashes(self):
2207 self.assertEqual(
2208 utils.formataddr(('Arthur \Backslash\ Foobar', 'person@dom.ain')),
2209 r'"Arthur \\Backslash\\ Foobar" <person@dom.ain>')
2210 a = r'Arthur \Backslash\ Foobar'
2211 b = 'person@dom.ain'
2212 self.assertEqual(utils.parseaddr(utils.formataddr((a, b))), (a, b))
2213
2214 def test_name_with_dot(self):
2215 x = 'John X. Doe <jxd@example.com>'
2216 y = '"John X. Doe" <jxd@example.com>'
2217 a, b = ('John X. Doe', 'jxd@example.com')
2218 self.assertEqual(utils.parseaddr(x), (a, b))
2219 self.assertEqual(utils.parseaddr(y), (a, b))
2220 # formataddr() quotes the name if there's a dot in it
2221 self.assertEqual(utils.formataddr((a, b)), y)
2222
2223 def test_multiline_from_comment(self):
2224 x = """\
2225Foo
2226\tBar <foo@example.com>"""
2227 self.assertEqual(utils.parseaddr(x), ('Foo Bar', 'foo@example.com'))
2228
2229 def test_quote_dump(self):
2230 self.assertEqual(
2231 utils.formataddr(('A Silly; Person', 'person@dom.ain')),
2232 r'"A Silly; Person" <person@dom.ain>')
2233
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002234 def test_charset_richcomparisons(self):
2235 eq = self.assertEqual
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00002236 ne = self.assertNotEqual
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002237 cset1 = Charset()
2238 cset2 = Charset()
2239 eq(cset1, 'us-ascii')
2240 eq(cset1, 'US-ASCII')
2241 eq(cset1, 'Us-AsCiI')
2242 eq('us-ascii', cset1)
2243 eq('US-ASCII', cset1)
2244 eq('Us-AsCiI', cset1)
2245 ne(cset1, 'usascii')
2246 ne(cset1, 'USASCII')
2247 ne(cset1, 'UsAsCiI')
2248 ne('usascii', cset1)
2249 ne('USASCII', cset1)
2250 ne('UsAsCiI', cset1)
2251 eq(cset1, cset2)
2252 eq(cset2, cset1)
2253
2254 def test_getaddresses(self):
2255 eq = self.assertEqual
2256 eq(utils.getaddresses(['aperson@dom.ain (Al Person)',
2257 'Bud Person <bperson@dom.ain>']),
2258 [('Al Person', 'aperson@dom.ain'),
2259 ('Bud Person', 'bperson@dom.ain')])
2260
2261 def test_getaddresses_nasty(self):
2262 eq = self.assertEqual
2263 eq(utils.getaddresses(['foo: ;']), [('', '')])
2264 eq(utils.getaddresses(
2265 ['[]*-- =~$']),
2266 [('', ''), ('', ''), ('', '*--')])
2267 eq(utils.getaddresses(
2268 ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2269 [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2270
2271 def test_getaddresses_embedded_comment(self):
2272 """Test proper handling of a nested comment"""
2273 eq = self.assertEqual
2274 addrs = utils.getaddresses(['User ((nested comment)) <foo@bar.com>'])
2275 eq(addrs[0][1], 'foo@bar.com')
2276
2277 def test_utils_quote_unquote(self):
2278 eq = self.assertEqual
2279 msg = Message()
2280 msg.add_header('content-disposition', 'attachment',
2281 filename='foo\\wacky"name')
2282 eq(msg.get_filename(), 'foo\\wacky"name')
2283
2284 def test_get_body_encoding_with_bogus_charset(self):
2285 charset = Charset('not a charset')
2286 self.assertEqual(charset.get_body_encoding(), 'base64')
2287
2288 def test_get_body_encoding_with_uppercase_charset(self):
2289 eq = self.assertEqual
2290 msg = Message()
2291 msg['Content-Type'] = 'text/plain; charset=UTF-8'
2292 eq(msg['content-type'], 'text/plain; charset=UTF-8')
2293 charsets = msg.get_charsets()
2294 eq(len(charsets), 1)
2295 eq(charsets[0], 'utf-8')
2296 charset = Charset(charsets[0])
2297 eq(charset.get_body_encoding(), 'base64')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002298 msg.set_payload(b'hello world', charset=charset)
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002299 eq(msg.get_payload(), 'aGVsbG8gd29ybGQ=\n')
2300 eq(msg.get_payload(decode=True), b'hello world')
2301 eq(msg['content-transfer-encoding'], 'base64')
2302 # Try another one
2303 msg = Message()
2304 msg['Content-Type'] = 'text/plain; charset="US-ASCII"'
2305 charsets = msg.get_charsets()
2306 eq(len(charsets), 1)
2307 eq(charsets[0], 'us-ascii')
2308 charset = Charset(charsets[0])
2309 eq(charset.get_body_encoding(), encoders.encode_7or8bit)
2310 msg.set_payload('hello world', charset=charset)
2311 eq(msg.get_payload(), 'hello world')
2312 eq(msg['content-transfer-encoding'], '7bit')
2313
2314 def test_charsets_case_insensitive(self):
2315 lc = Charset('us-ascii')
2316 uc = Charset('US-ASCII')
2317 self.assertEqual(lc.get_body_encoding(), uc.get_body_encoding())
2318
2319 def test_partial_falls_inside_message_delivery_status(self):
2320 eq = self.ndiffAssertEqual
2321 # The Parser interface provides chunks of data to FeedParser in 8192
2322 # byte gulps. SF bug #1076485 found one of those chunks inside
2323 # message/delivery-status header block, which triggered an
2324 # unreadline() of NeedMoreData.
2325 msg = self._msgobj('msg_43.txt')
2326 sfp = StringIO()
2327 iterators._structure(msg, sfp)
2328 eq(sfp.getvalue(), """\
2329multipart/report
2330 text/plain
2331 message/delivery-status
2332 text/plain
2333 text/plain
2334 text/plain
2335 text/plain
2336 text/plain
2337 text/plain
2338 text/plain
2339 text/plain
2340 text/plain
2341 text/plain
2342 text/plain
2343 text/plain
2344 text/plain
2345 text/plain
2346 text/plain
2347 text/plain
2348 text/plain
2349 text/plain
2350 text/plain
2351 text/plain
2352 text/plain
2353 text/plain
2354 text/plain
2355 text/plain
2356 text/plain
2357 text/plain
2358 text/rfc822-headers
2359""")
2360
2361
2362
2363# Test the iterator/generators
class TestIterators(TestEmailBase):
    """Tests for the helpers in email.iterators."""

    def test_body_line_iterator(self):
        check = self.assertEqual
        check_diff = self.ndiffAssertEqual
        # First a simple non-multipart message
        simple = self._msgobj('msg_01.txt')
        body_lines = list(iterators.body_line_iterator(simple))
        check(len(body_lines), 6)
        check_diff(EMPTYSTRING.join(body_lines), simple.get_payload())
        # Now a more complicated multipart
        multipart = self._msgobj('msg_02.txt')
        body_lines = list(iterators.body_line_iterator(multipart))
        check(len(body_lines), 43)
        # msg_19.txt holds the text the joined lines are compared against.
        with openfile('msg_19.txt') as fp:
            reference = fp.read()
        check_diff(EMPTYSTRING.join(body_lines), reference)

    def test_typed_subpart_iterator(self):
        check = self.assertEqual
        msg = self._msgobj('msg_04.txt')
        # Only the main type is given; collect the payload of every match.
        payloads = [part.get_payload()
                    for part in iterators.typed_subpart_iterator(msg, 'text')]
        check(len(payloads), 2)
        check(EMPTYSTRING.join(payloads), """\
a simple kind of mirror
to reflect upon our own
a simple kind of mirror
to reflect upon our own
""")

    def test_typed_subpart_iterator_default_type(self):
        check = self.assertEqual
        msg = self._msgobj('msg_03.txt')
        # Main type and subtype are both given this time.
        matches = iterators.typed_subpart_iterator(msg, 'text', 'plain')
        payloads = [part.get_payload() for part in matches]
        check(len(payloads), 1)
        check(EMPTYSTRING.join(payloads), """\

Hi,

Do you like this message?

-Me
""")
2417
2418
2419
class TestParsers(TestEmailBase):
    """Tests for Parser, HeaderParser and the message_from_* helpers."""

    def test_header_parser(self):
        eq = self.assertEqual
        # Parse only the headers of a complex multipart MIME document
        with openfile('msg_02.txt') as fp:
            msg = HeaderParser().parse(fp)
        eq(msg['from'], 'ppp-request@zzz.org')
        eq(msg['to'], 'ppp@zzz.org')
        eq(msg.get_content_type(), 'multipart/mixed')
        # The body is left as one string instead of being split into parts.
        self.assertFalse(msg.is_multipart())
        self.assertTrue(isinstance(msg.get_payload(), str))

    def test_whitespace_continuation(self):
        eq = self.assertEqual
        # This message contains a line after the Subject: header that has only
        # whitespace, but it is not empty!
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Subject: the next line has a space on it
\x20
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_whitespace_continuation_last_header(self):
        eq = self.assertEqual
        # Like the previous test, but the subject line is the last
        # header.
        msg = email.message_from_string("""\
From: aperson@dom.ain
To: bperson@dom.ain
Date: Mon, 8 Apr 2002 15:09:19 -0400
Message-ID: spam
Subject: the next line has a space on it
\x20

Here's the message body
""")
        eq(msg['subject'], 'the next line has a space on it\n ')
        eq(msg['message-id'], 'spam')
        eq(msg.get_payload(), "Here's the message body\n")

    def test_crlf_separation(self):
        eq = self.assertEqual
        # newline='\n' keeps the file's \r\n line endings from being
        # translated while reading.
        with openfile('msg_26.txt', newline='\n') as fp:
            msg = Parser().parse(fp)
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'text/plain')
        eq(part1.get_payload(), 'Simple email with attachment.\r\n\r\n')
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'application/riscos')

    def test_multipart_digest_with_extra_mime_headers(self):
        eq = self.assertEqual
        neq = self.ndiffAssertEqual
        with openfile('msg_28.txt') as fp:
            msg = email.message_from_file(fp)
        # Structure is:
        # multipart/digest
        #   message/rfc822
        #     text/plain
        #   message/rfc822
        #     text/plain
        self.assertTrue(msg.is_multipart())
        eq(len(msg.get_payload()), 2)
        part1 = msg.get_payload(0)
        eq(part1.get_content_type(), 'message/rfc822')
        self.assertTrue(part1.is_multipart())
        eq(len(part1.get_payload()), 1)
        part1a = part1.get_payload(0)
        self.assertFalse(part1a.is_multipart())
        eq(part1a.get_content_type(), 'text/plain')
        neq(part1a.get_payload(), 'message 1\n')
        # next message/rfc822
        part2 = msg.get_payload(1)
        eq(part2.get_content_type(), 'message/rfc822')
        self.assertTrue(part2.is_multipart())
        eq(len(part2.get_payload()), 1)
        part2a = part2.get_payload(0)
        self.assertFalse(part2a.is_multipart())
        eq(part2a.get_content_type(), 'text/plain')
        neq(part2a.get_payload(), 'message 2\n')

    def test_three_lines(self):
        # A bug report by Andrew McNamara
        lines = ['From: Andrew Person <aperson@dom.ain',
                 'Subject: Test',
                 'Date: Tue, 20 Aug 2002 16:43:45 +1000']
        msg = email.message_from_string(NL.join(lines))
        self.assertEqual(msg['date'], 'Tue, 20 Aug 2002 16:43:45 +1000')

    def test_strip_line_feed_and_carriage_return_in_headers(self):
        eq = self.assertEqual
        # For [ 1002475 ] email message parser doesn't handle \r\n correctly
        value1 = 'text'
        value2 = 'more text'
        m = 'Header: %s\r\nNext-Header: %s\r\n\r\nBody\r\n\r\n' % (
            value1, value2)
        msg = email.message_from_string(m)
        eq(msg.get('Header'), value1)
        eq(msg.get('Next-Header'), value2)

    def test_rfc2822_header_syntax(self):
        eq = self.assertEqual
        # Header names made of unusual printable characters are accepted.
        m = '>From: foo\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg), 3)
        eq(sorted(field for field in msg), ['!"#QUX;~', '>From', 'From'])
        eq(msg.get_payload(), 'body')

    def test_rfc2822_space_not_allowed_in_header(self):
        eq = self.assertEqual
        # The first line has a space before any colon, so it is not a
        # header; no headers at all are expected from this input.
        m = '>From foo@example.com 11:25:53\nFrom: bar\n!"#QUX;~: zoo\n\nbody'
        msg = email.message_from_string(m)
        eq(len(msg.keys()), 0)

    def test_rfc2822_one_character_header(self):
        eq = self.assertEqual
        m = 'A: first header\nB: second header\nCC: third header\n\nbody'
        msg = email.message_from_string(m)
        # sorted() works whatever kind of iterable keys() returns.
        headers = sorted(msg.keys())
        eq(headers, ['A', 'B', 'CC'])
        eq(msg.get_payload(), 'body')
2551
2552
2553
class TestBase64(unittest.TestCase):
    # Tests for the low-level helpers in email.base64mime.

    def test_len(self):
        eq = self.assertEqual
        eq(base64mime.header_length('hello'),
           len(base64mime.body_encode(b'hello', eol='')))
        for size in range(15):
            # Base64 emits 4 output characters per (possibly partial)
            # group of 3 input characters.
            bsize = 4 * ((size + 2) // 3)
            eq(base64mime.header_length('x' * size), bsize)

    def test_decode(self):
        eq = self.assertEqual
        eq(base64mime.decode(''), b'')
        eq(base64mime.decode('aGVsbG8='), b'hello')

    def test_encode(self):
        eq = self.assertEqual
        eq(base64mime.body_encode(b''), b'')
        eq(base64mime.body_encode(b'hello'), 'aGVsbG8=\n')
        # A newline in the input is encoded like any other byte
        eq(base64mime.body_encode(b'hello\n'), 'aGVsbG8K\n')
        # Test the maxlinelen arg
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40), """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
eHh4eCB4eHh4IA==
""")
        # Test the eol argument
        eq(base64mime.body_encode(b'xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
eHh4eCB4eHh4IA==\r
""")

    def test_header_encode(self):
        eq = self.assertEqual
        he = base64mime.header_encode
        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
        eq(he('hello\r\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
        # Test the charset option
        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002604
2605
2606
class TestQuopri(unittest.TestCase):
    # Tests for the low-level helpers in email.quoprimime.

    def setUp(self):
        # Set of characters (as byte integers) that don't need to be encoded
        # in headers.
        self.hlit = list(chain(
            range(ord('a'), ord('z') + 1),
            range(ord('A'), ord('Z') + 1),
            range(ord('0'), ord('9') + 1),
            b'!*+-/'))
        # Set of characters (as byte integers) that do need to be encoded in
        # headers.
        self.hnon = [c for c in range(256) if c not in self.hlit]
        # Sanity check: the two sets partition the byte range.  assertEqual
        # is used rather than a bare assert so the check survives -O.
        self.assertEqual(len(self.hlit) + len(self.hnon), 256)
        # Set of characters (as byte integers) that don't need to be encoded
        # in bodies.
        self.blit = list(range(ord(' '), ord('~') + 1))
        self.blit.append(ord('\t'))
        self.blit.remove(ord('='))
        # Set of characters (as byte integers) that do need to be encoded in
        # bodies.
        self.bnon = [c for c in range(256) if c not in self.blit]
        self.assertEqual(len(self.blit) + len(self.bnon), 256)

    def test_quopri_header_check(self):
        for c in self.hlit:
            self.assertFalse(quoprimime.header_check(c),
                        'Should not be header quopri encoded: %s' % chr(c))
        for c in self.hnon:
            self.assertTrue(quoprimime.header_check(c),
                            'Should be header quopri encoded: %s' % chr(c))

    def test_quopri_body_check(self):
        for c in self.blit:
            self.assertFalse(quoprimime.body_check(c),
                        'Should not be body quopri encoded: %s' % chr(c))
        for c in self.bnon:
            self.assertTrue(quoprimime.body_check(c),
                            'Should be body quopri encoded: %s' % chr(c))

    def test_header_quopri_len(self):
        eq = self.assertEqual
        eq(quoprimime.header_length(b'hello'), 5)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'hello', charset='xxx')),
           quoprimime.header_length(b'hello') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        eq(quoprimime.header_length(b'h@e@l@l@o@'), 20)
        # RFC 2047 chrome is not included in header_length().
        eq(len(quoprimime.header_encode(b'h@e@l@l@o@', charset='xxx')),
           quoprimime.header_length(b'h@e@l@l@o@') +
           # =?xxx?q?...?= means 10 extra characters
           10)
        for c in self.hlit:
            eq(quoprimime.header_length(bytes([c])), 1,
               'expected length 1 for %r' % chr(c))
        for c in self.hnon:
            # Space is special; it's encoded to _
            if c == ord(' '):
                continue
            eq(quoprimime.header_length(bytes([c])), 3,
               'expected length 3 for %r' % chr(c))
        eq(quoprimime.header_length(b' '), 1)

    def test_body_quopri_len(self):
        eq = self.assertEqual
        for c in self.blit:
            eq(quoprimime.body_length(bytes([c])), 1)
        for c in self.bnon:
            eq(quoprimime.body_length(bytes([c])), 3)

    def test_quote_unquote_idempotent(self):
        # unquote() must invert quote() for every single character.
        for x in range(256):
            c = chr(x)
            self.assertEqual(quoprimime.unquote(quoprimime.quote(c)), c)

    def test_header_encode(self):
        eq = self.assertEqual
        he = quoprimime.header_encode
        eq(he(b'hello'), '=?iso-8859-1?q?hello?=')
        eq(he(b'hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
        eq(he(b'hello\nworld'), '=?iso-8859-1?q?hello=0Aworld?=')
        # Test a non-ASCII character
        eq(he(b'hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')

    def test_decode(self):
        eq = self.assertEqual
        eq(quoprimime.decode(''), '')
        eq(quoprimime.decode('hello'), 'hello')
        eq(quoprimime.decode('hello', 'X'), 'hello')
        eq(quoprimime.decode('hello\nworld', 'X'), 'helloXworld')

    def test_encode(self):
        eq = self.assertEqual
        eq(quoprimime.body_encode(''), '')
        eq(quoprimime.body_encode('hello'), 'hello')
        # CRLF line endings in the input come out as the default eol
        eq(quoprimime.body_encode('hello\r\nworld'), 'hello\nworld')
        # Test the maxlinelen arg
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40), """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
x xxxx xxxx xxxx xxxx=20""")
        # Test the eol argument
        eq(quoprimime.body_encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'),
           """\
xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
x xxxx xxxx xxxx xxxx=20""")
        eq(quoprimime.body_encode("""\
one line

two line"""), """\
one line

two line""")
2723
2724
2725
2726# Test the Charset class
2727class TestCharset(unittest.TestCase):
2728 def tearDown(self):
2729 from email import charset as CharsetModule
2730 try:
2731 del CharsetModule.CHARSETS['fake']
2732 except KeyError:
2733 pass
2734
Guido van Rossum9604e662007-08-30 03:46:43 +00002735 def test_codec_encodeable(self):
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002736 eq = self.assertEqual
2737 # Make sure us-ascii = no Unicode conversion
2738 c = Charset('us-ascii')
Guido van Rossum9604e662007-08-30 03:46:43 +00002739 eq(c.header_encode('Hello World!'), 'Hello World!')
2740 # Test 8-bit idempotency with us-ascii
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002741 s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
Guido van Rossum9604e662007-08-30 03:46:43 +00002742 self.assertRaises(UnicodeError, c.header_encode, s)
2743 c = Charset('utf-8')
2744 eq(c.header_encode(s), '=?utf-8?b?wqTCosKkwqTCpMKmwqTCqMKkwqo=?=')
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002745
2746 def test_body_encode(self):
2747 eq = self.assertEqual
2748 # Try a charset with QP body encoding
2749 c = Charset('iso-8859-1')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002750 eq('hello w=F6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002751 # Try a charset with Base64 body encoding
2752 c = Charset('utf-8')
Martin v. Löwis15b16a32008-12-02 06:00:15 +00002753 eq('aGVsbG8gd29ybGQ=\n', c.body_encode(b'hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002754 # Try a charset with None body encoding
2755 c = Charset('us-ascii')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002756 eq('hello world', c.body_encode('hello world'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002757 # Try the convert argument, where input codec != output codec
2758 c = Charset('euc-jp')
2759 # With apologies to Tokio Kikuchi ;)
Barry Warsawbef9d212007-08-31 10:55:37 +00002760 # XXX FIXME
2761## try:
2762## eq('\x1b$B5FCO;~IW\x1b(B',
2763## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7'))
2764## eq('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7',
2765## c.body_encode('\xb5\xc6\xc3\xcf\xbb\xfe\xc9\xd7', False))
2766## except LookupError:
2767## # We probably don't have the Japanese codecs installed
2768## pass
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002769 # Testing SF bug #625509, which we have to fake, since there are no
2770 # built-in encodings where the header encoding is QP but the body
2771 # encoding is not.
2772 from email import charset as CharsetModule
2773 CharsetModule.add_charset('fake', CharsetModule.QP, None)
2774 c = Charset('fake')
Barry Warsaw7aa02e62007-08-31 03:26:19 +00002775 eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00002776
2777 def test_unicode_charset_name(self):
2778 charset = Charset('us-ascii')
2779 self.assertEqual(str(charset), 'us-ascii')
2780 self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
2781
2782
2783
# Test multilingual MIME headers.
class TestHeader(TestEmailBase):
    def test_simple(self):
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append(' Goodbye World!')
        eq(h.encode(), 'Hello World!  Goodbye World!')

    def test_simple_surprise(self):
        # Appended chunks are joined with a space even when the appended
        # text has no leading whitespace of its own.
        eq = self.ndiffAssertEqual
        h = Header('Hello World!')
        eq(h.encode(), 'Hello World!')
        h.append('Goodbye World!')
        eq(h.encode(), 'Hello World! Goodbye World!')

    def test_header_needs_no_decoding(self):
        h = 'no decoding needed'
        self.assertEqual(decode_header(h), [(h, None)])

    def test_long(self):
        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
                   maxlinelen=76)
        # Every folded line must respect the requested maximum length.
        for line in h.encode(splitchars=' ').split('\n '):
            self.assertLessEqual(len(line), 76)

    def test_multilingual(self):
        eq = self.ndiffAssertEqual
        g = Charset("iso-8859-1")
        cz = Charset("iso-8859-2")
        utf8 = Charset("utf-8")
        g_head = (b'Die Mieter treten hier ein werden mit einem '
                  b'Foerderband komfortabel den Korridor entlang, '
                  b'an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, '
                  b'gegen die rotierenden Klingen bef\xf6rdert. ')
        cz_head = (b'Finan\xe8ni metropole se hroutily pod tlakem jejich '
                   b'd\xf9vtipu.. ')
        utf8_head = ('\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
                     '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
                     '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
                     '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
                     '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das '
                     'Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder '
                     'die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066'
                     '\u3044\u307e\u3059\u3002')
        h = Header(g_head, g)
        h.append(cz_head, cz)
        h.append(utf8_head, utf8)
        enc = h.encode(maxlinelen=76)
        eq(enc, """\
=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_einem_Foerderband_kom?=
 =?iso-8859-1?q?fortabel_den_Korridor_entlang=2C_an_s=FCdl=FCndischen_Wand?=
 =?iso-8859-1?q?gem=E4lden_vorbei=2C_gegen_die_rotierenden_Klingen_bef=F6r?=
 =?iso-8859-1?q?dert=2E_?= =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutily?=
 =?iso-8859-2?q?_pod_tlakem_jejich_d=F9vtipu=2E=2E_?= =?utf-8?b?5q2j56K6?=
 =?utf-8?b?44Gr6KiA44GG44Go57+76Kiz44Gv44GV44KM44Gm44GE44G+44Gb44KT44CC?=
 =?utf-8?b?5LiA6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM44CB44GC44Go44Gv44Gn?=
 =?utf-8?b?44Gf44KJ44KB44Gn44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGFz?=
 =?utf-8?b?IE51bnN0dWNrIGdpdCB1bmQgU2xvdGVybWV5ZXI/IEphISBCZWloZXJodW5k?=
 =?utf-8?b?IGRhcyBPZGVyIGRpZSBGbGlwcGVyd2FsZHQgZ2Vyc3B1dC7jgI3jgajoqIA=?=
 =?utf-8?b?44Gj44Gm44GE44G+44GZ44CC?=""")
        decoded = decode_header(enc)
        eq(len(decoded), 3)
        eq(decoded[0], (g_head, 'iso-8859-1'))
        eq(decoded[1], (cz_head, 'iso-8859-2'))
        eq(decoded[2], (utf8_head.encode('utf-8'), 'utf-8'))
        ustr = str(h)
        eq(ustr,
           (b'Die Mieter treten hier ein werden mit einem Foerderband '
            b'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
            b'Wandgem\xc3\xa4lden vorbei, gegen die rotierenden Klingen '
            b'bef\xc3\xb6rdert. Finan\xc4\x8dni metropole se hroutily pod '
            b'tlakem jejich d\xc5\xafvtipu.. \xe6\xad\xa3\xe7\xa2\xba\xe3\x81'
            b'\xab\xe8\xa8\x80\xe3\x81\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3'
            b'\xe3\x81\xaf\xe3\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3'
            b'\x81\xbe\xe3\x81\x9b\xe3\x82\x93\xe3\x80\x82\xe4\xb8\x80\xe9\x83'
            b'\xa8\xe3\x81\xaf\xe3\x83\x89\xe3\x82\xa4\xe3\x83\x84\xe8\xaa\x9e'
            b'\xe3\x81\xa7\xe3\x81\x99\xe3\x81\x8c\xe3\x80\x81\xe3\x81\x82\xe3'
            b'\x81\xa8\xe3\x81\xaf\xe3\x81\xa7\xe3\x81\x9f\xe3\x82\x89\xe3\x82'
            b'\x81\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\xe5\xae\x9f\xe9\x9a\x9b'
            b'\xe3\x81\xab\xe3\x81\xaf\xe3\x80\x8cWenn ist das Nunstuck git '
            b'und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt '
            b'gersput.\xe3\x80\x8d\xe3\x81\xa8\xe8\xa8\x80\xe3\x81\xa3\xe3\x81'
            b'\xa6\xe3\x81\x84\xe3\x81\xbe\xe3\x81\x99\xe3\x80\x82'
            ).decode('utf-8'))
        # Test make_header()
        newh = make_header(decode_header(enc))
        eq(newh, h)

    def test_empty_header_encode(self):
        h = Header()
        self.assertEqual(h.encode(), '')

    def test_header_ctor_default_args(self):
        eq = self.ndiffAssertEqual
        h = Header()
        eq(h, '')
        h.append('foo', Charset('iso-8859-1'))
        eq(h, 'foo')

    def test_explicit_maxlinelen(self):
        eq = self.ndiffAssertEqual
        hstr = ('A very long line that must get split to something other '
                'than at the 76th character boundary to test the non-default '
                'behavior')
        h = Header(hstr)
        eq(h.encode(), '''\
A very long line that must get split to something other than at the 76th
 character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        # With header_name given, the first line is expected to fold
        # earlier than in the case above.
        h = Header(hstr, header_name='Subject')
        eq(h.encode(), '''\
A very long line that must get split to something other than at the
 76th character boundary to test the non-default behavior''')
        eq(str(h), hstr)
        h = Header(hstr, maxlinelen=1024, header_name='Subject')
        eq(h.encode(), hstr)
        eq(str(h), hstr)

    def test_quopri_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='iso-8859-1', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_x?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?x_?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?xx?=
 =?iso-8859-1?q?_?=""")
        # The split header must round-trip back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='iso-8859-1', maxlinelen=40)
        h.append('xxxx ' * 20)
        s = h.encode()
        eq(s, """\
=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xxx?=
 =?iso-8859-1?q?x_xxxx_xxxx_xxxx_xxxx_?=
 =?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
 =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
 =?iso-8859-1?q?_xxxx_xxxx_?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_base64_splittable(self):
        eq = self.ndiffAssertEqual
        h = Header(charset='koi8-r', maxlinelen=20)
        x = 'xxxx ' * 20
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IHh4?=
 =?koi8-r?b?eHgg?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?eCB4?=
 =?koi8-r?b?eHh4?=
 =?koi8-r?b?IA==?=""")
        # The split header must round-trip back to the original text.
        eq(x, str(make_header(decode_header(s))))
        h = Header(charset='koi8-r', maxlinelen=40)
        h.append(x)
        s = h.encode()
        eq(s, """\
=?koi8-r?b?eHh4eCB4eHh4IHh4eHggeHh4?=
 =?koi8-r?b?eCB4eHh4IHh4eHggeHh4eCB4?=
 =?koi8-r?b?eHh4IHh4eHggeHh4eCB4eHh4?=
 =?koi8-r?b?IHh4eHggeHh4eCB4eHh4IHh4?=
 =?koi8-r?b?eHggeHh4eCB4eHh4IHh4eHgg?=
 =?koi8-r?b?eHh4eCB4eHh4IA==?=""")
        eq(x, str(make_header(decode_header(s))))

    def test_us_ascii_header(self):
        eq = self.assertEqual
        s = 'hello'
        x = decode_header(s)
        eq(x, [('hello', None)])
        h = make_header(x)
        eq(s, h.encode())

    def test_string_charset(self):
        # A charset may be given by name instead of as a Charset instance.
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        eq(h, 'hello')

##    def test_unicode_error(self):
##        raises = self.assertRaises
##        raises(UnicodeError, Header, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, '[P\xf6stal]', 'us-ascii')
##        h = Header()
##        raises(UnicodeError, h.append, u'[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, h.append, '[P\xf6stal]', 'us-ascii')
##        raises(UnicodeError, Header, u'\u83ca\u5730\u6642\u592b', 'iso-8859-1')

    def test_utf8_shortest(self):
        # For utf-8 the expected output uses whichever of quoted-printable
        # and base64 is shorter for the text at hand.
        eq = self.assertEqual
        h = Header('p\xf6stal', 'utf-8')
        eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
        eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')

    def test_bad_8bit_header(self):
        raises = self.assertRaises
        eq = self.assertEqual
        x = b'Ynwp4dUEbay Auction Semiar- No Charge \x96 Earn Big'
        raises(UnicodeError, Header, x)
        h = Header()
        raises(UnicodeError, h.append, x)
        # With errors='replace' the undecodable byte is replaced instead
        # of raising.
        e = x.decode('utf-8', 'replace')
        eq(str(Header(x, errors='replace')), e)
        h.append(x, errors='replace')
        eq(str(h), e)

    def test_encoded_adjacent_nonencoded(self):
        eq = self.assertEqual
        h = Header()
        h.append('hello', 'iso-8859-1')
        h.append('world')
        s = h.encode()
        eq(s, '=?iso-8859-1?q?hello?= world')
        h = make_header(decode_header(s))
        eq(h.encode(), s)

    def test_whitespace_eater(self):
        # Whitespace between two adjacent encoded words is dropped and the
        # words are joined into a single chunk when decoding.
        eq = self.assertEqual
        s = 'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztk=?= =?koi8-r?q?=CA?= zz.'
        parts = decode_header(s)
        eq(parts, [(b'Subject:', None), (b'\xf0\xd2\xcf\xd7\xc5\xd2\xcb\xc1 \xce\xc1 \xc6\xc9\xce\xc1\xcc\xd8\xce\xd9\xca', 'koi8-r'), (b'zz.', None)])
        hdr = make_header(parts)
        eq(hdr.encode(),
           'Subject: =?koi8-r?b?8NLP18XSy8EgzsEgxsnOwczYztnK?= zz.')

    def test_broken_base64_header(self):
        raises = self.assertRaises
        s = 'Subject: =?EUC-KR?B?CSixpLDtKSC/7Liuvsax4iC6uLmwMcijIKHaILzSwd/H0SC8+LCjwLsgv7W/+Mj3IQ?='
        raises(errors.HeaderParseError, decode_header, s)
3091
3092
3093
3094# Test RFC 2231 header parameters (en/de)coding
3095class TestRFC2231(TestEmailBase):
def test_get_param(self):
    eq = self.assertEqual
    msg = self._msgobj('msg_29.txt')
    # An RFC 2231 encoded parameter comes back as a
    # (charset, language, value) 3-tuple.
    eq(msg.get_param('title'),
       ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    eq(msg.get_param('title', unquote=False),
       ('us-ascii', 'en', '"This is even more ***fun*** isn\'t it!"'))
3103
def test_set_param(self):
    eq = self.ndiffAssertEqual
    msg = Message()
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii')
    eq(msg.get_param('title'),
       ('us-ascii', '', 'This is even more ***fun*** isn\'t it!'))
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    eq(msg.get_param('title'),
       ('us-ascii', 'en', 'This is even more ***fun*** isn\'t it!'))
    # Setting the parameter on a parsed message must show up RFC 2231
    # encoded in the flattened Content-Type header.
    msg = self._msgobj('msg_01.txt')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset=us-ascii;
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")
3140
def test_del_param(self):
    eq = self.ndiffAssertEqual
    msg = self._msgobj('msg_01.txt')
    msg.set_param('foo', 'bar', charset='us-ascii', language='en')
    msg.set_param('title', 'This is even more ***fun*** isn\'t it!',
                  charset='us-ascii', language='en')
    # Removing 'foo' again must leave only charset and title behind.
    msg.del_param('foo', header='Content-Type')
    eq(msg.as_string(maxheaderlen=78), """\
Return-Path: <bbb@zzz.org>
Delivered-To: bbb@zzz.org
Received: by mail.zzz.org (Postfix, from userid 889)
\tid 27CEAD38CC; Fri,  4 May 2001 14:05:44 -0400 (EDT)
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Message-ID: <15090.61304.110929.45684@aaa.zzz.org>
From: bbb@ddd.com (John X. Doe)
To: bbb@zzz.org
Subject: This is a test message
Date: Fri, 4 May 2001 14:05:44 -0400
Content-Type: text/plain; charset="us-ascii";
 title*="us-ascii'en'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20isn%27t%20it%21"


Hi,

Do you like this message?

-Me
""")
3170
def test_rfc2231_get_content_charset(self):
    eq = self.assertEqual
    # NOTE(review): msg_32.txt presumably carries an RFC 2231 encoded
    # charset parameter -- confirm against the data file.
    msg = self._msgobj('msg_32.txt')
    eq(msg.get_content_charset(), 'us-ascii')
3175
3176 def test_rfc2231_no_language_or_charset(self):
3177 m = '''\
3178Content-Transfer-Encoding: 8bit
3179Content-Disposition: inline; filename="file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm"
3180Content-Type: text/html; NAME*0=file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEM; NAME*1=P_nsmail.htm
3181
3182'''
3183 msg = email.message_from_string(m)
3184 param = msg.get_param('NAME')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003185 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003186 self.assertEqual(
3187 param,
3188 'file____C__DOCUMENTS_20AND_20SETTINGS_FABIEN_LOCAL_20SETTINGS_TEMP_nsmail.htm')
3189
3190 def test_rfc2231_no_language_or_charset_in_filename(self):
3191 m = '''\
3192Content-Disposition: inline;
3193\tfilename*0*="''This%20is%20even%20more%20";
3194\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3195\tfilename*2="is it not.pdf"
3196
3197'''
3198 msg = email.message_from_string(m)
3199 self.assertEqual(msg.get_filename(),
3200 'This is even more ***fun*** is it not.pdf')
3201
3202 def test_rfc2231_no_language_or_charset_in_filename_encoded(self):
3203 m = '''\
3204Content-Disposition: inline;
3205\tfilename*0*="''This%20is%20even%20more%20";
3206\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3207\tfilename*2="is it not.pdf"
3208
3209'''
3210 msg = email.message_from_string(m)
3211 self.assertEqual(msg.get_filename(),
3212 'This is even more ***fun*** is it not.pdf')
3213
3214 def test_rfc2231_partly_encoded(self):
3215 m = '''\
3216Content-Disposition: inline;
3217\tfilename*0="''This%20is%20even%20more%20";
3218\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3219\tfilename*2="is it not.pdf"
3220
3221'''
3222 msg = email.message_from_string(m)
3223 self.assertEqual(
3224 msg.get_filename(),
3225 'This%20is%20even%20more%20***fun*** is it not.pdf')
3226
3227 def test_rfc2231_partly_nonencoded(self):
3228 m = '''\
3229Content-Disposition: inline;
3230\tfilename*0="This%20is%20even%20more%20";
3231\tfilename*1="%2A%2A%2Afun%2A%2A%2A%20";
3232\tfilename*2="is it not.pdf"
3233
3234'''
3235 msg = email.message_from_string(m)
3236 self.assertEqual(
3237 msg.get_filename(),
3238 'This%20is%20even%20more%20%2A%2A%2Afun%2A%2A%2A%20is it not.pdf')
3239
3240 def test_rfc2231_no_language_or_charset_in_boundary(self):
3241 m = '''\
3242Content-Type: multipart/alternative;
3243\tboundary*0*="''This%20is%20even%20more%20";
3244\tboundary*1*="%2A%2A%2Afun%2A%2A%2A%20";
3245\tboundary*2="is it not.pdf"
3246
3247'''
3248 msg = email.message_from_string(m)
3249 self.assertEqual(msg.get_boundary(),
3250 'This is even more ***fun*** is it not.pdf')
3251
3252 def test_rfc2231_no_language_or_charset_in_charset(self):
3253 # This is a nonsensical charset value, but tests the code anyway
3254 m = '''\
3255Content-Type: text/plain;
3256\tcharset*0*="This%20is%20even%20more%20";
3257\tcharset*1*="%2A%2A%2Afun%2A%2A%2A%20";
3258\tcharset*2="is it not.pdf"
3259
3260'''
3261 msg = email.message_from_string(m)
3262 self.assertEqual(msg.get_content_charset(),
3263 'this is even more ***fun*** is it not.pdf')
3264
3265 def test_rfc2231_bad_encoding_in_filename(self):
3266 m = '''\
3267Content-Disposition: inline;
3268\tfilename*0*="bogus'xx'This%20is%20even%20more%20";
3269\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3270\tfilename*2="is it not.pdf"
3271
3272'''
3273 msg = email.message_from_string(m)
3274 self.assertEqual(msg.get_filename(),
3275 'This is even more ***fun*** is it not.pdf')
3276
3277 def test_rfc2231_bad_encoding_in_charset(self):
3278 m = """\
3279Content-Type: text/plain; charset*=bogus''utf-8%E2%80%9D
3280
3281"""
3282 msg = email.message_from_string(m)
3283 # This should return None because non-ascii characters in the charset
3284 # are not allowed.
3285 self.assertEqual(msg.get_content_charset(), None)
3286
3287 def test_rfc2231_bad_character_in_charset(self):
3288 m = """\
3289Content-Type: text/plain; charset*=ascii''utf-8%E2%80%9D
3290
3291"""
3292 msg = email.message_from_string(m)
3293 # This should return None because non-ascii characters in the charset
3294 # are not allowed.
3295 self.assertEqual(msg.get_content_charset(), None)
3296
3297 def test_rfc2231_bad_character_in_filename(self):
3298 m = '''\
3299Content-Disposition: inline;
3300\tfilename*0*="ascii'xx'This%20is%20even%20more%20";
3301\tfilename*1*="%2A%2A%2Afun%2A%2A%2A%20";
3302\tfilename*2*="is it not.pdf%E2"
3303
3304'''
3305 msg = email.message_from_string(m)
3306 self.assertEqual(msg.get_filename(),
3307 'This is even more ***fun*** is it not.pdf\ufffd')
3308
3309 def test_rfc2231_unknown_encoding(self):
3310 m = """\
3311Content-Transfer-Encoding: 8bit
3312Content-Disposition: inline; filename*=X-UNKNOWN''myfile.txt
3313
3314"""
3315 msg = email.message_from_string(m)
3316 self.assertEqual(msg.get_filename(), 'myfile.txt')
3317
3318 def test_rfc2231_single_tick_in_filename_extended(self):
3319 eq = self.assertEqual
3320 m = """\
3321Content-Type: application/x-foo;
3322\tname*0*=\"Frank's\"; name*1*=\" Document\"
3323
3324"""
3325 msg = email.message_from_string(m)
3326 charset, language, s = msg.get_param('name')
3327 eq(charset, None)
3328 eq(language, None)
3329 eq(s, "Frank's Document")
3330
3331 def test_rfc2231_single_tick_in_filename(self):
3332 m = """\
3333Content-Type: application/x-foo; name*0=\"Frank's\"; name*1=\" Document\"
3334
3335"""
3336 msg = email.message_from_string(m)
3337 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003338 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003339 self.assertEqual(param, "Frank's Document")
3340
3341 def test_rfc2231_tick_attack_extended(self):
3342 eq = self.assertEqual
3343 m = """\
3344Content-Type: application/x-foo;
3345\tname*0*=\"us-ascii'en-us'Frank's\"; name*1*=\" Document\"
3346
3347"""
3348 msg = email.message_from_string(m)
3349 charset, language, s = msg.get_param('name')
3350 eq(charset, 'us-ascii')
3351 eq(language, 'en-us')
3352 eq(s, "Frank's Document")
3353
3354 def test_rfc2231_tick_attack(self):
3355 m = """\
3356Content-Type: application/x-foo;
3357\tname*0=\"us-ascii'en-us'Frank's\"; name*1=\" Document\"
3358
3359"""
3360 msg = email.message_from_string(m)
3361 param = msg.get_param('name')
Benjamin Petersonc9c0f202009-06-30 23:06:06 +00003362 self.assertFalse(isinstance(param, tuple))
Guido van Rossum8b3febe2007-08-30 01:15:14 +00003363 self.assertEqual(param, "us-ascii'en-us'Frank's Document")
3364
3365 def test_rfc2231_no_extended_values(self):
3366 eq = self.assertEqual
3367 m = """\
3368Content-Type: application/x-foo; name=\"Frank's Document\"
3369
3370"""
3371 msg = email.message_from_string(m)
3372 eq(msg.get_param('name'), "Frank's Document")
3373
3374 def test_rfc2231_encoded_then_unencoded_segments(self):
3375 eq = self.assertEqual
3376 m = """\
3377Content-Type: application/x-foo;
3378\tname*0*=\"us-ascii'en-us'My\";
3379\tname*1=\" Document\";
3380\tname*2*=\" For You\"
3381
3382"""
3383 msg = email.message_from_string(m)
3384 charset, language, s = msg.get_param('name')
3385 eq(charset, 'us-ascii')
3386 eq(language, 'en-us')
3387 eq(s, 'My Document For You')
3388
3389 def test_rfc2231_unencoded_then_encoded_segments(self):
3390 eq = self.assertEqual
3391 m = """\
3392Content-Type: application/x-foo;
3393\tname*0=\"us-ascii'en-us'My\";
3394\tname*1*=\" Document\";
3395\tname*2*=\" For You\"
3396
3397"""
3398 msg = email.message_from_string(m)
3399 charset, language, s = msg.get_param('name')
3400 eq(charset, 'us-ascii')
3401 eq(language, 'en-us')
3402 eq(s, 'My Document For You')
3403
3404
3405
# Tests to ensure that signed parts of an email are completely preserved, as
# required by RFC1847 section 2.1.  Note that these are incomplete, because the
# email package does not currently always preserve the body.  See issue 1670765.
class TestSigned(TestEmailBase):
    """Round-trip a message and compare its first MIME part with the input."""

    def _msg_and_obj(self, filename):
        """Return ``(original_text, parsed_message)`` for a data file.

        *filename* is a bare file name; openfile() (defined at the top of
        this module) joins it onto the package's ``data`` directory.
        """
        # The name is handed to openfile() directly.  Passing it through
        # findfile() first added nothing, since openfile() already anchors
        # the name in the data directory.
        with openfile(filename) as fp:
            original = fp.read()
        msg = email.message_from_string(original)
        return original, msg

    def _first_part(self, text):
        """Return the text between the first boundary line and its repeat.

        Fails the running test, instead of raising AttributeError on None,
        when *text* contains no such boundary-delimited part.
        """
        import re
        repart = re.compile(r'^--([^\n]+)\n(.*?)\n--\1$', re.S | re.M)
        found = repart.search(text)
        if found is None:
            self.fail('no boundary-delimited MIME part found in message')
        return found.group(2)

    def _signed_parts_eq(self, original, result):
        """Assert that the first MIME part of *result* equals *original*'s."""
        # Extract the first mime part of each message
        inpart = self._first_part(original)
        outpart = self._first_part(result)
        self.assertEqual(outpart, inpart)

    def test_long_headers_as_string(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string()
        self._signed_parts_eq(original, result)

    def test_long_headers_as_string_maxheaderlen(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        result = msg.as_string(maxheaderlen=60)
        self._signed_parts_eq(original, result)

    def test_long_headers_flatten(self):
        original, msg = self._msg_and_obj('msg_45.txt')
        fp = StringIO()
        Generator(fp).flatten(msg)
        result = fp.getvalue()
        self._signed_parts_eq(original, result)
3441
3442
3443
def _testclasses():
    """Return every attribute of this module whose name starts with 'Test'."""
    this_module = sys.modules[__name__]
    found = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            found.append(getattr(this_module, attr_name))
    return found
3447
3448
def suite():
    """Return a TestSuite with the tests of every class from _testclasses()."""
    # unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13.
    # A default TestLoader's loadTestsFromTestCase() collects the same
    # 'test'-prefixed methods and is available on every version.
    loader = unittest.TestLoader()
    collected = unittest.TestSuite()
    for testclass in _testclasses():
        collected.addTest(loader.loadTestsFromTestCase(testclass))
    return collected
3454
3455
def test_main():
    """Pass each class from _testclasses() to run_unittest, one at a time."""
    classes = _testclasses()
    for cls in classes:
        run_unittest(cls)
3459
3460
3461
# When the file is executed directly, hand control to unittest and name
# the module-level suite() function as the default test to run.
if __name__ == '__main__':
    unittest.main(defaultTest='suite')