# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""
6
# Names exported by ``from <this module> import *``.
__all__ = [
    'Parser',
    'HeaderParser',
    'BytesParser',
    'BytesHeaderParser',
    'FeedParser',
    'BytesFeedParser',
]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00009
R. David Murray96fd54e2010-10-08 15:55:28 +000010from io import StringIO, TextIOWrapper
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011
R David Murray612528d2013-03-15 20:38:15 -040012from email.feedparser import FeedParser, BytesFeedParser
R David Murrayc27e5222012-05-25 15:01:48 -040013from email._policybase import compat32
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
15
class Parser:
    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a "Unix-from" header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  It is handed unchanged to FeedParser; when left as
        None (the default) the choice of class is delegated to FeedParser.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.

        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  fp only needs a read(size) method returning text.
        Optional headersonly is a flag specifying whether to stop parsing
        after reading the headers or not.  The default is False, meaning it
        parses the entire contents of the file.
        """
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Hand the input over in bounded chunks rather than one read() of the
        # whole file; an empty (falsy) read result marks end of input.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        # close() finishes parsing and returns the root message object.
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        # Wrap the string in a file-like object so parse() does the work.
        return self.parse(StringIO(text), headersonly=headersonly)
68
69
70
class HeaderParser(Parser):
    """Parser that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Create a message structure from a file, parsing headers only.

        The headersonly argument is accepted only so the signature matches
        Parser.parse(); its value is ignored and True is always used.
        """
        return super().parse(fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Create a message structure from a string, parsing headers only.

        The headersonly argument is accepted only so the signature matches
        Parser.parsestr(); its value is ignored and True is always used.
        """
        return super().parsestr(text, headersonly=True)
R. David Murray96fd54e2010-10-08 15:55:28 +000077
78
class BytesParser:

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a "Unix-from" header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are passed straight through to
        Parser, so the same _class argument and policy keyword are accepted
        here.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        # Present the binary stream as text; surrogateescape carries any
        # non-ASCII bytes through as lone surrogates instead of failing.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly=headersonly)
        finally:
            # Separate the wrapper from the caller's stream so that discarding
            # the wrapper does not close a file this method does not own.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        # Same decoding as parse(): ASCII with surrogateescape.
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly=headersonly)
R David Murrayb35c8502011-04-13 16:46:05 -0400124
125
class BytesHeaderParser(BytesParser):
    """BytesParser that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Parse a binary file, headers only.

        headersonly exists to mirror BytesParser.parse(); whatever value is
        passed, True is forwarded.
        """
        return super().parse(fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse a byte string, headers only.

        headersonly exists to mirror BytesParser.parsebytes(); whatever value
        is passed, True is forwarded.
        """
        return super().parsebytes(text, headersonly=True)