blob: 8c9bc9e44e24a75a85d9f16b874d3ed875fe5fc9 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
3# Contact: email-sig@python.org
4
5"""A parser of RFC 2822 and MIME email messages."""
6
R David Murray2fab3582013-03-15 21:00:48 -04007__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
8 'FeedParser', 'BytesFeedParser']
Guido van Rossum8b3febe2007-08-30 01:15:14 +00009
R. David Murray96fd54e2010-10-08 15:55:28 +000010from io import StringIO, TextIOWrapper
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011
R David Murray612528d2013-03-15 20:38:15 -040012from email.feedparser import FeedParser, BytesFeedParser
R David Murrayc27e5222012-05-25 15:01:48 -040013from email._policybase import compat32
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014
15
16
class Parser:
    """Text front end that drives a FeedParser over a file or a string."""

    def __init__(self, _class=None, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  It is handed to FeedParser unchanged; when it is
        None (the default) the choice of message class is left to FeedParser.

        The policy keyword specifies a policy object that controls a number of
        aspects of the parser's operation.  The default policy maintains
        backward compatibility.  It is keyword-only.

        """
        self._class = _class
        self.policy = policy

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        fp only needs a read(size) method that returns text; it is read to
        exhaustion but is not closed here.
        """
        # A fresh FeedParser per call, so one Parser can be reused freely.
        feedparser = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feedparser._set_headersonly()
        # Feed the input in fixed-size chunks rather than in one big read.
        while True:
            data = fp.read(8192)
            if not data:
                # An empty read marks the end of the input.
                break
            feedparser.feed(data)
        # close() finishes parsing and hands back the root message object.
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.

        The string is wrapped in a StringIO and passed to self.parse(), so a
        subclass that overrides parse() also changes parsestr().
        """
        return self.parse(StringIO(text), headersonly=headersonly)
69
70
71
class HeaderParser(Parser):
    """Parser variant that always runs in headers-only mode.

    Both methods accept a headersonly argument so their signatures line up
    with Parser, but the value is ignored: True is always passed on.
    """

    def parse(self, fp, headersonly=True):
        """Parse the text file fp in headers-only mode.

        headersonly is accepted for signature compatibility and ignored.
        Returns whatever Parser.parse() returns.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the string text in headers-only mode.

        headersonly is accepted for signature compatibility and ignored.
        Returns whatever Parser.parsestr() returns.
        """
        return Parser.parsestr(self, text, headersonly=True)
R. David Murray96fd54e2010-10-08 15:55:28 +000078
79
class BytesParser:
    """Binary front end that decodes its input and delegates to a Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are forwarded unchanged to
        Parser(), so this constructor accepts exactly what Parser accepts
        (the _class argument and the policy keyword).
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.
        """
        # Decode as ASCII; surrogateescape keeps non-ASCII bytes as lone
        # surrogates instead of raising a decoding error.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Separate the wrapper from the caller's binary file, so that
            # discarding the wrapper does not close a file we do not own.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        # Same decoding as parse(): ASCII with surrogateescape.
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
R David Murrayb35c8502011-04-13 16:46:05 -0400125
126
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always runs in headers-only mode.

    Both methods accept a headersonly argument so their signatures line up
    with BytesParser, but the value is ignored: True is always passed on.
    """

    def parse(self, fp, headersonly=True):
        """Parse the binary file fp in headers-only mode.

        headersonly is accepted for signature compatibility and ignored.
        Returns whatever BytesParser.parse() returns.
        """
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse the byte string text in headers-only mode.

        headersonly is accepted for signature compatibility and ignored.
        Returns whatever BytesParser.parsebytes() returns.
        """
        return BytesParser.parsebytes(self, text, headersonly=True)