blob: f49d31d43dfc531a4b276044c6dd9739efe9e24b [file] [log] [blame]
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org
4
5"""A parser of RFC 2822 and MIME email messages."""
6
# Public names of this module, i.e. what ``from <module> import *`` exposes.
# FeedParser and BytesFeedParser are imported from email.feedparser and
# re-exported here; the other four classes are defined in this file.
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
           'FeedParser', 'BytesFeedParser']
Guido van Rossum8b3febe2007-08-30 01:15:14 +00009
10import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000011from io import StringIO, TextIOWrapper
Guido van Rossum8b3febe2007-08-30 01:15:14 +000012
R David Murray612528d2013-03-15 20:38:15 -040013from email.feedparser import FeedParser, BytesFeedParser
Guido van Rossum8b3febe2007-08-30 01:15:14 +000014from email.message import Message
R David Murrayc27e5222012-05-25 15:01:48 -040015from email._policybase import compat32
Guido van Rossum8b3febe2007-08-30 01:15:14 +000016
17
18
class Parser:
    """Text-mode front end that drives a FeedParser over str input."""

    def __init__(self, _class=Message, *, policy=compat32):
        """Parser of RFC 2822 and MIME email messages.

        Builds an in-memory tree of message objects from the input.  The
        tree can be modified and later handed to a Generator to obtain the
        textual form of the message again.

        The input text must consist of a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block ends at the first blank line or at the
        end of the input, whichever comes first.

        _class is the class instantiated whenever a new message object is
        needed.  Its constructor must be callable with zero arguments.
        The default is email.message.Message.

        The keyword-only policy argument is a policy object controlling a
        number of aspects of the parser's operation.  The default policy
        (compat32) maintains backward compatibility.
        """
        self.policy = policy
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads fp until it is exhausted and returns the root of the
        resulting message structure.  fp only needs a read(size) method
        returning text.  When the optional headersonly flag is true,
        parsing stops after the headers have been read; the default,
        False, parses the entire contents of the file.
        """
        feeder = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feeder._set_headersonly()
        # Stream the input in fixed-size chunks; a falsy read result marks
        # the end of the input.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  When the optional
        headersonly flag is true, parsing stops after the headers have
        been read; the default, False, parses the whole string.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
71
72
73
class HeaderParser(Parser):
    """Parser variant that always stops parsing after the headers."""

    def parse(self, fp, headersonly=True):
        """Parse the headers of the message read from the text file fp.

        The headersonly argument exists only so the signature matches
        Parser.parse; its value is ignored and header-only parsing is
        always requested.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the headers of the message contained in the string text.

        The headersonly argument exists only so the signature matches
        Parser.parsestr; its value is ignored and header-only parsing is
        always requested.
        """
        return Parser.parsestr(self, text, headersonly=True)
R. David Murray96fd54e2010-10-08 15:55:28 +000080
81
class BytesParser:
    """Binary-input counterpart of Parser; delegates to a wrapped Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message,
        which can then be manipulated and turned over to a Generator to
        return the textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block is terminated either by the end of the
        input or by a blank line.

        All positional and keyword arguments are forwarded unchanged to
        Parser, so the same options apply: _class is the class to
        instantiate for new message objects (its constructor must accept
        zero arguments; the default is email.message.Message), and the
        keyword-only policy argument selects the policy object.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the
        message structure.  Optional headersonly is a flag specifying
        whether to stop parsing after reading the headers or not.  The
        default is False, meaning it parses the entire contents of the
        file.

        The file object passed in is left open; closing it remains the
        caller's responsibility.
        """
        # Bytes are decoded as ASCII with surrogateescape so that non-ASCII
        # bytes survive the trip through the text-based parser.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach instead of closing: closing the wrapper (explicitly,
            # via ``with``, or when it is garbage collected) would also
            # close the caller's underlying binary file.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly
        is a flag specifying whether to stop parsing after reading the
        headers or not.  The default is False, meaning it parses the
        entire contents of the byte string.
        """
        # Same decoding as parse(): ASCII with surrogateescape.
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
R David Murrayb35c8502011-04-13 16:46:05 -0400125
126
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops parsing after the headers."""

    def parse(self, fp, headersonly=True):
        """Parse the headers of the message read from the binary file fp.

        The headersonly argument is accepted only to keep the signature
        compatible with BytesParser.parse; its value is ignored.
        """
        # Header-only mode is forced regardless of what the caller passed.
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse the headers of the message held in the byte string text.

        The headersonly argument is accepted only to keep the signature
        compatible with BytesParser.parsebytes; its value is ignored.
        """
        # Header-only mode is forced regardless of what the caller passed.
        return BytesParser.parsebytes(self, text, headersonly=True)