blob: 752bf35a6e11e57b0abf0f7ad4a37a6fb096f2e6 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
3# Contact: email-sig@python.org
4
5"""A parser of RFC 2822 and MIME email messages."""
6
R David Murrayb35c8502011-04-13 16:46:05 -04007__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008
9import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000010from io import StringIO, TextIOWrapper
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011
R David Murray612528d2013-03-15 20:38:15 -040012from email.feedparser import FeedParser, BytesFeedParser
Guido van Rossum8b3febe2007-08-30 01:15:14 +000013from email.message import Message
R David Murrayc27e5222012-05-25 15:01:48 -040014from email._policybase import compat32
Guido van Rossum8b3febe2007-08-30 01:15:14 +000015
16
17
class Parser:
    def __init__(self, _class=Message, *, policy=compat32):
        """Set up a parser of RFC 2822 and MIME email messages.

        The parser builds an in-memory tree of message objects from text
        input.  That tree can be modified and later handed to a Generator
        to get the textual form of the message back.

        The text to parse must consist of a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block ends at the first blank line or at the
        end of the text, whichever comes first.

        _class is the class instantiated whenever a new message object is
        needed.  It must be callable with zero arguments.  The default is
        email.message.Message.

        policy (keyword-only) is the policy object handed to the underlying
        feed parser; it controls a number of aspects of the parser's
        operation.  The default policy maintains backward compatibility.
        """
        self.policy = policy
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Build a message structure from a text-mode file object.

        fp is read to exhaustion in chunks of 8192 characters, and the root
        of the resulting message structure is returned.  When headersonly
        is true, parsing stops once the headers have been read; the default
        (False) parses the entire contents of the file.
        """
        feeder = FeedParser(self._class, policy=self.policy)
        if headersonly:
            feeder._set_headersonly()
        # Keep feeding chunks until read() hands back something falsy,
        # which signals that the input is exhausted.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Build a message structure from a string.

        The string is wrapped in a StringIO and passed to parse(); the root
        of the message structure is returned.  When headersonly is true,
        parsing stops once the headers have been read; the default (False)
        parses the whole string.
        """
        stream = StringIO(text)
        return self.parse(stream, headersonly=headersonly)
70
71
72
class HeaderParser(Parser):
    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from a text file.

        The headersonly argument is accepted so the signature matches
        Parser.parse(), but its value is ignored: the base implementation
        is always invoked with headersonly set to True.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers of the message held in a string.

        The headersonly argument is accepted so the signature matches
        Parser.parsestr(), but its value is ignored: the base
        implementation is always invoked with headersonly set to True.
        """
        return Parser.parsestr(self, text, headersonly=True)
R. David Murray96fd54e2010-10-08 15:55:28 +000079
80
class BytesParser:

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are passed unchanged to Parser,
        which does the actual parsing once the input has been decoded to
        text.
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        The bytes are decoded as ASCII with the surrogateescape error
        handler.  The file object passed in is left open; closing it
        remains the caller's responsibility.
        """
        wrapper = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            # Detach instead of closing: closing the wrapper would also
            # close the caller's binary file, which this method does not own.
            wrapper.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.

        The bytes are decoded as ASCII with the surrogateescape error
        handler before being parsed.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
R David Murrayb35c8502011-04-13 16:46:05 -0400124
125
class BytesHeaderParser(BytesParser):
    def parse(self, fp, headersonly=True):
        """Parse only the headers of the message read from a binary file.

        The headersonly argument is accepted so the signature matches
        BytesParser.parse(), but its value is ignored: the base
        implementation is always invoked with headersonly set to True.
        """
        return BytesParser.parse(self, fp, True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers of the message held in a byte string.

        The headersonly argument is accepted so the signature matches
        BytesParser.parsebytes(), but its value is ignored: the base
        implementation is always invoked with headersonly set to True.
        """
        return BytesParser.parsebytes(self, text, True)