blob: fc5090b38b399f39c0ef9c50e138bb2939470cfb [file] [log] [blame]
# Copyright (C) 2001-2007 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org

"""A parser of RFC 2822 and MIME email messages."""
6
# Public API of this module: the text-mode and bytes-mode parsers.
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser']
Guido van Rossum8b3febe2007-08-30 01:15:14 +00008
9import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000010from io import StringIO, TextIOWrapper
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011
12from email.feedparser import FeedParser
13from email.message import Message
14
15
16
class Parser:
    """Text-mode parser that builds a message object tree via FeedParser."""

    def __init__(self, _class=Message):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a 'Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  Default is email.message.Message.
        """
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        fp must be a text-mode file-like object: the chunks returned by its
        read() method are fed to the FeedParser unchanged.
        """
        feedparser = FeedParser(self._class)
        if headersonly:
            feedparser._set_headersonly()
        # Read in 8192-character chunks until read() returns an empty
        # (falsy) result, which signals end of input.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        # Wrap the string in a file-like object so parse() does the work.
        return self.parse(StringIO(text), headersonly=headersonly)
63
64
65
class HeaderParser(Parser):
    """Parser variant that always stops after the header block.

    Both methods accept a headersonly argument for signature compatibility
    with Parser, but its value is ignored: the base-class method is always
    called with headersonly=True.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the text file-like object fp."""
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers from the string text."""
        return Parser.parsestr(self, text, headersonly=True)
R. David Murray96fd54e2010-10-08 15:55:28 +000072
73
class BytesParser:
    """Bytes-mode front end that decodes its input and delegates to Parser."""

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a 'Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All positional and keyword arguments are passed through unchanged to
        the Parser constructor (for example _class, the class to instantiate
        for new message objects).
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        The binary file fp is left open: it belongs to the caller, so only
        the temporary text wrapper created here is torn down.
        """
        # Decode as ASCII with surrogateescape so that non-ASCII bytes are
        # carried through as lone surrogates instead of raising.
        fp = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(fp, headersonly)
        finally:
            # Detach rather than close: closing the wrapper (for example via
            # "with fp:") would also close the caller's underlying file.
            fp.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.
        """
        # Same decoding scheme as parse(): ASCII with surrogateescape.
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)
R David Murrayb35c8502011-04-13 16:46:05 -0400117
118
class BytesHeaderParser(BytesParser):
    """BytesParser variant that always stops after the header block.

    Both methods accept a headersonly argument for signature compatibility
    with BytesParser, but its value is ignored: the base-class method is
    always called with headersonly=True.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the binary file-like object fp."""
        return BytesParser.parse(self, fp, headersonly=True)

    def parsebytes(self, text, headersonly=True):
        """Parse only the headers from the byte string text."""
        return BytesParser.parsebytes(self, text, headersonly=True)
124 return BytesParser.parsebytes(self, text, headersonly=True)