blob: 6caaff53ad40897b6fbc3c024e8365679dfc7fea [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2001-2007 Python Software Foundation
2# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
3# Contact: email-sig@python.org
4
5"""A parser of RFC 2822 and MIME email messages."""
6
# Public API of this module; BytesParser is defined below and is public too.
__all__ = ['Parser', 'HeaderParser', 'BytesParser']
8
9import warnings
R. David Murray96fd54e2010-10-08 15:55:28 +000010from io import StringIO, TextIOWrapper
Guido van Rossum8b3febe2007-08-30 01:15:14 +000011
12from email.feedparser import FeedParser
13from email.message import Message
14
15
16
class Parser:
    def __init__(self, *args, **kws):
        """Parser of RFC 2822 and MIME email messages.

        Builds an in-memory object tree for an email message.  The tree can
        be manipulated and later handed to a Generator to produce the textual
        form of the message again.

        The input text must be a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block ends either at the end of the text or at a blank line.

        _class is the class instantiated whenever a new message object is
        needed; it must be constructible with zero arguments.  It may be
        given positionally (first argument) or by keyword and defaults to
        email.message.Message.

        strict (second positional argument or keyword) is deprecated and
        ignored; passing it emits a DeprecationWarning.

        Raises TypeError for duplicated, surplus or unknown arguments.
        """
        # Fold the legacy positional arguments into the keyword dict so that
        # both calling conventions share the handling below.  The order of
        # these checks determines which TypeError wins; keep it as is.
        positional = len(args)
        if positional >= 1:
            if '_class' in kws:
                raise TypeError("Multiple values for keyword arg '_class'")
            kws['_class'] = args[0]
        if positional == 2:
            if 'strict' in kws:
                raise TypeError("Multiple values for keyword arg 'strict'")
            kws['strict'] = args[1]
        if positional > 2:
            raise TypeError('Too many arguments')

        # Consume the recognised keywords; anything left over is an error.
        self._class = kws.pop('_class', Message)
        if 'strict' in kws:
            # stacklevel 2 attributes the warning to the caller of Parser().
            warnings.warn("'strict' argument is deprecated (and ignored)",
                          DeprecationWarning, 2)
            del kws['strict']
        if kws:
            raise TypeError('Unexpected keyword arguments')

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads fp to exhaustion in 8192-sized chunks, feeding each chunk to a
        FeedParser, and returns the root of the resulting message structure.
        When the optional headersonly flag is true, parsing stops after the
        headers have been read.  The default is False, meaning the entire
        contents of the file are parsed.
        """
        feeder = FeedParser(self._class)
        if headersonly:
            feeder._set_headersonly()
        # An empty read signals end of input.
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Wraps text in a StringIO and delegates to parse().  Returns the root
        of the message structure.  When the optional headersonly flag is
        true, parsing stops after the headers have been read.  The default is
        False, meaning the entire text is parsed.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
83
84
85
class HeaderParser(Parser):
    """A Parser that only ever parses the headers of a message.

    Both methods accept a headersonly argument for signature compatibility
    with Parser, but its value is ignored: headers-only mode is always used.
    """

    def parse(self, fp, headersonly=True):
        """Parse only the headers from the file fp; headersonly is ignored."""
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse only the headers from the string text; headersonly is ignored."""
        return Parser.parsestr(self, text, headersonly=True)
R. David Murray96fd54e2010-10-08 15:55:28 +000092
93
class BytesParser:

    def __init__(self, *args, **kw):
        """Parser of binary RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the input or by a
        blank line.

        All arguments are forwarded unchanged to Parser, which does the
        actual parsing; see Parser for their meaning (notably _class, the
        class to instantiate for new message objects).
        """
        self.parser = Parser(*args, **kw)

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a binary file.

        Reads all the data from the file and returns the root of the message
        structure.  Optional headersonly is a flag specifying whether to stop
        parsing after reading the headers or not.  The default is False,
        meaning it parses the entire contents of the file.

        The file is decoded as ASCII with the surrogateescape error handler,
        so non-ASCII bytes are carried through as surrogate escapes.  The
        caller's file object is left open when this method returns.
        """
        wrapper = TextIOWrapper(fp, encoding='ascii', errors='surrogateescape')
        try:
            return self.parser.parse(wrapper, headersonly)
        finally:
            # Detach rather than close: closing the TextIOWrapper (explicitly
            # or when it is garbage collected) would also close the binary
            # file, which belongs to the caller.
            wrapper.detach()

    def parsebytes(self, text, headersonly=False):
        """Create a message structure from a byte string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the byte string.

        The bytes are decoded as ASCII with the surrogateescape error handler
        before being handed to the underlying Parser.
        """
        text = text.decode('ASCII', errors='surrogateescape')
        return self.parser.parsestr(text, headersonly)