blob: 8c5661db99be649daeb704ab76f25c2e579cf032 [file] [log] [blame]
# Copyright (C) 2001-2004 Python Software Foundation
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00004
Barry Warsaw235c8eb2004-05-09 03:46:42 +00005"""A parser of RFC 2822 and MIME email messages."""
Barry Warsawba925802001-09-23 03:17:28 +00006
Barry Warsaw7e21b672002-05-19 23:51:50 +00007import re
Barry Warsawba925802001-09-23 03:17:28 +00008from cStringIO import StringIO
Barry Warsaw235c8eb2004-05-09 03:46:42 +00009from email.FeedParser import FeedParser
10from email.Message import Message
Barry Warsawe03e8f02002-09-28 20:44:58 +000011
# Matches one line terminator in any of the three common spellings: CRLF,
# a lone CR, or a lone LF.  CRLF is listed first so that it is consumed as a
# single terminator rather than as CR followed by LF.  Nothing in the visible
# part of this module references the pattern.
NLCRE = re.compile('\r\n|\r|\n')
Barry Warsaw487fe6a2002-10-07 17:27:35 +000013
Barry Warsawe03e8f02002-09-28 20:44:58 +000014
Barry Warsawe968ead2001-10-04 17:05:11 +000015
class Parser:
    def __init__(self, _class=Message, strict=False):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message, which
        can then be manipulated and turned over to a Generator to return the
        textual representation of the message.

        The string must be formatted as a block of RFC 2822 headers and header
        continuation lines, optionally preceded by a `Unix-from' header.  The
        header block is terminated either by the end of the string or by a
        blank line.

        _class is the class to instantiate for new message objects when they
        must be created.  This class must have a constructor that can take
        zero arguments.  Default is Message.Message.

        Optional strict is accepted only so that existing callers which pass
        it keep working.  This class neither stores nor consults the value, so
        it has no effect on how messages are parsed here.
        """
        # `strict` is deliberately not saved: nothing below ever reads it.
        self._class = _class

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        Reads all the data from the file-like object fp (anything with a
        read(size) method) and returns the root of the message structure.
        Optional headersonly is a flag specifying whether to stop parsing
        after reading the headers or not.  The default is False, meaning it
        parses the entire contents of the file.
        """
        feedparser = FeedParser(self._class)
        if headersonly:
            feedparser._set_headersonly()
        # Push the input through the incremental parser in 8192-character
        # chunks; an empty read signals end of input.
        while True:
            data = fp.read(8192)
            if not data:
                break
            feedparser.feed(data)
        # close() finishes the parse and hands back the root message object.
        return feedparser.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        Returns the root of the message structure.  Optional headersonly is a
        flag specifying whether to stop parsing after reading the headers or
        not.  The default is False, meaning it parses the entire contents of
        the string.
        """
        # Wrap the string in a file-like object so parse() does the work.
        return self.parse(StringIO(text), headersonly=headersonly)
Barry Warsawba925802001-09-23 03:17:28 +000068
Barry Warsawe5528822001-10-11 15:43:00 +000069
70
class HeaderParser(Parser):
    """Parser variant that always asks the base Parser for headers only."""

    def parse(self, fp, headersonly=True):
        """Parse the file-like object fp in headers-only mode.

        The headersonly argument is accepted for signature compatibility with
        Parser.parse() but its value is not used: True is always passed on.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Parse the string text in headers-only mode.

        The headersonly argument is accepted for signature compatibility with
        Parser.parsestr() but its value is not used: True is always passed on.
        """
        return Parser.parsestr(self, text, headersonly=True)