blob: 2fcaf25456273b94cbde5b7a55bc4ab7613affbf [file] [log] [blame]
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001# Copyright (C) 2001-2006 Python Software Foundation
Barry Warsaw235c8eb2004-05-09 03:46:42 +00002# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
3# Contact: email-sig@python.org
Barry Warsawba925802001-09-23 03:17:28 +00004
Barry Warsaw235c8eb2004-05-09 03:46:42 +00005"""A parser of RFC 2822 and MIME email messages."""
Barry Warsawba925802001-09-23 03:17:28 +00006
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00007__all__ = ['Parser', 'HeaderParser']
8
Barry Warsawbb113862004-10-03 03:16:19 +00009import warnings
Barry Warsawba925802001-09-23 03:17:28 +000010from cStringIO import StringIO
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000011
12from email.feedparser import FeedParser
13from email.message import Message
Barry Warsawe03e8f02002-09-28 20:44:58 +000014
15
Barry Warsawe968ead2001-10-04 17:05:11 +000016
class Parser:
    """Build a message object tree from RFC 2822 / MIME text.

    Parsing itself is delegated to FeedParser; this class feeds it data
    read from a file-like object or taken from a string.
    """

    def __init__(self, *args, **kws):
        """Parser of RFC 2822 and MIME email messages.

        Creates an in-memory object tree representing the email message,
        which can then be manipulated and turned over to a Generator to
        return the textual representation of the message.

        The input must be formatted as a block of RFC 2822 headers and
        header continuation lines, optionally preceded by a `Unix-from'
        header.  The header block is terminated either by the end of the
        input or by a blank line.

        Accepted arguments (positionally, in this order, or by keyword):

        _class -- the class to instantiate for new message objects when
            they must be created.  It is handed to FeedParser as the
            message factory.  Defaults to email.message.Message.
        strict -- deprecated and ignored; supplying it only triggers a
            DeprecationWarning.

        Raises TypeError when more than two positional arguments are
        given, when an argument is supplied both positionally and by
        keyword, or when an unknown keyword argument is present.
        """
        # Fold positional arguments into kws so that everything below only
        # has to deal with keywords.
        positional = len(args)
        if positional:
            if '_class' in kws:
                raise TypeError("Multiple values for keyword arg '_class'")
            kws['_class'] = args[0]
            if positional == 2:
                if 'strict' in kws:
                    raise TypeError("Multiple values for keyword arg 'strict'")
                kws['strict'] = args[1]
            elif positional > 2:
                raise TypeError('Too many arguments')
        self._class = kws.pop('_class', Message)
        if 'strict' in kws:
            # stacklevel 2 attributes the warning to the code that
            # instantiated the parser rather than to this constructor.
            warnings.warn("'strict' argument is deprecated (and ignored)",
                          DeprecationWarning, 2)
            kws.pop('strict')
        # Anything still left over was never a recognised option.
        if kws:
            raise TypeError('Unexpected keyword arguments')

    def parse(self, fp, headersonly=False):
        """Create a message structure from the data in a file.

        fp is read in chunks of 8192 until read() returns an empty result,
        and each chunk is fed to a fresh FeedParser.  Returns the root of
        the resulting message structure.

        Optional headersonly is a flag specifying whether to stop parsing
        after reading the headers or not.  The default is False, meaning
        the entire contents of the file are parsed.
        """
        feeder = FeedParser(self._class)
        if headersonly:
            feeder._set_headersonly()
        chunk = fp.read(8192)
        while chunk:
            feeder.feed(chunk)
            chunk = fp.read(8192)
        return feeder.close()

    def parsestr(self, text, headersonly=False):
        """Create a message structure from a string.

        The string is wrapped in a StringIO object and handed to parse().
        Returns the root of the message structure.

        Optional headersonly is a flag specifying whether to stop parsing
        after reading the headers or not.  The default is False, meaning
        the entire text is parsed.
        """
        source = StringIO(text)
        return self.parse(source, headersonly=headersonly)
Barry Warsawba925802001-09-23 03:17:28 +000083
Barry Warsawe5528822001-10-11 15:43:00 +000084
85
class HeaderParser(Parser):
    """Parser variant that always runs in headers-only mode."""

    def parse(self, fp, headersonly=True):
        """Create a message structure from a file, parsing headers only.

        The headersonly argument is accepted so the signature matches
        Parser.parse(), but its value is ignored: headers-only mode is
        always requested.
        """
        return Parser.parse(self, fp, headersonly=True)

    def parsestr(self, text, headersonly=True):
        """Create a message structure from a string, parsing headers only.

        The headersonly argument is accepted so the signature matches
        Parser.parsestr(), but its value is ignored: headers-only mode is
        always requested.
        """
        return Parser.parsestr(self, text, headersonly=True)