\section{Standard Module \sectcode{rfc822}}
\label{module-rfc822}
\stmodindex{rfc822}

\setindexsubitem{(in module rfc822)}
Guido van Rossum86751151995-02-28 17:14:32 +00006
This module defines a class, \code{Message}, which represents a
collection of ``email headers'' as defined by the Internet standard
\rfc{822}. It is used in various contexts, usually to read such
headers from a file.
Guido van Rossuma12ef941995-02-27 17:53:25 +000011
Note that there's a separate module to read \UNIX{}, MH, and MMDF
style mailbox files: \code{mailbox}.
\refstmodindex{mailbox}
Guido van Rossum067a2ac1997-06-02 17:30:03 +000015
A \code{Message} instance is instantiated with an open file object as
its parameter. The optional \code{seekable} parameter indicates whether
the file object is seekable; the default value is 1 for true.
Instantiation reads headers from the file up to a blank line and
stores them in the instance; after instantiation, the file is
positioned directly after the blank line that terminates the headers.
22
Input lines as read from the file may either be terminated by CR-LF or
by a single linefeed; a terminating CR-LF is replaced by a single
linefeed before the line is stored.
26
All header matching is done independent of upper or lower case;
e.g.\ \code{m['From']}, \code{m['from']} and \code{m['FROM']} all yield
the same result.
30
\begin{funcdesc}{parsedate}{date}
Attempts to parse a date according to the rules in \rfc{822}. However,
some mailers don't follow that format as specified, so
\code{parsedate()} tries to guess correctly in such cases.
\var{date} is a string containing an \rfc{822} date, such as
\code{"Mon, 20 Nov 1995 19:12:08 -0500"}. If it succeeds in parsing
the date, \code{parsedate()} returns a 9-tuple that can be passed
directly to \code{time.mktime()}; otherwise \code{None} will be
returned.
\end{funcdesc}
41
\begin{funcdesc}{parsedate_tz}{date}
Performs the same function as \code{parsedate()}, but returns either
\code{None} or a 10-tuple; the first 9 elements make up a tuple that
can be passed directly to \code{time.mktime()}, and the tenth is the
offset of the date's time zone from UTC (which is the official term
for Greenwich Mean Time).
\end{funcdesc}
49
\begin{funcdesc}{mktime_tz}{tuple}
Turn a 10-tuple as returned by \code{parsedate_tz()} into a UTC timestamp.
Minor deficiency: this first interprets the first 8 elements as a
local time and then compensates for the timezone difference;
this may yield a slight error around daylight saving time
switch dates. This is not enough to worry about for common use.
\end{funcdesc}
57
\subsection{Message Objects}

A \code{Message} instance has the following methods:
61
\begin{funcdesc}{rewindbody}{}
Seek to the start of the message body. This only works if the file
object is seekable.
\end{funcdesc}
66
\begin{funcdesc}{getallmatchingheaders}{name}
Return a list of lines consisting of all headers matching
\var{name}, if any. Each physical line, whether it is a continuation
line or not, is a separate list item. Return the empty list if no
header matches \var{name}.
\end{funcdesc}
73
\begin{funcdesc}{getfirstmatchingheader}{name}
Return a list of lines comprising the first header matching
\var{name}, and its continuation line(s), if any. Return \code{None}
if there is no header matching \var{name}.
\end{funcdesc}
79
\begin{funcdesc}{getrawheader}{name}
Return a single string consisting of the text after the colon in the
first header matching \var{name}. This includes leading whitespace,
the trailing linefeed, and internal linefeeds and whitespace if
any continuation line(s) were present. Return \code{None} if there is
no header matching \var{name}.
\end{funcdesc}
87
\begin{funcdesc}{getheader}{name}
Like \code{getrawheader(\var{name})}, but strip leading and trailing
whitespace (but not internal whitespace).
\end{funcdesc}
92
\begin{funcdesc}{getaddr}{name}
Return a pair (full name, email address) parsed from the string
returned by \code{getheader(\var{name})}. If no header matching
\var{name} exists, return \code{(None, None)}; otherwise both the full
name and the address are (possibly empty) strings.

Example: If \code{m}'s first \code{From} header contains the string\\
\code{'jack@cwi.nl (Jack Jansen)'}, then
\code{m.getaddr('From')} will yield the pair
\code{('Jack Jansen', 'jack@cwi.nl')}.
If the header contained
\code{'Jack Jansen <jack@cwi.nl>'} instead, it would yield the
exact same result.
\end{funcdesc}
107
\begin{funcdesc}{getaddrlist}{name}
This is similar to \code{getaddr(\var{name})}, but parses a header
containing a list of email addresses (e.g.\ a \code{To} header) and
returns a list of (full name, email address) pairs (even if there was
only one address in the header). If there is no header matching
\var{name}, return an empty list.

XXX The current version of this function is not really correct. It
yields bogus results if a full name contains a comma.
\end{funcdesc}
118
\begin{funcdesc}{getdate}{name}
Retrieve a header using \code{getheader()} and parse it into a 9-tuple
compatible with \code{time.mktime()}. If there is no header matching
\var{name}, or it is unparsable, return \code{None}.

Date parsing appears to be a black art, and not all mailers adhere to
the standard. While this function has been tested and found correct on
a large collection of email from many sources, it is still possible
that it may occasionally yield an incorrect result.
\end{funcdesc}
129
\begin{funcdesc}{getdate_tz}{name}
Retrieve a header using \code{getheader()} and parse it into a 10-tuple;
the first 9 elements will make a tuple compatible with
\code{time.mktime()}, and the 10th is a number giving the offset of
the date's time zone from UTC. Similarly to \code{getdate()}, if
there is no header matching \var{name}, or it is unparsable, return
\code{None}.
\end{funcdesc}
138
\code{Message} instances also support a read-only mapping interface.
In particular: \code{m[name]} is the same as \code{m.getheader(name)};
and \code{len(m)}, \code{m.has_key(name)}, \code{m.keys()},
\code{m.values()} and \code{m.items()} act as expected (and
consistently).
144
Finally, \code{Message} instances have two public instance variables:

\begin{datadesc}{headers}
A list containing the entire set of header lines, in the order in
which they were read. Each line contains a trailing newline. The
blank line terminating the headers is not contained in the list.
\end{datadesc}

\begin{datadesc}{fp}
The file object passed at instantiation time.
\end{datadesc}