% blob: e2d182e89b4ef99a7b7d85359e8daefeb2e32d9e
\section{Standard Module \sectcode{rfc822}}
\stmodindex{rfc822}

\renewcommand{\indexsubitem}{(in module rfc822)}
5
Guido van Rossuma12ef941995-02-27 17:53:25 +00006This module defines a class, \code{Message}, which represents a
7collection of ``email headers'' as defined by the Internet standard
8RFC 822. It is used in various contexts, usually to read such headers
9from a file.
10
11A \code{Message} instance is instantiated with an open file object as
12parameter. Instantiation reads headers from the file up to a blank
13line and stores them in the instance; after instantiation, the file is
14positioned directly after the blank line that terminates the headers.
15
16Input lines as read from the file may either be terminated by CR-LF or
17by a single linefeed; a terminating CR-LF is replaced by a single
18linefeed before the line is stored.
19
All header matching is done independent of upper or lower case;
e.g.\ \code{m['From']}, \code{m['from']} and \code{m['FROM']} all yield
the same result.
23
Guido van Rossumecde7811995-03-28 13:35:14 +000024\subsection{Message Objects}
25
Guido van Rossuma12ef941995-02-27 17:53:25 +000026A \code{Message} instance has the following methods:
27
28\begin{funcdesc}{rewindbody}{}
29Seek to the start of the message body. This only works if the file
30object is seekable.
31\end{funcdesc}
32
33\begin{funcdesc}{getallmatchingheaders}{name}
Guido van Rossum6c4f0031995-03-07 10:14:09 +000034Return a list of lines consisting of all headers matching
Guido van Rossuma12ef941995-02-27 17:53:25 +000035\var{name}, if any. Each physical line, whether it is a continuation
36line or not, is a separate list item. Return the empty list if no
37header matches \var{name}.
38\end{funcdesc}
39
40\begin{funcdesc}{getfirstmatchingheader}{name}
41Return a list of lines comprising the first header matching
42\var{name}, and its continuation line(s), if any. Return \code{None}
43if there is no header matching \var{name}.
44\end{funcdesc}
45
46\begin{funcdesc}{getrawheader}{name}
Return a single string consisting of the text after the colon in the
first header matching \var{name}. This includes leading whitespace,
the trailing linefeed, and internal linefeeds and whitespace if
any continuation line(s) were present. Return \code{None} if there is
no header matching \var{name}.
52\end{funcdesc}
53
54\begin{funcdesc}{getheader}{name}
55Like \code{getrawheader(\var{name})}, but strip leading and trailing
56whitespace (but not internal whitespace).
57\end{funcdesc}
58
59\begin{funcdesc}{getaddr}{name}
Return a pair (full name, email address) parsed from the string
returned by \code{getheader(\var{name})}. If no header matching
\var{name} exists, return \code{None, None}; otherwise both the full
name and the address are (possibly empty) strings.
64
Guido van Rossum470be141995-03-17 16:07:09 +000065Example: If \code{m}'s first \code{From} header contains the string\\
66\code{'jack@cwi.nl (Jack Jansen)'}, then
Guido van Rossuma12ef941995-02-27 17:53:25 +000067\code{m.getaddr('From')} will yield the pair
Guido van Rossum470be141995-03-17 16:07:09 +000068\code{('Jack Jansen', 'jack@cwi.nl')}.
Guido van Rossuma12ef941995-02-27 17:53:25 +000069If the header contained
Guido van Rossum470be141995-03-17 16:07:09 +000070\code{'Jack Jansen <jack@cwi.nl>'} instead, it would yield the
Guido van Rossuma12ef941995-02-27 17:53:25 +000071exact same result.
72\end{funcdesc}
73
74\begin{funcdesc}{getaddrlist}{name}
This is similar to \code{getaddr(\var{name})}, but parses a header
containing a list of email addresses (e.g.\ a \code{To} header) and
returns a list of (full name, email address) pairs (even if there was
only one address in the header). If there is no header matching
\var{name}, return an empty list.
80
81XXX The current version of this function is not really correct. It
82yields bogus results if a full name contains a comma.
83\end{funcdesc}
84
85\begin{funcdesc}{getdate}{name}
86Retrieve a header using \code{getheader} and parse it into a 9-tuple
Guido van Rossum6c4f0031995-03-07 10:14:09 +000087compatible with \code{time.mktime()}. If there is no header matching
Guido van Rossuma12ef941995-02-27 17:53:25 +000088\var{name}, or it is unparsable, return \code{None}.
89
Date parsing appears to be a black art, and not all mailers adhere to
the standard. While this function has been tested and found correct
on a large collection of email from many sources, it is still possible
that it may occasionally yield an incorrect result.
94\end{funcdesc}
95
96\code{Message} instances also support a read-only mapping interface.
97In particular: \code{m[name]} is the same as \code{m.getheader(name)};
98and \code{len(m)}, \code{m.has_key(name)}, \code{m.keys()},
99\code{m.values()} and \code{m.items()} act as expected (and
100consistently).
101
102Finally, \code{Message} instances have two public instance variables:
103
104\begin{datadesc}{headers}
105A list containing the entire set of header lines, in the order in
106which they were read. Each line contains a trailing newline. The
107blank line terminating the headers is not contained in the list.
108\end{datadesc}
109
110\begin{datadesc}{fp}
111The file object passed at instantiation time.
112\end{datadesc}