blob: 66eb716e97b148524644806fa86a96ea8c24cb5a [file] [log] [blame]
Barry Warsaw5b9da892002-10-01 01:05:52 +00001\declaremodule{standard}{email.Header}
2\modulesynopsis{Representing non-ASCII headers}
3
4\rfc{2822} is the base standard that describes the format of email
5messages. It derives from the older \rfc{822} standard which came
Barry Warsaw5db478f2002-10-01 04:33:16 +00006into widespread use at a time when most email was composed of \ASCII{}
Barry Warsaw5b9da892002-10-01 01:05:52 +00007characters only. \rfc{2822} is a specification written assuming email
8contains only 7-bit \ASCII{} characters.
9
Of course, as email has been deployed worldwide, it has become
internationalized, such that language-specific character sets can now
be used in email messages. The base standard still requires email
messages to be transferred using only 7-bit \ASCII{} characters, so a
slew of RFCs have been written describing how to encode email
containing non-\ASCII{} characters into \rfc{2822}-compliant format.
These RFCs include \rfc{2045}, \rfc{2046}, \rfc{2047}, and \rfc{2231}.
The \module{email} package supports these standards in its
\module{email.Header} and \module{email.Charset} modules.
19
20If you want to include non-\ASCII{} characters in your email headers,
21say in the \mailheader{Subject} or \mailheader{To} fields, you should
Barry Warsaw5db478f2002-10-01 04:33:16 +000022use the \class{Header} class and assign the field in the
23\class{Message} object to an instance of \class{Header} instead of
24using a string for the header value. For example:
Barry Warsaw5b9da892002-10-01 01:05:52 +000025
26\begin{verbatim}
27>>> from email.Message import Message
28>>> from email.Header import Header
29>>> msg = Message()
30>>> h = Header('p\xf6stal', 'iso-8859-1')
31>>> msg['Subject'] = h
32>>> print msg.as_string()
33Subject: =?iso-8859-1?q?p=F6stal?=
34
35
36\end{verbatim}
37
Notice that we wanted the \mailheader{Subject} field to contain a
non-\ASCII{} character. We did this by creating a \class{Header}
instance and passing in the character set that the byte string was
encoded in. When the subsequent \class{Message} instance was
flattened, the \mailheader{Subject} field was properly \rfc{2047}
encoded. MIME-aware mail readers would show this header using the
embedded ISO-8859-1 character.
45
46\versionadded{2.2.2}
47
48Here is the \class{Header} class description:
49
50\begin{classdesc}{Header}{\optional{s\optional{, charset\optional{,
51 maxlinelen\optional{, header_name\optional{, continuation_ws}}}}}}
Barry Warsaw5db478f2002-10-01 04:33:16 +000052Create a MIME-compliant header that can contain strings in different
53character sets.
Barry Warsaw5b9da892002-10-01 01:05:52 +000054
55Optional \var{s} is the initial header value. If \code{None} (the
56default), the initial header value is not set. You can later append
57to the header with \method{append()} method calls. \var{s} may be a
58byte string or a Unicode string, but see the \method{append()}
59documentation for semantics.
60
61Optional \var{charset} serves two purposes: it has the same meaning as
62the \var{charset} argument to the \method{append()} method. It also
63sets the default character set for all subsequent \method{append()}
64calls that omit the \var{charset} argument. If \var{charset} is not
65provided in the constructor (the default), the \code{us-ascii}
66character set is used both as \var{s}'s initial charset and as the
67default for subsequent \method{append()} calls.
68
The maximum line length can be specified explicitly via
\var{maxlinelen}. For splitting the first line to a shorter value (to
account for the field header which isn't included in \var{s},
e.g.\ \mailheader{Subject}) pass in the name of the field in
\var{header_name}. The default \var{maxlinelen} is 76, and the
default value for \var{header_name} is \code{None}, meaning it is not
taken into account for the first line of a long, split header.
76
Barry Warsaw5db478f2002-10-01 04:33:16 +000077Optional \var{continuation_ws} must be \rfc{2822}-compliant folding
Barry Warsaw5b9da892002-10-01 01:05:52 +000078whitespace, and is usually either a space or a hard tab character.
79This character will be prepended to continuation lines.
80\end{classdesc}
81
82\begin{methoddesc}[Header]{append}{s\optional{, charset}}
83Append the string \var{s} to the MIME header.
84
85Optional \var{charset}, if given, should be a \class{Charset} instance
86(see \refmodule{email.Charset}) or the name of a character set, which
87will be converted to a \class{Charset} instance. A value of
88\code{None} (the default) means that the \var{charset} given in the
89constructor is used.
90
\var{s} may be a byte string or a Unicode string. If it is a byte
string (i.e.\ \code{isinstance(s, str)} is true), then
\var{charset} is the encoding of that byte string, and a
\exception{UnicodeError} will be raised if the string cannot be
decoded with that character set.
96
97If \var{s} is a Unicode string, then \var{charset} is a hint
98specifying the character set of the characters in the string. In this
99case, when producing an \rfc{2822}-compliant header using \rfc{2047}
100rules, the Unicode string will be encoded using the following charsets
101in order: \code{us-ascii}, the \var{charset} hint, \code{utf-8}. The
102first character set to not provoke a \exception{UnicodeError} is used.
103\end{methoddesc}
104
105\begin{methoddesc}[Header]{encode}{}
106Encode a message header into an RFC-compliant format, possibly
107wrapping long lines and encapsulating non-\ASCII{} parts in base64 or
108quoted-printable encodings.
109\end{methoddesc}
110
111The \class{Header} class also provides a number of methods to support
112standard operators and built-in functions.
113
114\begin{methoddesc}[Header]{__str__}{}
115A synonym for \method{Header.encode()}. Useful for
Barry Warsaw5db478f2002-10-01 04:33:16 +0000116\code{str(aHeader)}.
Barry Warsaw5b9da892002-10-01 01:05:52 +0000117\end{methoddesc}
118
119\begin{methoddesc}[Header]{__unicode__}{}
120A helper for the built-in \function{unicode()} function. Returns the
121header as a Unicode string.
122\end{methoddesc}
123
124\begin{methoddesc}[Header]{__eq__}{other}
125This method allows you to compare two \class{Header} instances for equality.
126\end{methoddesc}
127
128\begin{methoddesc}[Header]{__ne__}{other}
129This method allows you to compare two \class{Header} instances for inequality.
130\end{methoddesc}
131
132The \module{email.Header} module also provides the following
133convenient functions.
134
135\begin{funcdesc}{decode_header}{header}
136Decode a message header value without converting the character set.
137The header value is in \var{header}.
138
139This function returns a list of \code{(decoded_string, charset)} pairs
140containing each of the decoded parts of the header. \var{charset} is
141\code{None} for non-encoded parts of the header, otherwise a lower
142case string containing the name of the character set specified in the
143encoded string.
144
145Here's an example:
146
147\begin{verbatim}
148>>> from email.Header import decode_header
149>>> decode_header('=?iso-8859-1?q?p=F6stal?=')
[('p\xf6stal', 'iso-8859-1')]
151\end{verbatim}
152\end{funcdesc}
153
154\begin{funcdesc}{make_header}{decoded_seq\optional{, maxlinelen\optional{,
155 header_name\optional{, continuation_ws}}}}
156Create a \class{Header} instance from a sequence of pairs as returned
157by \function{decode_header()}.
158
159\function{decode_header()} takes a header value string and returns a
160sequence of pairs of the format \code{(decoded_string, charset)} where
161\var{charset} is the name of the character set.
162
This function takes one of those sequences of pairs and returns a
\class{Header} instance. Optional \var{maxlinelen},
\var{header_name}, and \var{continuation_ws} are as in the
\class{Header} constructor.
167\end{funcdesc}