Sync'ing with standalone email package 2.0.1. This adds support for non-us-ascii character sets in headers and bodies. Some API changes (with DeprecationWarnings for the old APIs). Better RFC-compliant implementations of base64 and quoted-printable. Updated test cases. Documentation updates to follow (after I finish writing them ;).

commit: 409a4c08b545aa064cf8fe3b8de51404756a301e [log] [tgz]
author: Barry Warsaw <barry@python.org> Wed Apr 10 21:01:31 2002 +0000
committer: Barry Warsaw <barry@python.org> Wed Apr 10 21:01:31 2002 +0000
tree: 06cf8fe44e1fe28fbc0147635ec41961f2df6515
parent: 68e69338ae19c37bd3e69cb76e107bfa76231e06 [diff]
diff --git a/Lib/email/Charset.py b/Lib/email/Charset.py
new file mode 100644
index 0000000..4874597
--- /dev/null
+++ b/Lib/email/Charset.py

@@ -0,0 +1,327 @@
+# Copyright (C) 2001,2002 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield)
+
+from types import UnicodeType
+from email.Encoders import encode_7or8bit
+import email.base64MIME
+import email.quopriMIME
+
+
+
+# Flags for types of header encodings
+QP     = 1  # Quoted-Printable
+BASE64 = 2  # Base64
+
+# In "=?charset?q?hello_world?=", the =?, ?q?, and ?= add up to 7
+MISC_LEN = 7 
+
+DEFAULT_CHARSET = 'us-ascii'
+
+
+
+# Defaults
+CHARSETS = {
+    # input        header enc  body enc output conv
+    'iso-8859-1':  (QP,        QP,      None), 
+    'iso-8859-2':  (QP,        QP,      None),
+    'us-ascii':    (None,      None,    None),
+    'big5':        (BASE64,    BASE64,  None),
+    'gb2312':      (BASE64,    BASE64,  None), 
+    'euc-jp':      (BASE64,    None,    'iso-2022-jp'),
+    'shift_jis':   (BASE64,    None,    'iso-2022-jp'),
+    'iso-2022-jp': (BASE64,    None,    None),
+    'koi8-r':      (BASE64,    BASE64,  None),
+    'utf-8':       (BASE64,    BASE64,  'utf-8'),
+    }
+
+# Aliases for other commonly-used names for character sets.  Map
+# them to the real ones used in email.
+ALIASES = {
+    'latin_1': 'iso-8859-1',
+    'latin-1': 'iso-8859-1',
+    'ascii':   'us-ascii',
+    }
+
+# Map charsets to their Unicode codec strings.  Note that the Japanese
+# examples included below do not (yet) come with Python!  They are available
+# from http://pseudo.grad.sccs.chukyo-u.ac.jp/~kajiyama/python/
+
+# The Chinese and Korean codecs are available from SourceForge:
+#
+#     http://sourceforge.net/projects/python-codecs/
+#
+# although you'll need to check them out of cvs since they haven't been file
+# released yet.  You might also try to use
+#
+#     http://www.freshports.org/port-description.php3?port=6702
+#
+# if you can get logged in.  AFAICT, both the Chinese and Korean codecs are
+# fairly experimental at this point.
+CODEC_MAP = {
+    'euc-jp':      'japanese.euc-jp',
+    'iso-2022-jp': 'japanese.iso-2022-jp',
+    'shift_jis':   'japanese.shift_jis',
+    'gb2312':      'eucgb2312_cn',
+    'big5':        'big5_tw',
+    'utf-8':       'utf-8',
+    # Hack: We don't want *any* conversion for stuff marked us-ascii, as all
+    # sorts of garbage might be sent to us in the guise of 7-bit us-ascii.
+    # Let that stuff pass through without conversion to/from Unicode.
+    'us-ascii':    None,
+    }
+
+
+
+# Convenience functions for extending the above mappings
+def add_charset(charset, header_enc=None, body_enc=None, output_charset=None):
+    """Add charset properties to the global map.
+
+    charset is the input character set, and must be the canonical name of a
+    character set.
+
+    Optional header_enc and body_enc is either Charset.QP for
+    quoted-printable, Charset.BASE64 for base64 encoding, or None for no
+    encoding.  It describes how message headers and message bodies in the
+    input charset are to be encoded.  Default is no encoding.
+
+    Optional output_charset is the character set that the output should be
+    in.  Conversions will proceed from input charset, to Unicode, to the
+    output charset when the method Charset.convert() is called.  The default
+    is to output in the same character set as the input.
+
+    Both input_charset and output_charset must have Unicode codec entries in
+    the module's charset-to-codec mapping; use add_codec(charset, codecname)
+    to add codecs the module does not know about.  See the codec module's
+    documentation for more information.
+    """
+    CHARSETS[charset] = (header_enc, body_enc, output_charset)
+
+
+def add_alias(alias, canonical):
+    """Add a character set alias.
+
+    alias is the alias name, e.g. latin-1
+    canonical is the character set's canonical name, e.g. iso-8859-1
+    """
+    ALIASES[alias] = canonical
+
+
+def add_codec(charset, codecname):
+    """Add a codec that maps characters in the given charset to/from Unicode.
+
+    charset is the canonical name of a character set.  codecname is the name
+    of a Python codec, as appropriate for the second argument to the unicode()
+    built-in, or to the .encode() method of a Unicode string.
+    """
+    CODEC_MAP[charset] = codecname
+
+
+
+class Charset:
+    """Map character sets to their email properties.
+
+    This class provides information about the requirements imposed on email
+    for a specific character set.  It also provides convenience routines for
+    converting between character sets, given the availability of the
+    applicable codecs.  Given a character set, it will do its best to provide
+    information on how to use that character set in an email.
+    
+    Certain character sets must be encoded with quoted-printable or base64
+    when used in email headers or bodies.  Certain character sets must be
+    converted outright, and are not allowed in email.  Instances of this
+    class expose the following information about a character set:
+
+    input_charset: The initial character set specified.  Common aliases
+                   are converted to their `official' email names (e.g. latin_1
+                   is converted to iso-8859-1).  Defaults to 7-bit us-ascii.
+
+    header_encoding: If the character set must be encoded before it can be
+                     used in an email header, this attribute will be set to
+                     Charset.QP (for quoted-printable) or Charset.BASE64 (for
+                     base64 encoding).  Otherwise, it will be None.
+
+    body_encoding: Same as header_encoding, but describes the encoding for the
+                   mail message's body, which indeed may be different than the
+                   header encoding.
+
+    output_charset: Some character sets must be converted before they can be
+                    used in email headers or bodies.  If the input_charset is
+                    one of them, this attribute will contain the name of the
+                    charset output will be converted to.  Otherwise, it will
+                    be None.
+
+    input_codec: The name of the Python codec used to convert the
+                 input_charset to Unicode.  If no conversion codec is
+                 necessary, this attribute will be None.
+
+    output_codec: The name of the Python codec used to convert Unicode
+                  to the output_charset.  If no conversion codec is necessary,
+                  this attribute will have the same value as the input_codec.
+    """
+    def __init__(self, input_charset=DEFAULT_CHARSET):
+        # Set the input charset after filtering through the aliases
+        self.input_charset = ALIASES.get(input_charset, input_charset)
+        # We can try to guess which encoding and conversion to use by the
+        # charset_map dictionary.  Try that first, but let the user override
+        # it.
+        henc, benc, conv = CHARSETS.get(self.input_charset,
+                                        (BASE64, BASE64, None))
+        # Set the attributes, allowing the arguments to override the default.
+        self.header_encoding = henc
+        self.body_encoding = benc
+        self.output_charset = ALIASES.get(conv, conv)
+        # Now set the codecs.  If one isn't defined for input_charset,
+        # guess and try a Unicode codec with the same name as input_charset.
+        self.input_codec = CODEC_MAP.get(self.input_charset,
+                                         self.input_charset)
+        self.output_codec = CODEC_MAP.get(self.output_charset,
+                                            self.input_codec)
+
+    def __str__(self):
+        return self.input_charset.lower()
+
+    def __eq__(self, other):
+        return str(self) == str(other).lower()
+
+    def __ne__(self, other):
+        return not self.__eq__(other)
+
+    def get_body_encoding(self):
+        """Return the content-transfer-encoding used for body encoding.
+
+        This is either the string `quoted-printable' or `base64' depending on
+        the encoding used, or it is a function in which case you should call
+        the function with a single argument, the Message object being
+        encoded.  The function should then set the Content-Transfer-Encoding:
+        header itself to whatever is appropriate.
+
+        Returns "quoted-printable" if self.body_encoding is QP.
+        Returns "base64" if self.body_encoding is BASE64.
+        Returns the encode_7or8bit function otherwise.
+        """
+        if self.body_encoding == QP:
+            return 'quoted-printable'
+        elif self.body_encoding == BASE64:
+            return 'base64'
+        else:
+            return encode_7or8bit
+
+    def convert(self, s):
+        """Convert a string from the input_codec to the output_codec."""
+        if self.input_codec <> self.output_codec:
+            return unicode(s, self.input_codec).encode(self.output_codec)
+        else:
+            return s
+
+    def to_splittable(self, s):
+        """Convert a possibly multibyte string to a safely splittable format.
+
+        Uses the input_codec to try and convert the string to Unicode, so it
+        can be safely split on character boundaries (even for double-byte
+        characters).
+
+        Returns the string untouched if we don't know how to convert it to
+        Unicode with the input_charset.
+
+        Characters that could not be converted to Unicode will be replaced
+        with the Unicode replacement character U+FFFD.
+        """
+        if isinstance(s, UnicodeType) or self.input_codec is None:
+            return s
+        try:
+            return unicode(s, self.input_codec, 'replace')
+        except LookupError:
+            # Input codec not installed on system, so return the original
+            # string unchanged.
+            return s
+
+    def from_splittable(self, ustr, to_output=1):
+        """Convert a splittable string back into an encoded string.
+
+        Uses the proper codec to try and convert the string from
+        Unicode back into an encoded format.  Return the string as-is
+        if it is not Unicode, or if it could not be encoded from
+        Unicode.
+
+        Characters that could not be converted from Unicode will be replaced
+        with an appropriate character (usually '?').
+
+        If to_output is true, uses output_codec to convert to an encoded
+        format.  If to_output is false, uses input_codec.  to_output defaults
+        to 1.
+        """
+        if to_output:
+            codec = self.output_codec
+        else:
+            codec = self.input_codec
+        if not isinstance(ustr, UnicodeType) or codec is None:
+            return ustr
+        try:
+            return ustr.encode(codec, 'replace')
+        except LookupError:
+            # Output codec not installed
+            return ustr
+
+    def get_output_charset(self):
+        """Return the output character set.
+
+        This is self.output_charset if that is set, otherwise it is
+        self.input_charset.
+        """
+        return self.output_charset or self.input_charset
+
+    def encoded_header_len(self, s):
+        """Return the length of the encoded header string."""
+        cset = self.get_output_charset()
+        # The len(s) of a 7bit encoding is len(s)
+        if self.header_encoding is BASE64:
+            return email.base64MIME.base64_len(s) + len(cset) + MISC_LEN
+        elif self.header_encoding is QP:
+            return email.quopriMIME.header_quopri_len(s) + len(cset) + MISC_LEN
+        else:
+            return len(s)
+
+    def header_encode(self, s, convert=0):
+        """Header-encode a string, optionally converting it to output_charset.
+
+        If convert is true, the string will be converted from the input
+        charset to the output charset automatically.  This is not useful for
+        multibyte character sets, which have line length issues (multibyte
+        characters must be split on a character, not a byte boundary); use the
+        high-level Header class to deal with these issues.  convert defaults
+        to 0.
+
+        The type of encoding (base64 or quoted-printable) will be based on
+        self.header_encoding.
+        """
+        cset = self.get_output_charset()
+        if convert:
+            s = self.convert(s)
+        # 7bit/8bit encodings return the string unchanged (modulo conversions)
+        if self.header_encoding is BASE64:
+            return email.base64MIME.header_encode(s, cset)
+        elif self.header_encoding is QP:
+            return email.quopriMIME.header_encode(s, cset)
+        else:
+            return s
+
+    def body_encode(self, s, convert=1):
+        """Body-encode a string and convert it to output_charset.
+
+        If convert is true (the default), the string will be converted from
+        the input charset to output charset automatically.  Unlike
+        header_encode(), there are no issues with byte boundaries and
+        multibyte charsets in email bodies, so this is usually pretty safe.
+
+        The type of encoding (base64 or quoted-printable) will be based on
+        self.body_encoding.
+        """
+        if convert:
+            s = self.convert(s)
+        # 7bit/8bit encodings return the string unchanged (modulo conversions)
+        if self.body_encoding is BASE64:
+            return email.base64MIME.body_encode(s)
+        elif self.body_encoding is QP:
+            return email.quopriMIME.body_encode(s)
+        else:
+            return s

diff --git a/Lib/email/Encoders.py b/Lib/email/Encoders.py
index d9cd42d..f09affa 100644
--- a/Lib/email/Encoders.py
+++ b/Lib/email/Encoders.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Module containing encoding functions for Image.Image and Text.Text.
@@ -11,7 +11,9 @@
 
 # Helpers
 def _qencode(s):
-    return _encodestring(s, quotetabs=1)
+    enc = _encodestring(s, quotetabs=1)
+    # Must encode spaces, which quopri.encodestring() doesn't do
+    return enc.replace(' ', '=20')
 
 
 def _bencode(s):
@@ -54,6 +56,10 @@
 def encode_7or8bit(msg):
     """Set the Content-Transfer-Encoding: header to 7bit or 8bit."""
     orig = msg.get_payload()
+    if orig is None:
+        # There's no payload.  For backwards compatibility we use 7bit
+        msg['Content-Transfer-Encoding'] = '7bit'
+        return
     # We play a trick to make this go fast.  If encoding to ASCII succeeds, we
     # know the data must be 7bit, otherwise treat it as 8bit.
     try:

diff --git a/Lib/email/Errors.py b/Lib/email/Errors.py
index 71d7663..e3a3666 100644
--- a/Lib/email/Errors.py
+++ b/Lib/email/Errors.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """email package exception classes.

diff --git a/Lib/email/Generator.py b/Lib/email/Generator.py
index 981e0ff..dbbcabc 100644
--- a/Lib/email/Generator.py
+++ b/Lib/email/Generator.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Classes to generate plain text from a message object tree.
@@ -166,30 +166,33 @@
             return text
         rtn = []
         for line in text.split('\n'):
+            splitline = []
             # Short lines can remain unchanged
             if len(line.replace('\t', SPACE8)) <= maxheaderlen:
-                rtn.append(line)
-                SEMINLTAB.join(rtn)
+                splitline.append(line)
+                rtn.append(SEMINLTAB.join(splitline))
             else:
-                oldlen = len(text)
+                oldlen = len(line)
                 # Try to break the line on semicolons, but if that doesn't
                 # work, try to split on folding whitespace.
-                while len(text) > maxheaderlen:
-                    i = text.rfind(';', 0, maxheaderlen)
+                while len(line) > maxheaderlen:
+                    i = line.rfind(';', 0, maxheaderlen)
                     if i < 0:
                         break
-                    rtn.append(text[:i])
-                    text = text[i+1:].lstrip()
-                if len(text) <> oldlen:
+                    splitline.append(line[:i])
+                    line = line[i+1:].lstrip()
+                if len(line) <> oldlen:
                     # Splitting on semis worked
-                    rtn.append(text)
-                    return SEMINLTAB.join(rtn)
+                    splitline.append(line)
+                    rtn.append(SEMINLTAB.join(splitline))
+                    continue
                 # Splitting on semis didn't help, so try to split on
                 # whitespace.
-                parts = re.split(r'(\s+)', text)
+                parts = re.split(r'(\s+)', line)
                 # Watch out though for "Header: longnonsplittableline"
                 if parts[0].endswith(':') and len(parts) == 3:
-                    return text
+                    rtn.append(line)
+                    continue
                 first = parts.pop(0)
                 sublines = [first]
                 acc = len(first)
@@ -203,13 +206,14 @@
                     else:
                         # Split it here, but don't forget to ignore the
                         # next whitespace-only part
-                        rtn.append(EMPTYSTRING.join(sublines))
+                        splitline.append(EMPTYSTRING.join(sublines))
                         del parts[0]
                         first = parts.pop(0)
                         sublines = [first]
                         acc = len(first)
-                rtn.append(EMPTYSTRING.join(sublines))
-                return NLTAB.join(rtn)
+                splitline.append(EMPTYSTRING.join(sublines))
+                rtn.append(NLTAB.join(splitline))
+        return NL.join(rtn)
 
     #
     # Handlers for writing types and subtypes
@@ -219,6 +223,9 @@
         payload = msg.get_payload()
         if payload is None:
             return
+        cset = msg.get_charset()
+        if cset is not None:
+            payload = cset.body_encode(payload)
         if not isinstance(payload, StringType):
             raise TypeError, 'string payload expected: %s' % type(payload)
         if self._mangle_from_:
@@ -233,7 +240,18 @@
         # together, and then make sure that the boundary we've chosen isn't
         # present in the payload.
         msgtexts = []
-        for part in msg.get_payload():
+        subparts = msg.get_payload()
+        if subparts is None:
+            # Nothing has ever been attached
+            boundary = msg.get_boundary(failobj=_make_boundary())
+            print >> self._fp, '--' + boundary
+            print >> self._fp, '\n'
+            print >> self._fp, '--' + boundary + '--'
+            return
+        elif not isinstance(subparts, ListType):
+            # Scalar payload
+            subparts = [subparts]
+        for part in subparts:
             s = StringIO()
             g = self.__class__(s, self._mangle_from_, self.__maxheaderlen)
             g(part, unixfrom=0)
@@ -365,7 +383,7 @@
 
 
 # Helper
-def _make_boundary(self, text=None):
+def _make_boundary(text=None):
     # Craft a random boundary.  If text is given, ensure that the chosen
     # boundary doesn't appear in the text.
     boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='

diff --git a/Lib/email/Header.py b/Lib/email/Header.py
new file mode 100644
index 0000000..097b978
--- /dev/null
+++ b/Lib/email/Header.py

@@ -0,0 +1,210 @@
+# Copyright (C) 2002 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield)
+
+"""Header encoding and decoding functionality."""
+
+import re
+import email.quopriMIME
+import email.base64MIME
+from email.Charset import Charset
+
+CRLFSPACE = '\r\n '
+CRLF = '\r\n'
+NLSPACE = '\n '
+
+MAXLINELEN = 76
+
+ENCODE = 1
+DECODE = 2
+
+# Match encoded-word strings in the form =?charset?q?Hello_World?=
+ecre = re.compile(r'''
+  =\?                   # literal =?
+  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
+  \?                    # literal ?
+  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
+  \?                    # literal ?
+  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
+  \?=                   # literal ?=
+  ''', re.VERBOSE | re.IGNORECASE)
+
+
+
+# Helpers
+_max_append = email.quopriMIME._max_append
+
+
+
+def decode_header(header):
+    """Decode a message header value without converting charset.
+
+    Returns a list of (decoded_string, charset) pairs containing each of the
+    decoded parts of the header.  Charset is None for non-encoded parts of the
+    header, otherwise a lower-case string containing the name of the character
+    set specified in the encoded string.
+    """
+    # If no encoding, just return the header
+    header = str(header)
+    if not ecre.search(header):
+        return [(header, None)]
+
+    decoded = []
+    dec = ''
+    for line in header.splitlines():
+        # This line might not have an encoding in it
+        if not ecre.search(line):
+            decoded.append((line, None))
+            continue
+        
+        parts = ecre.split(line)
+        while parts:
+            unenc = parts.pop(0).strip()
+            if unenc:
+                # Should we continue a long line?
+                if decoded and decoded[-1][1] is None:
+                    decoded[-1] = (decoded[-1][0] + ' ' + unenc, None)
+                else:
+                    decoded.append((unenc, None))
+            if parts:
+                charset, encoding = [s.lower() for s in parts[0:2]]
+                encoded = parts[2]
+                dec = ''
+                if encoding == 'q':
+                    dec = email.quopriMIME.header_decode(encoded)
+                elif encoding == 'b':
+                    dec = email.base64MIME.decode(encoded)
+                else:
+                    dec = encoded
+
+                if decoded and decoded[-1][1] == charset:
+                    decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
+                else:
+                    decoded.append((dec, charset))
+            del parts[0:3]
+    return decoded
+
+
+
+class Header:
+    def __init__(self, s, charset=None, maxlinelen=MAXLINELEN,
+                 header_name=None):
+        """Create a MIME-compliant header that can contain many languages.
+
+        Specify the initial header value in s.  Specify its character set as a
+        Charset object in the charset argument.  If none, a default Charset
+        instance will be used.
+
+        You can later append to the header with append(s, charset) below;
+        charset does not have to be the same as the one initially specified
+        here.  In fact, it's optional, and if not given, defaults to the
+        charset specified in the constructor.
+
+        The maximum line length can either be specified by maxlinelen, or you
+        can pass in the name of the header field (e.g. "Subject") to let this
+        class guess the best line length to use to prevent wrapping.  The
+        default maxlinelen is 76.
+        """
+        if charset is None:
+            charset = Charset()
+        self._charset = charset
+        # BAW: I believe `chunks' and `maxlinelen' should be non-public.
+        self._chunks = []
+        self.append(s, charset)
+        self._maxlinelen = maxlinelen
+        if header_name is not None:
+            self.guess_maxlinelen(header_name)
+
+    def __str__(self):
+        """A synonym for self.encode()."""
+        return self.encode()
+
+    def guess_maxlinelen(self, s=None):
+        """Guess the maximum length to make each header line.
+
+        Given a header name (e.g. "Subject"), set this header's maximum line
+        length to an appropriate length to avoid line wrapping.  If s is not
+        given, return the previous maximum line length and don't set it.
+
+        Returns the new maximum line length.
+        """
+        # BAW: is this semantic necessary?
+        if s is not None:
+            self._maxlinelen = MAXLINELEN - len(s) - 2
+        return self._maxlinelen
+
+    def append(self, s, charset=None):
+        """Append string s with Charset charset to the MIME header.
+
+        charset defaults to the one given in the class constructor.
+        """
+        if charset is None:
+            charset = self._charset
+        self._chunks.append((s, charset))
+        
+    def _split(self, s, charset):
+        # Split up a header safely for use with encode_chunks.  BAW: this
+        # appears to be a private convenience method.
+        splittable = charset.to_splittable(s)
+        encoded = charset.from_splittable(splittable)
+        
+        if charset.encoded_header_len(encoded) < self._maxlinelen:
+            return [(encoded, charset)]
+        else:
+            # Divide and conquer.  BAW: halfway depends on integer division.
+            # When porting to Python 2.2, use the // operator.
+            halfway = len(splittable) // 2
+            first = charset.from_splittable(splittable[:halfway], 0)
+            last = charset.from_splittable(splittable[halfway:], 0)
+            return self._split(first, charset) + self._split(last, charset)
+
+    def encode(self):
+        """Encode a message header, possibly converting charset and encoding.
+
+        There are many issues involved in converting a given string for use in
+        an email header.  Only certain character sets are readable in most
+        email clients, and as header strings can only contain a subset of
+        7-bit ASCII, care must be taken to properly convert and encode (with
+        Base64 or quoted-printable) header strings.  In addition, there is a
+        75-character length limit on any given encoded header field, so
+        line-wrapping must be performed, even with double-byte character sets.
+        
+        This method will do its best to convert the string to the correct
+        character set used in email, and encode and line wrap it safely with
+        the appropriate scheme for that character set.
+
+        If the given charset is not known or an error occurs during
+        conversion, this function will return the header untouched.
+        """
+        newchunks = []
+        for s, charset in self._chunks:
+            newchunks += self._split(s, charset)
+        self._chunks = newchunks
+        return self.encode_chunks()
+
+    def encode_chunks(self):
+        """MIME-encode a header with many different charsets and/or encodings.
+
+        Given a list of pairs (string, charset), return a MIME-encoded string
+        suitable for use in a header field.  Each pair may have different
+        charsets and/or encodings, and the resulting header will accurately
+        reflect each setting.
+
+        Each encoding can be email.Charset.QP (quoted-printable, for
+        ASCII-like character sets like iso-8859-1), email.Charset.BASE64
+        (Base64, for non-ASCII like character sets like KOI8-R and
+        iso-2022-jp), or None (no encoding).
+
+        Each pair will be represented on a separate line; the resulting string
+        will be in the format:
+
+        "=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
+          =?charset2?b?SvxyZ2VuIEL2aW5n?="
+        """
+        chunks = []
+        for header, charset in self._chunks:
+            if charset is None:
+                _max_append(chunks, header, self._maxlinelen, ' ')
+            else:
+                _max_append(chunks, charset.header_encode(header, 0),
+                            self._maxlinelen, ' ')
+        return NLSPACE.join(chunks)

diff --git a/Lib/email/Iterators.py b/Lib/email/Iterators.py
index a64495d..515bac9 100644
--- a/Lib/email/Iterators.py
+++ b/Lib/email/Iterators.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Various types of useful iterators and generators.

diff --git a/Lib/email/MIMEBase.py b/Lib/email/MIMEBase.py
index 33216f6..28816e8 100644
--- a/Lib/email/MIMEBase.py
+++ b/Lib/email/MIMEBase.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Base class for MIME specializations.

diff --git a/Lib/email/MIMEImage.py b/Lib/email/MIMEImage.py
index 963da23..f0e7931a 100644
--- a/Lib/email/MIMEImage.py
+++ b/Lib/email/MIMEImage.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Class representing image/* type MIME documents.

diff --git a/Lib/email/MIMEMessage.py b/Lib/email/MIMEMessage.py
index fc4b2c6..89da925 100644
--- a/Lib/email/MIMEMessage.py
+++ b/Lib/email/MIMEMessage.py

@@ -1,4 +1,4 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Class representing message/* MIME documents.

diff --git a/Lib/email/MIMEText.py b/Lib/email/MIMEText.py
index ccce9fb..8669d28 100644
--- a/Lib/email/MIMEText.py
+++ b/Lib/email/MIMEText.py

@@ -1,9 +1,10 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Class representing text/* type MIME documents.
 """
 
+import warnings
 import MIMEBase
 from Encoders import encode_7or8bit
 
@@ -13,7 +14,7 @@
     """Class for generating text/* type MIME documents."""
 
     def __init__(self, _text, _subtype='plain', _charset='us-ascii',
-                 _encoder=encode_7or8bit):
+                 _encoder=None):
         """Create a text/* type MIME document.
 
         _text is the string for this message object.  If the text does not end
@@ -22,20 +23,26 @@
         _subtype is the MIME sub content type, defaulting to "plain".
 
         _charset is the character set parameter added to the Content-Type:
-        header.  This defaults to "us-ascii".
+        header.  This defaults to "us-ascii".  Note that as a side-effect, the
+        Content-Transfer-Encoding: header will also be set.
 
-        _encoder is a function which will perform the actual encoding for
-        transport of the text data.  It takes one argument, which is this
-        Text instance.  It should use get_payload() and set_payload() to
-        change the payload to the encoded form.  It should also add any
-        Content-Transfer-Encoding: or other headers to the message as
-        necessary.  The default encoding doesn't actually modify the payload,
-        but it does set Content-Transfer-Encoding: to either `7bit' or `8bit'
-        as appropriate.
+        The use of the _encoder is deprecated.  The encoding of the payload,
+        and the setting of the character set parameter now happens implicitly
+        based on the _charset argument.  If _encoder is supplied, then a
+        DeprecationWarning is issued, and the _encoder functionality may
+        override any header settings indicated by _charset.  This is probably
+        not what you want.
         """
         MIMEBase.MIMEBase.__init__(self, 'text', _subtype,
                                    **{'charset': _charset})
         if _text and _text[-1] <> '\n':
             _text += '\n'
-        self.set_payload(_text)
-        _encoder(self)
+        self.set_payload(_text, _charset)
+        if _encoder is not None:
+            warnings.warn('_encoder argument is obsolete.',
+                          DeprecationWarning, 2)
+            # Because set_payload() with a _charset will set its own
+            # Content-Transfer-Encoding: header, we need to delete the
+            # existing one or will end up with two of them. :(
+            del self['content-transfer-encoding']
+            _encoder(self)

diff --git a/Lib/email/Message.py b/Lib/email/Message.py
index 91931a1..71d10c4 100644
--- a/Lib/email/Message.py
+++ b/Lib/email/Message.py

@@ -1,23 +1,47 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Basic message object for the email package object model.
 """
 
-from __future__ import generators
-
 import re
-import base64
-import quopri
+import warnings
 from cStringIO import StringIO
-from types import ListType
+from types import ListType, StringType
 
 # Intrapackage imports
 import Errors
 import Utils
+import Charset
 
 SEMISPACE = '; '
+
+# Regular expression used to split header parameters.  BAW: this may be too
+# simple.  It isn't strictly RFC 2045 (section 5.1) compliant, but it catches
+# most headers found in the wild.  We may eventually need a full-fledged
+# parser.
 paramre = re.compile(r'\s*;\s*')
+# Regular expression that matches `special' characters in parameters, the
+# existence of which forces quoting of the parameter value.
+tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
+
+
+
+# Helper function
+def _formatparam(param, value=None, quote=1):
+    """Convenience function to format and return a key=value pair.
+
+    Will quote the value if needed or if quote is true.
+    """
+    if value is not None and len(value) > 0:
+        # BAW: Please check this.  I think that if quote is set it should
+        # force quoting even if not necessary.
+        if quote or tspecials.search(value):
+            return '%s="%s"' % (param, Utils.quote(value))
+        else:
+            return '%s=%s' % (param, value)
+    else:
+        return param
 
 
 
@@ -39,6 +63,7 @@
         self._headers = []
         self._unixfrom = None
         self._payload = None
+        self._charset = None
         # Defaults for multipart messages
         self.preamble = self.epilogue = None
 
@@ -83,6 +108,8 @@
         If the current payload is empty, then the current payload will be made
         a scalar, set to the given value.
         """
+        warnings.warn('add_payload() is deprecated, use attach() instead.',
+                      DeprecationWarning, 2)
         if self._payload is None:
             self._payload = payload
         elif type(self._payload) is ListType:
@@ -93,8 +120,18 @@
         else:
             self._payload = [self._payload, payload]
 
-    # A useful synonym
-    attach = add_payload
+    def attach(self, payload):
+        """Add the given payload to the current payload.
+
+        The current payload will always be a list of objects after this method
+        is called.  If you want to set the payload to a scalar object
+        (e.g. because you're attaching a message/rfc822 subpart), use
+        set_payload() instead.
+        """
+        if self._payload is None:
+            self._payload = [payload]
+        else:
+            self._payload.append(payload)
 
     def get_payload(self, i=None, decode=0):
         """Return the current payload exactly as is.
@@ -128,10 +165,58 @@
         return payload
 
 
-    def set_payload(self, payload):
-        """Set the payload to the given value."""
-        self._payload = payload
+    def set_payload(self, payload, charset=None):
+        """Set the payload to the given value.
 
+        Optionally set the charset, given as a string or Charset instance."""
+        self._payload = payload
+        if charset is not None:
+            self.set_charset(charset)
+
+    def set_charset(self, charset):
+        """Set the charset of the payload to a given character set.
+
+        charset can be a string or a Charset object.  If it is a string, it
+        will be converted to a Charset object by calling Charset's
+        constructor.  If charset is None, the charset parameter will be
+        removed from the Content-Type: field.  Anything else will generate a
+        TypeError.
+
+        The message will be assumed to be a text message encoded with
+        charset.input_charset.  It will be converted to charset.output_charset
+        and encoded properly, if needed, when generating the plain text
+        representation of the message.  MIME headers (MIME-Version,
+        Content-Type, Content-Transfer-Encoding) will be added as needed.
+        """
+        if charset is None:
+            self.del_param('charset')
+            self._charset = None
+            return
+        if isinstance(charset, StringType):
+            charset = Charset.Charset(charset)
+        if not isinstance(charset, Charset.Charset):
+            raise TypeError, charset
+        # BAW: should we accept strings that can serve as arguments to the
+        # Charset constructor?
+        self._charset = charset
+        if not self.has_key('MIME-Version'):
+            self.add_header('MIME-Version', '1.0')
+        if not self.has_key('Content-Type'):
+            self.add_header('Content-Type', 'text/plain',
+                            charset=charset.get_output_charset())
+        else:
+            self.set_param('charset', charset.get_output_charset())
+        if not self.has_key('Content-Transfer-Encoding'):
+            cte = charset.get_body_encoding()
+            if callable(cte):
+                cte(self)
+            else:
+                self.add_header('Content-Transfer-Encoding', cte)
+
+    def get_charset(self):
+        """Return the Charset object associated with the message's payload."""
+        return self._charset
+        
     #
     # MAPPING INTERFACE (partial)
     #
@@ -257,7 +342,7 @@
             if v is None:
                 parts.append(k.replace('_', '-'))
             else:
-                parts.append('%s="%s"' % (k.replace('_', '-'), v))
+                parts.append(_formatparam(k.replace('_', '-'), v))
         if _value is not None:
             parts.insert(0, _value)
         self._headers.append((_name, SEMISPACE.join(parts)))
@@ -308,6 +393,8 @@
         for p in paramre.split(value):
             try:
                 name, val = p.split('=', 1)
+                name = name.rstrip()
+                val = val.lstrip()
             except ValueError:
                 # Must have been a bare attribute
                 name = p
@@ -315,26 +402,29 @@
             params.append((name, val))
         return params
 
-    def get_params(self, failobj=None, header='content-type'):
+    def get_params(self, failobj=None, header='content-type', unquote=1):
         """Return the message's Content-Type: parameters, as a list.
 
         The elements of the returned list are 2-tuples of key/value pairs, as
         split on the `=' sign.  The left hand side of the `=' is the key,
         while the right hand side is the value.  If there is no `=' sign in
         the parameter the value is the empty string.  The value is always
-        unquoted.
+        unquoted, unless unquote is set to a false value.
 
         Optional failobj is the object to return if there is no Content-Type:
         header.  Optional header is the header to search instead of
-        Content-Type:
+        Content-Type:.
         """
         missing = []
         params = self._get_params_preserve(missing, header)
         if params is missing:
             return failobj
-        return [(k, Utils.unquote(v)) for k, v in params]
+        if unquote:
+            return [(k, Utils.unquote(v)) for k, v in params]
+        else:
+            return params
 
-    def get_param(self, param, failobj=None, header='content-type'):
+    def get_param(self, param, failobj=None, header='content-type', unquote=1):
         """Return the parameter value if found in the Content-Type: header.
 
         Optional failobj is the object to return if there is no Content-Type:
@@ -342,15 +432,112 @@
         Content-Type:
 
         Parameter keys are always compared case insensitively.  Values are
-        always unquoted.
+        always unquoted, unless unquote is set to a false value.
         """
         if not self.has_key(header):
             return failobj
         for k, v in self._get_params_preserve(failobj, header):
             if k.lower() == param.lower():
-                return Utils.unquote(v)
+                if unquote:
+                    return Utils.unquote(v)
+                else:
+                    return v
         return failobj
 
+    def set_param(self, param, value, header='Content-Type', requote=1):
+        """Set a parameter in the Content-Type: header.
+
+        If the parameter already exists in the header, its value will be
+        replaced with the new value.
+
+        If header is Content-Type: and has not yet been defined in this
+        message, it will be set to "text/plain" and the new parameter and
+        value will be appended, as per RFC 2045.
+
+        An alternate header can be specified in the header argument, and
+        all parameters will be quoted as appropriate unless requote is
+        set to a false value.
+        """
+        if not self.has_key(header) and header.lower() == 'content-type':
+            ctype = 'text/plain'
+        else:
+            ctype = self.get(header)
+        if not self.get_param(param, header=header):
+            if not ctype:
+                ctype = _formatparam(param, value, requote)
+            else:
+                ctype = SEMISPACE.join(
+                    [ctype, _formatparam(param, value, requote)])
+        else:
+            ctype = ''
+            for old_param, old_value in self.get_params(header=header,
+                                                        unquote=requote):
+                append_param = ''
+                if old_param.lower() == param.lower():
+                    append_param = _formatparam(param, value, requote)
+                else:
+                    append_param = _formatparam(old_param, old_value, requote)
+                if not ctype:
+                    ctype = append_param
+                else:
+                    ctype = SEMISPACE.join([ctype, append_param])
+        if ctype <> self.get(header):
+            del self[header]
+            self[header] = ctype
+
+    def del_param(self, param, header='content-type', requote=1):
+        """Remove the given parameter completely from the Content-Type header.
+
+        The header will be re-written in place without param or its value.
+        All values will be quoted as appropriate unless requote is set to a
+        false value.
+        """
+        if not self.has_key(header):
+            return
+        new_ctype = ''
+        for p, v in self.get_params(header=header, unquote=requote):
+            if p.lower() <> param.lower():
+                if not new_ctype:
+                    new_ctype = _formatparam(p, v, requote)
+                else:
+                    new_ctype = SEMISPACE.join([new_ctype,
+                                                _formatparam(p, v, requote)])
+        if new_ctype <> self.get(header):
+            del self[header]
+            self[header] = new_ctype
+
+    def set_type(self, type, header='Content-Type', requote=1):
+        """Set the main type and subtype for the Content-Type: header.
+
+        type must be a string in the form "maintype/subtype", otherwise a
+        ValueError is raised.
+
+        This method replaces the Content-Type: header, keeping all the
+        parameters in place.  If requote is false, this leaves the existing
+        header's quoting as is.  Otherwise, the parameters will be quoted (the
+        default).
+
+        An alternate header can be specified in the header argument.  When the
+        Content-Type: header is set, we'll always also add a MIME-Version:
+        header.
+        """
+        # BAW: should we be strict?
+        if not type.count('/') == 1:
+            raise ValueError
+        # Set the Content-Type: you get a MIME-Version:
+        if header.lower() == 'content-type':
+            del self['mime-version']
+            self['MIME-Version'] = '1.0'
+        if not self.has_key(header):
+            self[header] = type
+            return
+        params = self.get_params(header=header, unquote=requote)
+        del self[header]
+        self[header] = type
+        # Skip the first param; it's the old type.
+        for p, v in params[1:]:
+            self.set_param(p, v, header, requote)
+
     def get_filename(self, failobj=None):
         """Return the filename associated with the payload if present.
 

diff --git a/Lib/email/Parser.py b/Lib/email/Parser.py
index 2f131d6..7177dfc 100644
--- a/Lib/email/Parser.py
+++ b/Lib/email/Parser.py

@@ -51,9 +51,16 @@
         lastvalue = []
         lineno = 0
         while 1:
-            line = fp.readline()[:-1]
-            if not line or not line.strip():
+            # Don't strip the line before we test for the end condition,
+            # because whitespace-only header lines are RFC compliant
+            # continuation lines.
+            line = fp.readline()
+            if not line:
                 break
+            line = line.splitlines()[0]
+            if not line:
+                break
+            # Ignore the trailing newline
             lineno += 1
             # Check for initial Unix From_ line
             if line.startswith('From '):
@@ -63,7 +70,6 @@
                 else:
                     raise Errors.HeaderParseError(
                         'Unix-from in headers after first rfc822 header')
-            #
             # Header continuation line
             if line[0] in ' \t':
                 if not lastheader:
@@ -134,11 +140,11 @@
                 msgobj = self.parsestr(part)
                 container.preamble = preamble
                 container.epilogue = epilogue
-                # Ensure that the container's payload is a list
-                if not isinstance(container.get_payload(), ListType):
-                    container.set_payload([msgobj])
-                else:
-                    container.add_payload(msgobj)
+                container.attach(msgobj)
+        elif container.get_main_type() == 'multipart':
+            # Very bad.  A message is a multipart with no boundary!
+            raise Errors.BoundaryError(
+                'multipart message with no defined boundary')
         elif container.get_type() == 'message/delivery-status':
             # This special kind of type contains blocks of headers separated
             # by a blank line.  We'll represent each header block as a
@@ -160,9 +166,9 @@
             except Errors.HeaderParseError:
                 msg = self._class()
                 self._parsebody(msg, fp)
-            container.add_payload(msg)
+            container.set_payload(msg)
         else:
-            container.add_payload(fp.read())
+            container.set_payload(fp.read())
 
 
 

diff --git a/Lib/email/Utils.py b/Lib/email/Utils.py
index 3d48287..887be55 100644
--- a/Lib/email/Utils.py
+++ b/Lib/email/Utils.py

@@ -1,16 +1,26 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """Miscellaneous utilities.
 """
 
 import time
+import socket
 import re
+import random
+import os
+import warnings
+from cStringIO import StringIO
+from types import ListType
 
-from rfc822 import unquote, quote, parseaddr
-from rfc822 import dump_address_pair
+from rfc822 import unquote, quote
 from rfc822 import AddrlistClass as _AddrlistClass
-from rfc822 import parsedate_tz, parsedate, mktime_tz
+from rfc822 import mktime_tz
+
+# We need workarounds for bugs in these methods in older Pythons (see below)
+from rfc822 import parsedate as _parsedate
+from rfc822 import parsedate_tz as _parsedate_tz
+from rfc822 import parseaddr as _parseaddr
 
 from quopri import decodestring as _qdecode
 import base64
@@ -20,6 +30,10 @@
 
 COMMASPACE = ', '
 UEMPTYSTRING = u''
+CRLF = '\r\n'
+
+specialsre = re.compile(r'[][\()<>@,:;".]')
+escapesre = re.compile(r'[][\()"]')
 
 
 
@@ -44,6 +58,41 @@
 
 
 
+def fix_eols(s):
+    """Replace all line-ending characters with \\r\\n."""
+    # Fix newlines with no preceding carriage return
+    s = re.sub(r'(?<!\r)\n', CRLF, s)
+    # Fix carriage returns with no following newline
+    s = re.sub(r'\r(?!\n)', CRLF, s)
+    return s
+
+
+
+def formataddr(pair):
+    """The inverse of parseaddr(), this takes a 2-tuple of the form
+    (realname, email_address) and returns the string value suitable
+    for an RFC 2822 From:, To: or Cc:.
+    
+    If the first element of pair is false, then the second element is
+    returned unmodified.
+    """
+    name, address = pair
+    if name:
+        quotes = ''
+        if specialsre.search(name):
+            quotes = '"'
+        name = escapesre.sub(r'\\\g<0>', name)
+        return '%s%s%s <%s>' % (quotes, name, quotes, address)
+    return address
+
+# For backwards compatibility
+def dump_address_pair(pair):
+    warnings.warn('Use email.Utils.formataddr() instead',
+                  DeprecationWarning, 2)
+    return formataddr(pair)
+
+
+
 def getaddresses(fieldvalues):
     """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
     all = COMMASPACE.join(fieldvalues)
@@ -64,30 +113,26 @@
 
 
 def decode(s):
-    """Return a decoded string according to RFC 2047, as a unicode string."""
+    """Return a decoded string according to RFC 2047, as a unicode string.
+
+    NOTE: This function is deprecated.  Use Header.decode_header() instead.
+    """
+    warnings.warn('Use Header.decode_header() instead.', DeprecationWarning, 2)
+    # Intra-package import here to avoid circular import problems.
+    from Header import decode_header
+    L = decode_header(s)
+    if not isinstance(L, ListType):
+        # s wasn't decoded
+        return s
+
     rtn = []
-    parts = ecre.split(s, 1)
-    while parts:
-        # If there are less than 4 parts, it can't be encoded and we're done
-        if len(parts) < 5:
-            rtn.extend(parts)
-            break
-        # The first element is any non-encoded leading text
-        rtn.append(parts[0])
-        charset = parts[1]
-        encoding = parts[2].lower()
-        atom = parts[3]
-        # The next chunk to decode should be in parts[4]
-        parts = ecre.split(parts[4])
-        # The encoding must be either `q' or `b', case-insensitive
-        if encoding == 'q':
-            func = _qdecode
-        elif encoding == 'b':
-            func = _bdecode
+    for atom, charset in L:
+        if charset is None:
+            rtn.append(atom)
         else:
-            func = _identity
-        # Decode and get the unicode in the charset
-        rtn.append(unicode(func(atom), charset))
+            # Convert the string to Unicode using the given encoding.  Leave
+            # Unicode conversion errors to strict.
+            rtn.append(unicode(atom, charset))
     # Now that we've decoded everything, we just need to join all the parts
     # together into the final string.
     return UEMPTYSTRING.join(rtn)
@@ -96,6 +141,7 @@
 
 def encode(s, charset='iso-8859-1', encoding='q'):
     """Encode a string according to RFC 2047."""
+    warnings.warn('Use Header.Header.encode() instead.', DeprecationWarning, 2)
     encoding = encoding.lower()
     if encoding == 'q':
         estr = _qencode(s)
@@ -150,3 +196,48 @@
          'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'][now[1] - 1],
         now[0], now[3], now[4], now[5],
         zone)
+
+
+
+def make_msgid(idstring=None):
+    """Returns a string suitable for RFC 2822 compliant Message-ID:, e.g:
+
+    <20020201195627.33539.96671@nightshade.la.mastaler.com>
+
+    Optional idstring if given is a string used to strengthen the
+    uniqueness of the Message-ID, otherwise an empty string is used.
+    """
+    timeval = time.time()
+    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
+    pid = os.getpid()
+    randint = random.randrange(100000)
+    if idstring is None:
+        idstring = ''
+    else:
+        idstring = '.' + idstring
+    idhost = socket.getfqdn()
+    msgid = '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, idhost)
+    return msgid
+
+
+
+# These functions are in the standalone mimelib version only because they've
+# subsequently been fixed in the latest Python versions.  We use this to worm
+# around broken older Pythons.
+def parsedate(data):
+    if not data:
+        return None
+    return _parsedate(data)
+
+
+def parsedate_tz(data):
+    if not data:
+        return None
+    return _parsedate_tz(data)
+
+
+def parseaddr(addr):
+    realname, emailaddr = _parseaddr(addr)
+    if realname == '' and emailaddr is None:
+        return '', ''
+    return realname, emailaddr

diff --git a/Lib/email/__init__.py b/Lib/email/__init__.py
index c13495b..f4a5b76 100644
--- a/Lib/email/__init__.py
+++ b/Lib/email/__init__.py

@@ -1,14 +1,16 @@
-# Copyright (C) 2001 Python Software Foundation
+# Copyright (C) 2001,2002 Python Software Foundation
 # Author: barry@zope.com (Barry Warsaw)
 
 """A package for parsing, handling, and generating email messages.
 """
 
-__version__ = '1.0'
+__version__ = '2.0'
 
-__all__ = ['Encoders',
+__all__ = ['Charset',
+           'Encoders',
            'Errors',
            'Generator',
+           'Header',
            'Iterators',
            'MIMEAudio',
            'MIMEBase',
@@ -18,6 +20,8 @@
            'Message',
            'Parser',
            'Utils',
+           'base64MIME',
+           'quopriMIME',
            'message_from_string',
            'message_from_file',
            ]

diff --git a/Lib/email/base64MIME.py b/Lib/email/base64MIME.py
new file mode 100644
index 0000000..08420b2
--- /dev/null
+++ b/Lib/email/base64MIME.py

@@ -0,0 +1,174 @@
+# Copyright (C) 2002 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield)
+
+"""Base64 content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
+characters encoding known as Base64.
+
+It is used in the MIME standards for email to attach images, audio, and text
+using some 8-bit character sets to messages.
+
+This module provides an interface to encode and decode both headers and bodies
+with Base64 encoding.
+
+RFC 2047 defines a method for including character set information in an
+`encoded-word' in a header.  This method is commonly used for 8-bit real names
+in To:, From:, Cc:, etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character conversion
+necessary for proper internationalized headers; it only does dumb encoding and
+decoding.  To deal with the various line wrapping issues, use the email.Header
+module.
+"""
+
+import re
+from binascii import b2a_base64, a2b_base64
+from email.Utils import fix_eols
+
+CRLF = '\r\n'
+NL = '\n'
+EMPTYSTRING = ''
+
+# See also Charset.py
+MISC_LEN = 7
+
+
+
+# Helpers
+def base64_len(s):
+    """Return the length of s when it is encoded with base64."""
+    groups_of_3, leftover = divmod(len(s), 3) 
+    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in. 
+    # Thanks, Tim!
+    n = groups_of_3 * 4 
+    if leftover: 
+        n += 4 
+    return n 
+
+
+
+def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
+                  eol=NL):
+    """Encode a single header line with Base64 encoding in a given charset.
+    
+    Defined in RFC 2047, this Base64 encoding is identical to normal Base64
+    encoding, except that each line must be intelligently wrapped (respecting
+    the Base64 encoding), and subsequent lines must start with a space.
+
+    charset names the character set to use to encode the header.  It defaults
+    to iso-8859-1.
+
+    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
+    to the canonical email line separator \\r\\n unless the keep_eols
+    parameter is set to true (the default is false).
+
+    Each line of the header will be terminated in the value of eol, which
+    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
+    this function directly in email.
+
+    The resulting string will be in the form:
+
+    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
+      =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="
+      
+    with each line wrapped at, at most, maxlinelen characters (defaults to 76
+    characters).
+    """
+    # Return empty headers unchanged
+    if not header:
+        return header
+
+    if not keep_eols:
+        header = fix_eols(header)
+    
+    # Base64 encode each line, in encoded chunks no greater than maxlinelen in
+    # length, after the RFC chrome is added in.
+    base64ed = []
+    max_encoded = maxlinelen - len(charset) - MISC_LEN
+    max_unencoded = max_encoded * 3 / 4
+
+    # BAW: Ben's original code used a step of max_unencoded, but I think it
+    # ought to be max_encoded.  Otherwise, where's max_encoded used?  I'm
+    # still not sure what the right step is.
+    for i in range(0, len(header), max_unencoded):
+        base64ed.append(b2a_base64(header[i:i+max_unencoded]))
+
+    # Now add the RFC chrome to each encoded chunk
+    lines = []
+    for line in base64ed:
+        # Ignore the last character of each line if it is a newline
+        if line[-1] == NL:
+            line = line[:-1]
+        # Add the chrome
+        lines.append('=?%s?b?%s?=' % (charset, line))
+    # Glue the lines together and return it.  BAW: should we be able to
+    # specify the leading whitespace in the joiner?
+    joiner = eol + ' '
+    return joiner.join(lines)
+
+
+
+def encode(s, binary=1, maxlinelen=76, eol=NL):
+    """Encode a string with base64.
+
+    Each line will be wrapped at, at most, maxlinelen characters (defaults to
+    76 characters).
+
+    If binary is false, end-of-line characters will be converted to the
+    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
+    verbatim (this is the default).
+
+    Each line of encoded text will end with eol, which defaults to "\\n".  Set
+    this to "\\r\\n" if you will be using the result of this function directly
+    in an email.
+    """
+    if not s:
+        return s
+    
+    if not binary:
+        s = fix_eols(s)
+        
+    encvec = []
+    max_unencoded = maxlinelen * 3 / 4
+    for i in range(0, len(s), max_unencoded):
+        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
+        # adding a newline to the encoded string?
+        enc = b2a_base64(s[i:i + max_unencoded])
+        if enc[-1] == NL and eol <> NL:
+            enc = enc[:-1] + eol
+        encvec.append(enc)
+    return EMPTYSTRING.join(encvec)
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_encode = encode
+encodestring = encode
+
+
+
+def decode(s, convert_eols=None):
+    """Decode a raw base64 string.
+
+    If convert_eols is set to a string value, all canonical email linefeeds,
+    e.g. "\\r\\n", in the decoded text will be converted to the value of
+    convert_eols.  os.linesep is a good choice for convert_eols if you are
+    decoding a text attachment.
+
+    This function does not parse a full MIME header value encoded with
+    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
+    level email.Header class for that functionality.
+    """
+    if not s:
+        return s
+    
+    dec = a2b_base64(s)
+    if convert_eols:
+        return dec.replace(CRLF, convert_eols)
+    return dec
+
+
+# For convenience and backwards compatibility w/ standard base64 module
+body_decode = decode
+decodestring = decode

diff --git a/Lib/email/quopriMIME.py b/Lib/email/quopriMIME.py
new file mode 100644
index 0000000..002034e
--- /dev/null
+++ b/Lib/email/quopriMIME.py

@@ -0,0 +1,312 @@
+# Copyright (C) 2001,2002 Python Software Foundation
+# Author: che@debian.org (Ben Gertzfield)
+
+"""Quoted-printable content transfer encoding per RFCs 2045-2047.
+
+This module handles the content transfer encoding method defined in RFC 2045
+to encode US ASCII-like 8-bit data called `quoted-printable'.  It is used to
+safely encode text that is in a character set similar to the 7-bit US ASCII
+character set, but that includes some 8-bit characters that are normally not
+allowed in email bodies or headers.
+
+Quoted-printable is very space-inefficient for encoding binary files; use the
+email.base64MIME module for that instead.
+
+This module provides an interface to encode and decode both headers and bodies
+with quoted-printable encoding.
+
+RFC 2047 defines a method for including character set information in an
+`encoded-word' in a header.  This method is commonly used for 8-bit real names
+in To:/From:/Cc: etc. fields, as well as Subject: lines.
+
+This module does not do the line wrapping or end-of-line character
+conversion necessary for proper internationalized headers; it only
+does dumb encoding and decoding.  To deal with the various line
+wrapping issues, use the email.Header module.  
+"""
+
+import re
+from string import hexdigits
+from email.Utils import fix_eols
+
+CRLF = '\r\n'
+NL = '\n'
+
+# See also Charset.py
+MISC_LEN = 7
+
+hqre = re.compile(r'[^-a-zA-Z0-9!*+/ ]')
+bqre = re.compile(r'[^ !-<>-~\t]')
+
+
+
+# Helpers
+def header_quopri_check(c):
+    """Return true if the character should be escaped with header quopri."""
+    return hqre.match(c) and 1
+
+
+def body_quopri_check(c):
+    """Return true if the character should be escaped with body quopri."""
+    return bqre.match(c) and 1
+
+    
+def header_quopri_len(s):
+    """Return the length of s when it is encoded with header quopri."""
+    count = 0
+    for c in s:
+        if hqre.match(c):
+            count += 3
+        else:
+            count += 1
+    return count
+
+
+def body_quopri_len(str):
+    """Return the length of str when it is encoded with body quopri."""
+    count = 0
+    for c in str:
+        if bqre.match(c):
+            count += 3
+        else:
+            count += 1
+    return count
+
+
+def _max_append(L, s, maxlen, extra=''):
+    if not L:
+        L.append(s)
+    elif len(L[-1]) + len(s) < maxlen:
+        L[-1] += extra + s
+    else:
+        L.append(s)
+
+
+def unquote(s):
+    """Turn a string in the form =AB to the ASCII character with value 0xab"""
+    return chr(int(s[1:3], 16))
+
+
+def quote(c):
+    return "=%02X" % ord(c)
+
+
+
+def header_encode(header, charset="iso-8859-1", keep_eols=0, maxlinelen=76,
+                  eol=NL):
+    """Encode a single header line with quoted-printable (like) encoding.
+
+    Defined in RFC 2047, this `Q' encoding is similar to quoted-printable, but
+    used specifically for email header fields to allow charsets with mostly 7
+    bit characters (and some 8 bit) to remain more or less readable in non-RFC
+    2045 aware mail clients.
+
+    charset names the character set to use to encode the header.  It defaults
+    to iso-8859-1.
+
+    The resulting string will be in the form:
+
+    "=?charset?q?I_f=E2rt_in_your_g=E8n=E8ral_dire=E7tion?=\\n
+      =?charset?q?Silly_=C8nglish_Kn=EEghts?="
+
+    with each line wrapped safely at, at most, maxlinelen characters (defaults
+    to 76 characters).
+
+    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
+    to the canonical email line separator \\r\\n unless the keep_eols
+    parameter is set to true (the default is false).
+
+    Each line of the header will be terminated in the value of eol, which
+    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
+    this function directly in email.
+    """
+    # Return empty headers unchanged
+    if not header:
+        return header
+
+    if not keep_eols:
+        header = fix_eols(header)
+
+    # Quopri encode each line, in encoded chunks no greater than maxlinelen in
+    # length, after the RFC chrome is added in.
+    quoted = []
+    max_encoded = maxlinelen - len(charset) - MISC_LEN
+    
+    for c in header:
+        # Space may be represented as _ instead of =20 for readability
+        if c == ' ':
+            _max_append(quoted, '_', max_encoded)
+        # These characters can be included verbatim
+        elif not hqre.match(c):
+            _max_append(quoted, c, max_encoded)
+        # Otherwise, replace with hex value like =E2
+        else:
+            _max_append(quoted, "=%02X" % ord(c), max_encoded)
+
+    # Now add the RFC chrome to each encoded chunk and glue the chunks
+    # together.  BAW: should we be able to specify the leading whitespace in
+    # the joiner?
+    joiner = eol + ' '
+    return joiner.join(['=?%s?q?%s?=' % (charset, line) for line in quoted])
+
+
+
+def encode(body, binary=0, maxlinelen=76, eol=NL):
+    """Encode with quoted-printable, wrapping at maxlinelen characters.
+
+    If binary is false (the default), end-of-line characters will be converted
+    to the canonical email end-of-line sequence \\r\\n.  Otherwise they will
+    be left verbatim.
+
+    Each line of encoded text will end with eol, which defaults to "\\n".  Set
+    this to "\\r\\n" if you will be using the result of this function directly
+    in an email.
+
+    Each line will be wrapped at, at most, maxlinelen characters (defaults to
+    76 characters).  Long lines will have the `soft linefeed' quoted-printable
+    character "=" appended to them, so the decoded text will be identical to
+    the original text.
+    """
+    if not body:
+        return body
+
+    if not binary:
+        body = fix_eols(body)
+
+    # BAW: We're accumulating the body text by string concatenation.  That
+    # can't be very efficient, but I don't have time now to rewrite it.  It
+    # just feels like this algorithm could be more efficient.
+    encoded_body = ''
+    lineno = -1
+    # Preserve line endings here so we can check later to see if an eol needs
+    # to be added to the output.
+    lines = body.splitlines(1)
+    for line in lines:
+        # But strip off line-endings for processing this line.
+        if line.endswith(CRLF):
+            line = line[:-2]
+        elif line[-1] in CRLF:
+            line = line[:-1]
+            
+        lineno += 1
+        encoded_line = ''
+        prev = None
+        linelen = len(line)
+        # Now we need to examine every character to see if it needs to be
+        # quopri encoded.  BAW: again, string concatenation is inefficient.
+        for j in range(linelen):
+            c = line[j]
+            prev = c
+            if bqre.match(c):
+                c = quote(c)
+            elif j+1 == linelen:
+                # Check for whitespace at end of line; special case
+                if c not in ' \t':
+                    encoded_line += c
+                prev = c
+                continue
+            # Check to see if the line has reached its maximum length
+            if len(encoded_line) + len(c) >= maxlinelen:
+                encoded_body += encoded_line + '=' + eol
+                encoded_line = ''
+            encoded_line += c
+        # Now at end of line..
+        if prev and prev in ' \t':
+            # Special case for whitespace at end of file
+            if lineno+1 == len(lines):
+                prev = quote(prev)
+                if len(encoded_line) + len(prev) > maxlinelen:
+                    encoded_body += encoded_line + '=' + eol + prev
+                else:
+                    encoded_body += encoded_line + prev
+            # Just normal whitespace at end of line
+            else:
+                encoded_body += encoded_line + prev + '=' + eol
+            encoded_line = ''
+        # Now look at the line we just finished; if it has a line ending, we
+        # need to add eol to the end of the line.
+        if lines[lineno].endswith(CRLF) or lines[lineno][-1] in CRLF:
+            encoded_body += encoded_line + eol
+        else:
+            encoded_body += encoded_line
+        encoded_line = ''
+    return encoded_body
+
+
+# For convenience and backwards compatibility w/ standard quopri module
+body_encode = encode
+encodestring = encode
+
+
+
+# BAW: I'm not sure if the intent was for the signature of this function to be
+# the same as base64MIME.decode() or not...
+def decode(encoded, eol=NL):
+    """Decode a quoted-printable string.
+
+    Lines are separated with eol, which defaults to \\n.
+    """
+    if not encoded:
+        return encoded
+    # BAW: see comment in encode() above.  Again, we're building up the
+    # decoded string with string concatenation, which could be done much more
+    # efficiently.
+    decoded = ''
+
+    for line in encoded.splitlines():
+        line = line.rstrip()
+        if not line:
+            decoded += eol
+            continue
+
+        i = 0
+        n = len(line)
+        while i < n:
+            c = line[i]
+            if c <> '=':
+                decoded += c
+                i += 1
+            # Otherwise, c == "=".  Are we at the end of the line?  If so, add
+            # a soft line break.
+            elif i+1 == n:
+                i += 1
+                continue
+            # Decode if in form =AB
+            elif i+2 < n and line[i+1] in hexdigits and line[i+2] in hexdigits:
+                decoded += unquote(line[i:i+3])
+                i += 3
+            # Otherwise, not in form =AB, pass literally
+            else:
+                decoded += c
+                i += 1
+
+            if i == n:
+                decoded += eol
+    # Special case if original string did not end with eol
+    if encoded[-1] <> eol and decoded[-1] == eol:
+        decoded = decoded[:-1]
+    return decoded
+
+
+# For convenience and backwards compatibility w/ standard quopri module
+body_decode = decode
+decodestring = decode
+
+
+
+def _unquote_match(match):
+    """Turn a match in the form =AB to the ASCII character with value 0xab"""
+    s = match.group(0)
+    return unquote(s)
+
+
+# Header decoding is done a bit differently
+def header_decode(s):
+    """Decode a string encoded with RFC 2047 MIME header `Q' encoding.
+
+    This function does not parse a full MIME header value encoded with
+    quoted-printable (like =?iso-8859-1?q?Hello_World?=) -- please use
+    the high level email.Header class for that functionality.
+    """
+    s = s.replace('_', ' ')
+    return re.sub(r'=\w{2}', _unquote_match, s)

diff --git a/Lib/test/data/msg_24.txt b/Lib/test/data/msg_24.txt
new file mode 100644
index 0000000..4e52339
--- /dev/null
+++ b/Lib/test/data/msg_24.txt

@@ -0,0 +1,10 @@
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+
+--BOUNDARY
+
+
+--BOUNDARY--

diff --git a/Lib/test/data/msg_25.txt b/Lib/test/data/msg_25.txt
new file mode 100644
index 0000000..9e35275
--- /dev/null
+++ b/Lib/test/data/msg_25.txt

@@ -0,0 +1,117 @@
+From MAILER-DAEMON Fri Apr 06 16:46:09 2001
+Received: from [204.245.199.98] (helo=zinfandel.lacita.com)
+	by www.linux.org.uk with esmtp (Exim 3.13 #1)
+	id 14lYR6-0008Iv-00
+	for linuxuser-admin@www.linux.org.uk; Fri, 06 Apr 2001 16:46:09 +0100
+Received: from localhost (localhost) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with internal id JAB03225; Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+From: Mail Delivery Subsystem <MAILER-DAEMON@zinfandel.lacita.com>
+Subject: Returned mail: Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
+Message-Id: <200104061723.JAB03225@zinfandel.lacita.com>
+To: <linuxuser-admin@www.linux.org.uk>
+To: postmaster@zinfandel.lacita.com
+MIME-Version: 1.0
+Content-Type: multipart/report; report-type=delivery-status;
+	bo
+Auto-Submitted: auto-generated (failure)
+
+This is a MIME-encapsulated message
+
+--JAB03225.986577786/zinfandel.lacita.com
+
+The original message was received at Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+from [199.164.235.226]
+
+   ----- The following addresses have delivery notifications -----
+<scoffman@wellpartner.com>  (unrecoverable error)
+
+   ----- Transcript of session follows -----
+554 Too many hops 19 (17 max): from <linuxuser-admin@www.linux.org.uk> via [199.164.235.226], to <scoffman@wellpartner.com>
+
+--JAB03225.986577786/zinfandel.lacita.com
+Content-Type: message/delivery-status
+
+Reporting-MTA: dns; zinfandel.lacita.com
+Received-From-MTA: dns; [199.164.235.226]
+Arrival-Date: Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+
+Final-Recipient: rfc822; scoffman@wellpartner.com
+Action: failed
+Status: 5.4.6
+Last-Attempt-Date: Fri, 6 Apr 2001 09:23:06 -0800 (GMT-0800)
+
+--JAB03225.986577786/zinfandel.lacita.com
+Content-Type: text/rfc822-headers
+
+Return-Path: linuxuser-admin@www.linux.org.uk
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03225 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:23:03 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03221 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:22:18 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03217 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:21:37 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03213 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:56 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03209 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:20:15 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03205 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:19:33 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03201 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:18:52 -0800 (GMT-0800)
+Received: from zinfandel.lacita.com ([204.245.199.98])
+	by
+	fo
+Received: from ns1.wellpartner.net ([199.164.235.226]) by zinfandel.lacita.com (8.7.3/8.6.10-MT4.00) with ESMTP id JAA03197 for <scoffman@wellpartner.com>; Fri, 6 Apr 2001 09:17:54 -0800 (GMT-0800)
+Received: from www.linux.org.uk (parcelfarce.linux.theplanet.co.uk [195.92.249.252])
+	by
+	fo
+Received: from localhost.localdomain
+	([
+	by
+	id
+Received: from [212.1.130.11] (helo=s1.uklinux.net ident=root)
+	by
+	id
+	fo
+Received: from server (ppp-2-22.cvx4.telinco.net [212.1.149.22])
+	by
+	fo
+From: Daniel James <daniel@linuxuser.co.uk>
+Organization: LinuxUser
+To: linuxuser@www.linux.org.uk
+X-Mailer: KMail [version 1.1.99]
+Content-Type: text/plain;
+  c
+MIME-Version: 1.0
+Message-Id: <01040616033903.00962@server>
+Content-Transfer-Encoding: 8bit
+Subject: [LinuxUser] bulletin no. 45
+Sender: linuxuser-admin@www.linux.org.uk
+Errors-To: linuxuser-admin@www.linux.org.uk
+X-BeenThere: linuxuser@www.linux.org.uk
+X-Mailman-Version: 2.0.3
+Precedence: bulk
+List-Help: <mailto:linuxuser-request@www.linux.org.uk?subject=help>
+List-Post: <mailto:linuxuser@www.linux.org.uk>
+List-Subscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
+	<m
+List-Id: bulletins from LinuxUser magazine <linuxuser.www.linux.org.uk>
+List-Unsubscribe: <http://www.linux.org.uk/mailman/listinfo/linuxuser>,
+	<m
+List-Archive: <http://www.linux.org.uk/pipermail/linuxuser/>
+Date: Fri, 6 Apr 2001 16:03:39 +0100
+
+--JAB03225.986577786/zinfandel.lacita.com--
+
+

diff --git a/Lib/test/test_email.py b/Lib/test/test_email.py
index 7105f7d..1322246 100644
--- a/Lib/test/test_email.py
+++ b/Lib/test/test_email.py

@@ -1,15 +1,19 @@
 # Copyright (C) 2001,2002 Python Software Foundation
 # email package unit tests
 
+import sys
 import os
 import time
 import unittest
 import base64
 from cStringIO import StringIO
 from types import StringType
+import warnings
 
 import email
 
+from email.Charset import Charset
+from email.Header import Header, decode_header
 from email.Parser import Parser, HeaderParser
 from email.Generator import Generator, DecodedGenerator
 from email.Message import Message
@@ -22,14 +26,18 @@
 from email import Errors
 from email import Encoders
 from email import Iterators
+from email import base64MIME
+from email import quopriMIME
 
 from test_support import findfile, __file__ as test_support_file
 
-
 NL = '\n'
 EMPTYSTRING = ''
 SPACE = ' '
 
+# We don't care about DeprecationWarnings
+warnings.filterwarnings('ignore', '', DeprecationWarning, __name__)
+
 
 
 def openfile(filename):
@@ -41,7 +49,7 @@
 # Base test class
 class TestEmailBase(unittest.TestCase):
     def _msgobj(self, filename):
-        fp = openfile(filename)
+        fp = openfile(findfile(filename))
         try:
             msg = email.message_from_file(fp)
         finally:
@@ -58,6 +66,45 @@
         eq(msg.get_all('cc'), ['ccc@zzz.org', 'ddd@zzz.org', 'eee@zzz.org'])
         eq(msg.get_all('xx', 'n/a'), 'n/a')
 
+    def test_getset_charset(self):
+        eq = self.assertEqual
+        msg = Message()
+        eq(msg.get_charset(), None)
+        charset = Charset('iso-8859-1')
+        msg.set_charset(charset)
+        eq(msg['mime-version'], '1.0')
+        eq(msg.get_type(), 'text/plain')
+        eq(msg['content-type'], 'text/plain; charset="iso-8859-1"')
+        eq(msg.get_param('charset'), 'iso-8859-1')
+        eq(msg['content-transfer-encoding'], 'quoted-printable')
+        eq(msg.get_charset().input_charset, 'iso-8859-1')
+        # Remove the charset
+        msg.set_charset(None)
+        eq(msg.get_charset(), None)
+        eq(msg['content-type'], 'text/plain')
+        # Try adding a charset when there's already MIME headers present
+        msg = Message()
+        msg['MIME-Version'] = '2.0'
+        msg['Content-Type'] = 'text/x-weird'
+        msg['Content-Transfer-Encoding'] = 'quinted-puntable'
+        msg.set_charset(charset)
+        eq(msg['mime-version'], '2.0')
+        eq(msg['content-type'], 'text/x-weird; charset="iso-8859-1"')
+        eq(msg['content-transfer-encoding'], 'quinted-puntable')
+
+    def test_set_charset_from_string(self):
+        eq = self.assertEqual
+        msg = Message()
+        msg.set_charset('us-ascii')
+        eq(msg.get_charset().input_charset, 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+
+    def test_set_payload_with_charset(self):
+        msg = Message()
+        charset = Charset('iso-8859-1')
+        msg.set_payload('This is a string payload', charset)
+        self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
+
     def test_get_charsets(self):
         eq = self.assertEqual
 
@@ -204,6 +251,11 @@
         eq(msg.get_params(header='x-header'),
            [('foo', ''), ('bar', 'one'), ('baz', 'two')])
 
+    def test_get_param_liberal(self):
+        msg = Message()
+        msg['Content-Type'] = 'Content-Type: Multipart/mixed; boundary = "CPIMSSMTPC06p5f3tG"'
+        self.assertEqual(msg.get_param('boundary'), 'CPIMSSMTPC06p5f3tG')
+
     def test_get_param(self):
         eq = self.assertEqual
         msg = email.message_from_string(
@@ -216,6 +268,10 @@
         eq(msg.get_param('foo', header='x-header'), '')
         eq(msg.get_param('bar', header='x-header'), 'one')
         eq(msg.get_param('baz', header='x-header'), 'two')
+        # XXX: We are not RFC-2045 compliant!  We cannot parse:
+        # msg["Content-Type"] = 'text/plain; weird="hey; dolly? [you] @ <\\"home\\">?"'
+        # msg.get_param("weird")
+        # yet.
 
     def test_get_param_funky_continuation_lines(self):
         msg = self._msgobj('msg_22.txt')
@@ -228,7 +284,52 @@
         self.failUnless(msg.has_key('HEADER'))
         self.failIf(msg.has_key('headeri'))
 
+    def test_set_param(self):
+        eq = self.assertEqual
+        msg = Message()
+        msg.set_param('charset', 'iso-2022-jp')
+        eq(msg.get_param('charset'), 'iso-2022-jp')
+        msg.set_param('importance', 'high value')
+        eq(msg.get_param('importance'), 'high value')
+        eq(msg.get_param('importance', unquote=0), '"high value"')
+        eq(msg.get_params(), [('text/plain', ''),
+                              ('charset', 'iso-2022-jp'),
+                              ('importance', 'high value')])
+        eq(msg.get_params(unquote=0), [('text/plain', ''),
+                                       ('charset', '"iso-2022-jp"'),
+                                       ('importance', '"high value"')])
+        msg.set_param('charset', 'iso-9999-xx', header='X-Jimmy')
+        eq(msg.get_param('charset', header='X-Jimmy'), 'iso-9999-xx')
 
+    def test_del_param(self):
+        eq = self.assertEqual
+        msg = self._msgobj('msg_05.txt')
+        eq(msg.get_params(),
+           [('multipart/report', ''), ('report-type', 'delivery-status'),
+            ('boundary', 'D1690A7AC1.996856090/mail.example.com')])
+        old_val = msg.get_param("report-type")
+        msg.del_param("report-type")
+        eq(msg.get_params(),
+           [('multipart/report', ''),
+            ('boundary', 'D1690A7AC1.996856090/mail.example.com')]) 
+        msg.set_param("report-type", old_val)
+        eq(msg.get_params(),
+           [('multipart/report', ''),
+            ('boundary', 'D1690A7AC1.996856090/mail.example.com'),
+            ('report-type', old_val)])
+
+    def test_set_type(self):
+        eq = self.assertEqual
+        msg = Message()
+        self.assertRaises(ValueError, msg.set_type, 'text')
+        msg.set_type('text/plain')
+        eq(msg['content-type'], 'text/plain')
+        msg.set_param('charset', 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+        msg.set_type('text/html')
+        eq(msg['content-type'], 'text/html; charset="us-ascii"')
+
+           
 
 # Test the email.Encoders module
 class TestEncoders(unittest.TestCase):
@@ -236,7 +337,6 @@
         eq = self.assertEqual
         msg = MIMEText('hello world', _encoder=Encoders.encode_noop)
         eq(msg.get_payload(), 'hello world\n')
-        eq(msg['content-transfer-encoding'], None)
 
     def test_encode_7bit(self):
         eq = self.assertEqual
@@ -253,6 +353,12 @@
         eq(msg.get_payload(), 'hello \x80 world\n')
         eq(msg['content-transfer-encoding'], '8bit')
 
+    def test_encode_empty_payload(self):
+        eq = self.assertEqual
+        msg = Message()
+        msg.set_charset('us-ascii')
+        eq(msg['content-transfer-encoding'], '7bit')
+
     def test_encode_base64(self):
         eq = self.assertEqual
         msg = MIMEText('hello world', _encoder=Encoders.encode_base64)
@@ -265,6 +371,23 @@
         eq(msg.get_payload(), 'hello=20world\n')
         eq(msg['content-transfer-encoding'], 'quoted-printable')
 
+    def test_default_cte(self):
+        eq = self.assertEqual
+        msg = MIMEText('hello world')
+        eq(msg['content-transfer-encoding'], '7bit')
+
+    def test_default_cte(self):
+        eq = self.assertEqual
+        # With no explicit _charset it's us-ascii, and all are 7-bit
+        msg = MIMEText('hello world')
+        eq(msg['content-transfer-encoding'], '7bit')
+        # Similar, but with 8-bit data
+        msg = MIMEText('hello \xf8 world')
+        eq(msg['content-transfer-encoding'], '8bit')
+        # And now with a different charset
+        msg = MIMEText('hello \xf8 world', _charset='iso-8859-1')
+        eq(msg['content-transfer-encoding'], 'quoted-printable')
+
 
 
 # Test long header wrapping
@@ -279,7 +402,14 @@
         sfp = StringIO()
         g = Generator(sfp)
         g(msg)
-        self.assertEqual(sfp.getvalue(), openfile('msg_18.txt').read())
+        self.assertEqual(sfp.getvalue(), '''\
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+X-Foobar-Spoink-Defrobnit: wasnipoop; giraffes="very-long-necked-animals";
+	spooge="yummy"; hippos="gargantuan"; marshmallows="gooey"
+
+''')
 
     def test_no_semis_header_splitter(self):
         msg = Message()
@@ -314,6 +444,30 @@
 
 Test""")
 
+    def test_splitting_multiple_long_lines(self):
+        msg = Message()
+        msg['Received'] = """\
+from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+	from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+	from babylon.socal-raves.org (localhost [127.0.0.1]); by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81; for <mailman-admin@babylon.socal-raves.org>; Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+"""
+        self.assertEqual(msg.as_string(), """\
+Received: from babylon.socal-raves.org (localhost [127.0.0.1]);
+	by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+	for <mailman-admin@babylon.socal-raves.org>;
+	Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+	from babylon.socal-raves.org (localhost [127.0.0.1]);
+	by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+	for <mailman-admin@babylon.socal-raves.org>;
+	Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+	from babylon.socal-raves.org (localhost [127.0.0.1]);
+	by babylon.socal-raves.org (Postfix) with ESMTP id B570E51B81;
+	for <mailman-admin@babylon.socal-raves.org>;
+	Sat, 2 Feb 2002 17:00:06 -0800 (PST)
+
+
+""")
+
 
 
 # Test mangling of "From " lines in the body of a message
@@ -476,6 +630,12 @@
         self.assertEqual(self._msg.get_payload(), 'hello there\n')
         self.failUnless(not self._msg.is_multipart())
 
+    def test_charset(self):
+        eq = self.assertEqual
+        msg = MIMEText('hello there', _charset='us-ascii')
+        eq(msg.get_charset().input_charset, 'us-ascii')
+        eq(msg['content-type'], 'text/plain; charset="us-ascii"')
+
 
 
 # Test a more complicated multipart/mixed type message
@@ -539,6 +699,82 @@
         unless(not m0.is_multipart())
         unless(not m1.is_multipart())
 
+    def test_no_parts_in_a_multipart(self):
+        outer = MIMEBase('multipart', 'mixed')
+        outer['Subject'] = 'A subject'
+        outer['To'] = 'aperson@dom.ain'
+        outer['From'] = 'bperson@dom.ain'
+        outer.preamble = ''
+        outer.epilogue = ''
+        outer.set_boundary('BOUNDARY')
+        msg = MIMEText('hello world')
+        self.assertEqual(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+
+--BOUNDARY
+
+
+--BOUNDARY--
+''')        
+
+    def test_one_part_in_a_multipart(self):
+        outer = MIMEBase('multipart', 'mixed')
+        outer['Subject'] = 'A subject'
+        outer['To'] = 'aperson@dom.ain'
+        outer['From'] = 'bperson@dom.ain'
+        outer.preamble = ''
+        outer.epilogue = ''
+        outer.set_boundary('BOUNDARY')
+        msg = MIMEText('hello world')
+        outer.attach(msg)
+        self.assertEqual(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+
+--BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+
+hello world
+
+--BOUNDARY--
+''')        
+
+    def test_seq_parts_in_a_multipart(self):
+        outer = MIMEBase('multipart', 'mixed')
+        outer['Subject'] = 'A subject'
+        outer['To'] = 'aperson@dom.ain'
+        outer['From'] = 'bperson@dom.ain'
+        outer.preamble = ''
+        outer.epilogue = ''
+        msg = MIMEText('hello world')
+        outer.attach(msg)
+        outer.set_boundary('BOUNDARY')
+        self.assertEqual(outer.as_string(), '''\
+Content-Type: multipart/mixed; boundary="BOUNDARY"
+MIME-Version: 1.0
+Subject: A subject
+To: aperson@dom.ain
+From: bperson@dom.ain
+
+--BOUNDARY
+Content-Type: text/plain; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+
+hello world
+
+--BOUNDARY--
+''')        
+
 
 
 # Test some badly formatted messages
@@ -551,7 +787,7 @@
         self.failUnless(msg.get_subtype() is None)
 
     def test_bogus_boundary(self):
-        fp = openfile('msg_15.txt')
+        fp = openfile(findfile('msg_15.txt'))
         try:
             data = fp.read()
         finally:
@@ -561,6 +797,10 @@
         # message into the intended message tree.
         self.assertRaises(Errors.BoundaryError, p.parsestr, data)
 
+    def test_multipart_no_boundary(self):
+        fp = openfile(findfile('msg_25.txt'))
+        self.assertRaises(Errors.BoundaryError, email.message_from_file, fp)
+
 
 
 # Test RFC 2047 header encoding and decoding
@@ -570,7 +810,7 @@
         s = '=?iso-8859-1?q?this=20is=20some=20text?='
         eq(Utils.decode(s), 'this is some text')
         s = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?='
-        eq(Utils.decode(s), u'Keld_J\xf8rn_Simonsen')
+        eq(Utils.decode(s), u'Keld J\xf8rn Simonsen')
         s = '=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=' \
             '=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?='
         eq(Utils.decode(s), 'If you can read this you understand the example.')
@@ -578,6 +818,8 @@
         eq(Utils.decode(s),
            u'\u05dd\u05d5\u05dc\u05e9 \u05df\u05d1 \u05d9\u05dc\u05d8\u05e4\u05e0')
         s = '=?iso-8859-1?q?this=20is?= =?iso-8859-1?q?some=20text?='
+        eq(Utils.decode(s), u'this issome text')
+        s = '=?iso-8859-1?q?this=20is_?= =?iso-8859-1?q?some=20text?='
         eq(Utils.decode(s), u'this is some text')
 
     def test_encode_header(self):
@@ -794,6 +1036,10 @@
         msg, text = self._msgobj('msg_23.txt')
         self._idempotent(msg, text)
 
+    def test_multipart_no_parts(self):
+        msg, text = self._msgobj('msg_24.txt')
+        self._idempotent(msg, text)
+
     def test_content_type(self):
         eq = self.assertEquals
         # Get a message object and reset the seek pointer for other tests
@@ -835,7 +1081,6 @@
         self.failUnless(isinstance(msg1.get_payload(), StringType))
         eq(msg1.get_payload(), '\n')
 
-
 
 # Test various other bits of the package's functionality
 class TestMiscellaneous(unittest.TestCase):
@@ -916,49 +1161,77 @@
         module = __import__('email')
         all = module.__all__
         all.sort()
-        self.assertEqual(all, ['Encoders', 'Errors', 'Generator', 'Iterators',
-                               'MIMEAudio', 'MIMEBase', 'MIMEImage',
-                               'MIMEMessage', 'MIMEText', 'Message', 'Parser',
-                               'Utils',
-                               'message_from_file', 'message_from_string'])
+        self.assertEqual(all, ['Charset', 'Encoders', 'Errors', 'Generator', 
+                               'Header', 'Iterators', 'MIMEAudio', 
+                               'MIMEBase', 'MIMEImage', 'MIMEMessage', 
+                               'MIMEText', 'Message', 'Parser',
+                               'Utils', 'base64MIME', 
+                               'message_from_file', 'message_from_string',
+                               'quopriMIME'])
 
     def test_formatdate(self):
-        now = 1005327232.109884
-        gm_epoch = time.gmtime(0)[0:3]
-        loc_epoch = time.localtime(0)[0:3]
-        # When does the epoch start?
-        if gm_epoch == (1970, 1, 1):
-            # traditional Unix epoch
-            matchdate = 'Fri, 09 Nov 2001 17:33:52 -0000'
-        elif loc_epoch == (1904, 1, 1):
-            # Mac epoch
-            matchdate = 'Sat, 09 Nov 1935 16:33:52 -0000'
-        else:
-            matchdate = "I don't understand your epoch"
-        gdate = Utils.formatdate(now)
-        self.assertEqual(gdate, matchdate)
+        now = time.time()
+        self.assertEqual(Utils.parsedate(Utils.formatdate(now))[:6],
+                         time.gmtime(now)[:6])
 
     def test_formatdate_localtime(self):
-        now = 1005327232.109884
-        ldate = Utils.formatdate(now, localtime=1)
-        zone = ldate.split()[5]
-        offset = int(zone[1:3]) * 3600 + int(zone[-2:]) * 60
-        # Remember offset is in seconds west of UTC, but the timezone is in
-        # minutes east of UTC, so the signs differ.
-        if zone[0] == '+':
-            offset = -offset
-        if time.daylight and time.localtime(now)[-1]:
-            toff = time.altzone
-        else:
-            toff = time.timezone
-        self.assertEqual(offset, toff)
+        now = time.time()
+        self.assertEqual(
+            Utils.parsedate(Utils.formatdate(now, localtime=1))[:6],
+            time.localtime(now)[:6])
 
     def test_parsedate_none(self):
         self.assertEqual(Utils.parsedate(''), None)
 
     def test_parseaddr_empty(self):
         self.assertEqual(Utils.parseaddr('<>'), ('', ''))
-        self.assertEqual(Utils.dump_address_pair(Utils.parseaddr('<>')), '')
+        self.assertEqual(Utils.formataddr(Utils.parseaddr('<>')), '')
+
+    def test_noquote_dump(self):
+        self.assertEqual(
+            Utils.formataddr(('A Silly Person', 'person@dom.ain')),
+            'A Silly Person <person@dom.ain>')
+
+    def test_escape_dump(self):
+        self.assertEqual(
+            Utils.formataddr(('A (Very) Silly Person', 'person@dom.ain')),
+            r'"A \(Very\) Silly Person" <person@dom.ain>')
+        a = r'A \(Special\) Person'
+        b = 'person@dom.ain'
+        self.assertEqual(Utils.parseaddr(Utils.formataddr((a, b))), (a, b))
+
+    def test_quote_dump(self):
+        self.assertEqual(
+            Utils.formataddr(('A Silly; Person', 'person@dom.ain')),
+            r'"A Silly; Person" <person@dom.ain>')
+
+    def test_fix_eols(self):
+        eq = self.assertEqual
+        eq(Utils.fix_eols('hello'), 'hello')
+        eq(Utils.fix_eols('hello\n'), 'hello\r\n')
+        eq(Utils.fix_eols('hello\r'), 'hello\r\n')
+        eq(Utils.fix_eols('hello\r\n'), 'hello\r\n')
+        eq(Utils.fix_eols('hello\n\r'), 'hello\r\n\r\n')
+
+    def test_charset_richcomparisons(self):
+        eq = self.assertEqual
+        ne = self.failIfEqual
+        cset1 = Charset()
+        cset2 = Charset()
+        eq(cset1, 'us-ascii')
+        eq(cset1, 'US-ASCII')
+        eq(cset1, 'Us-AsCiI')
+        eq('us-ascii', cset1)
+        eq('US-ASCII', cset1)
+        eq('Us-AsCiI', cset1)
+        ne(cset1, 'usascii')
+        ne(cset1, 'USASCII')
+        ne(cset1, 'UsAsCiI')
+        ne('usascii', cset1)
+        ne('USASCII', cset1)
+        ne('UsAsCiI', cset1)
+        eq(cset1, cset2)
+        eq(cset2, cset1)
 
 
 
@@ -983,8 +1256,12 @@
         eq = self.assertEqual
         msg = self._msgobj('msg_04.txt')
         it = Iterators.typed_subpart_iterator(msg, 'text')
-        lines = [subpart.get_payload() for subpart in it]
-        eq(len(lines), 2)
+        lines = []
+        subparts = 0
+        for subpart in it:
+            subparts += 1
+            lines.append(subpart.get_payload())
+        eq(subparts, 2)
         eq(EMPTYSTRING.join(lines), """\
 a simple kind of mirror
 to reflect upon our own
@@ -1011,6 +1288,7 @@
 -Me
 """)
 
+
 
 class TestParsers(unittest.TestCase):
     def test_header_parser(self):
@@ -1025,6 +1303,274 @@
         eq(msg.is_multipart(), 0)
         self.failUnless(isinstance(msg.get_payload(), StringType))
 
+    def test_whitespace_continuaton(self):
+        eq = self.assertEqual
+        # This message contains a line after the Subject: header that has only
+        # whitespace, but it is not empty!
+        msg = email.message_from_string("""\
+From: aperson@dom.ain
+To: bperson@dom.ain
+Subject: the next line has a space on it
+ 
+Date: Mon, 8 Apr 2002 15:09:19 -0400
+Message-ID: spam
+
+Here's the message body
+""")
+        eq(msg['subject'], 'the next line has a space on it\n ')
+        eq(msg['message-id'], 'spam')
+        eq(msg.get_payload(), "Here's the message body\n")
+
+
+
+class TestBase64(unittest.TestCase):
+    def test_len(self):
+        eq = self.assertEqual
+        eq(base64MIME.base64_len('hello'),
+           len(base64MIME.encode('hello', eol='')))
+        for size in range(15):
+            if   size == 0 : bsize = 0
+            elif size <= 3 : bsize = 4
+            elif size <= 6 : bsize = 8
+            elif size <= 9 : bsize = 12
+            elif size <= 12: bsize = 16
+            else           : bsize = 20
+            eq(base64MIME.base64_len('x'*size), bsize)
+
+    def test_decode(self):
+        eq = self.assertEqual
+        eq(base64MIME.decode(''), '')
+        eq(base64MIME.decode('aGVsbG8='), 'hello')
+        eq(base64MIME.decode('aGVsbG8=', 'X'), 'hello')
+        eq(base64MIME.decode('aGVsbG8NCndvcmxk\n', 'X'), 'helloXworld')
+
+    def test_encode(self):
+        eq = self.assertEqual
+        eq(base64MIME.encode(''), '')
+        eq(base64MIME.encode('hello'), 'aGVsbG8=\n')
+        # Test the binary flag
+        eq(base64MIME.encode('hello\n'), 'aGVsbG8K\n')
+        eq(base64MIME.encode('hello\n', 0), 'aGVsbG8NCg==\n')
+        # Test the maxlinelen arg
+        eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40), """\
+eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
+eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
+eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg
+eHh4eCB4eHh4IA==
+""")
+        # Test the eol argument
+        eq(base64MIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
+eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
+eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
+eHh4eCB4eHh4IHh4eHggeHh4eCB4eHh4IHh4eHgg\r
+eHh4eCB4eHh4IA==\r
+""")
+        
+    def test_header_encode(self):
+        eq = self.assertEqual
+        he = base64MIME.header_encode
+        eq(he('hello'), '=?iso-8859-1?b?aGVsbG8=?=')
+        eq(he('hello\nworld'), '=?iso-8859-1?b?aGVsbG8NCndvcmxk?=')
+        # Test the charset option
+        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?b?aGVsbG8=?=')
+        # Test the keep_eols flag
+        eq(he('hello\nworld', keep_eols=1),
+           '=?iso-8859-1?b?aGVsbG8Kd29ybGQ=?=')
+        # Test the maxlinelen argument
+        eq(he('xxxx ' * 20, maxlinelen=40), """\
+=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=
+ =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=
+ =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=
+ =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=
+ =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=
+ =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
+        # Test the eol argument
+        eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
+=?iso-8859-1?b?eHh4eCB4eHh4IHh4eHggeHg=?=\r
+ =?iso-8859-1?b?eHggeHh4eCB4eHh4IHh4eHg=?=\r
+ =?iso-8859-1?b?IHh4eHggeHh4eCB4eHh4IHg=?=\r
+ =?iso-8859-1?b?eHh4IHh4eHggeHh4eCB4eHg=?=\r
+ =?iso-8859-1?b?eCB4eHh4IHh4eHggeHh4eCA=?=\r
+ =?iso-8859-1?b?eHh4eCB4eHh4IHh4eHgg?=""")
+
+
+
+class TestQuopri(unittest.TestCase):
+    def setUp(self):
+        self.hlit = [chr(x) for x in range(ord('a'), ord('z')+1)] + \
+                    [chr(x) for x in range(ord('A'), ord('Z')+1)] + \
+                    [chr(x) for x in range(ord('0'), ord('9')+1)] + \
+                    ['!', '*', '+', '-', '/', ' ']
+        self.hnon = [chr(x) for x in range(256) if chr(x) not in self.hlit]
+        assert len(self.hlit) + len(self.hnon) == 256
+        self.blit = [chr(x) for x in range(ord(' '), ord('~')+1)] + ['\t']
+        self.blit.remove('=')
+        self.bnon = [chr(x) for x in range(256) if chr(x) not in self.blit]
+        assert len(self.blit) + len(self.bnon) == 256
+
+    def test_header_quopri_check(self):
+        for c in self.hlit:
+            self.failIf(quopriMIME.header_quopri_check(c))
+        for c in self.hnon:
+            self.failUnless(quopriMIME.header_quopri_check(c))
+
+    def test_body_quopri_check(self):
+        for c in self.blit:
+            self.failIf(quopriMIME.body_quopri_check(c))
+        for c in self.bnon:
+            self.failUnless(quopriMIME.body_quopri_check(c))
+
+    def test_header_quopri_len(self):
+        eq = self.assertEqual
+        hql = quopriMIME.header_quopri_len
+        enc = quopriMIME.header_encode
+        for s in ('hello', 'h@e@l@l@o@'):
+            # Empty charset and no line-endings.  7 == RFC chrome
+            eq(hql(s), len(enc(s, charset='', eol=''))-7)
+        for c in self.hlit:
+            eq(hql(c), 1)
+        for c in self.hnon:
+            eq(hql(c), 3)
+
+    def test_body_quopri_len(self):
+        eq = self.assertEqual
+        bql = quopriMIME.body_quopri_len
+        for c in self.blit:
+            eq(bql(c), 1)
+        for c in self.bnon:
+            eq(bql(c), 3)
+
+    def test_quote_unquote_idempotent(self):
+        for x in range(256):
+            c = chr(x)
+            self.assertEqual(quopriMIME.unquote(quopriMIME.quote(c)), c)
+
+    def test_header_encode(self):
+        eq = self.assertEqual
+        he = quopriMIME.header_encode
+        eq(he('hello'), '=?iso-8859-1?q?hello?=')
+        eq(he('hello\nworld'), '=?iso-8859-1?q?hello=0D=0Aworld?=')
+        # Test the charset option
+        eq(he('hello', charset='iso-8859-2'), '=?iso-8859-2?q?hello?=')
+        # Test the keep_eols flag
+        eq(he('hello\nworld', keep_eols=1), '=?iso-8859-1?q?hello=0Aworld?=')
+        # Test a non-ASCII byte; escaped so the file encoding cannot alter it
+        eq(he('hello\xc7there'), '=?iso-8859-1?q?hello=C7there?=')
+        # Test the maxlinelen argument
+        eq(he('xxxx ' * 20, maxlinelen=40), """\
+=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=
+ =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=
+ =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=
+ =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=
+ =?iso-8859-1?q?x_xxxx_xxxx_?=""")
+        # Test the eol argument
+        eq(he('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
+=?iso-8859-1?q?xxxx_xxxx_xxxx_xxxx_xx?=\r
+ =?iso-8859-1?q?xx_xxxx_xxxx_xxxx_xxxx?=\r
+ =?iso-8859-1?q?_xxxx_xxxx_xxxx_xxxx_x?=\r
+ =?iso-8859-1?q?xxx_xxxx_xxxx_xxxx_xxx?=\r
+ =?iso-8859-1?q?x_xxxx_xxxx_?=""")
+
+    def test_decode(self):
+        eq = self.assertEqual
+        eq(quopriMIME.decode(''), '')
+        eq(quopriMIME.decode('hello'), 'hello')
+        eq(quopriMIME.decode('hello', 'X'), 'hello')
+        eq(quopriMIME.decode('hello\nworld', 'X'), 'helloXworld')
+
+    def test_encode(self):
+        eq = self.assertEqual
+        eq(quopriMIME.encode(''), '')
+        eq(quopriMIME.encode('hello'), 'hello')
+        # Test the binary flag
+        eq(quopriMIME.encode('hello\r\nworld'), 'hello\nworld')
+        eq(quopriMIME.encode('hello\r\nworld', 0), 'hello\nworld')
+        # Test the maxlinelen arg
+        eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40), """\
+xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=
+ xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=
+x xxxx xxxx xxxx xxxx=20""")
+        # Test the eol argument
+        eq(quopriMIME.encode('xxxx ' * 20, maxlinelen=40, eol='\r\n'), """\
+xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx=\r
+ xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxx=\r
+x xxxx xxxx xxxx xxxx=20""")
+        eq(quopriMIME.encode("""\
+one line
+
+two line"""), """\
+one line
+
+two line""")
+        
+
+
+# Test the Charset class
+class TestCharset(unittest.TestCase):
+    def test_idempotent(self):
+        eq = self.assertEqual
+        # Make sure us-ascii = no Unicode conversion
+        c = Charset('us-ascii')
+        s = 'Hello World!'
+        sp = c.to_splittable(s)
+        eq(s, c.from_splittable(sp))
+        # test 8-bit idempotency with us-ascii
+        s = '\xa4\xa2\xa4\xa4\xa4\xa6\xa4\xa8\xa4\xaa'
+        sp = c.to_splittable(s)
+        eq(s, c.from_splittable(sp))
+
+
+
+# Test multilingual MIME headers.
+class TestHeader(unittest.TestCase):
+    def test_simple(self):
+        eq = self.assertEqual
+        h = Header('Hello World!')
+        eq(h.encode(), 'Hello World!')
+        h.append('Goodbye World!')
+        eq(h.encode(), 'Hello World! Goodbye World!')
+
+    def test_header_needs_no_decoding(self):
+        h = 'no decoding needed'
+        self.assertEqual(decode_header(h), [(h, None)])
+
+    def test_long(self):
+        h = Header("I am the very model of a modern Major-General; I've information vegetable, animal, and mineral; I know the kings of England, and I quote the fights historical from Marathon to Waterloo, in order categorical; I'm very well acquainted, too, with matters mathematical; I understand equations, both the simple and quadratical; about binomial theorem I'm teeming with a lot o' news, with many cheerful facts about the square of the hypotenuse.",
+                   maxlinelen=76)
+        for l in h.encode().split('\n '):
+            self.failUnless(len(l) <= 76)
+
+    def test_multilingual(self):
+        eq = self.assertEqual
+        g = Charset("iso-8859-1")
+        cz = Charset("iso-8859-2")
+        utf8 = Charset("utf-8")
+        g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
+        cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
+        utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+        h = Header(g_head, g)
+        h.append(cz_head, cz)
+        h.append(utf8_head, utf8)
+        enc = h.encode()
+        eq(enc, """=?iso-8859-1?q?Die_Mieter_treten_hier_ein_werden_mit_eine?=
+ =?iso-8859-1?q?m_Foerderband_komfortabel_den_Korridor_ent?=
+ =?iso-8859-1?q?lang=2C_an_s=FCdl=FCndischen_Wandgem=E4lden_vorbei?=
+ =?iso-8859-1?q?=2C_gegen_die_rotierenden_Klingen_bef=F6rdert=2E_?=
+ =?iso-8859-2?q?Finan=E8ni_metropole_se_hroutil?=
+ =?iso-8859-2?q?y_pod_tlakem_jejich_d=F9vtipu=2E=2E_?=
+ =?utf-8?b?5q2j56K644Gr6KiA44GG44Go57+76Kiz44Gv?=
+ =?utf-8?b?44GV44KM44Gm44GE44G+44Gb44KT44CC5LiA?=
+ =?utf-8?b?6YOo44Gv44OJ44Kk44OE6Kqe44Gn44GZ44GM?=
+ =?utf-8?b?44CB44GC44Go44Gv44Gn44Gf44KJ44KB44Gn?=
+ =?utf-8?b?44GZ44CC5a6f6Zqb44Gr44Gv44CMV2VubiBpc3QgZGE=?=
+ =?utf-8?b?cyBOdW5zdHVjayBnaXQgdW5k?=
+ =?utf-8?b?IFNsb3Rlcm1leWVyPyBKYSEgQmVpaGVyaHVuZCBkYXMgT2Rl?=
+ =?utf-8?b?ciBkaWUgRmxpcHBlcndhbGR0?=
+ =?utf-8?b?IGdlcnNwdXQu44CN44Go6KiA44Gj44Gm44GE44G+44GZ44CC?=""")
+        eq(decode_header(enc),
+           [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
+            (utf8_head, "utf-8")])
+
 
 
 def suite():
@@ -1044,13 +1590,13 @@
     suite.addTest(unittest.makeSuite(TestMiscellaneous))
     suite.addTest(unittest.makeSuite(TestIterators))
     suite.addTest(unittest.makeSuite(TestParsers))
+    suite.addTest(unittest.makeSuite(TestBase64))
+    suite.addTest(unittest.makeSuite(TestQuopri))
+    suite.addTest(unittest.makeSuite(TestHeader))
+    suite.addTest(unittest.makeSuite(TestCharset))
     return suite
 
 
 
-def test_main():
-    from test_support import run_suite
-    run_suite(suite())
-
 if __name__ == '__main__':
-    test_main()
+    unittest.main(defaultTest='suite')

diff --git a/Lib/test/test_email_codecs.py b/Lib/test/test_email_codecs.py
new file mode 100644
index 0000000..d0451d1
--- /dev/null
+++ b/Lib/test/test_email_codecs.py

@@ -0,0 +1,51 @@
+# Copyright (C) 2002 Python Software Foundation
+# email package unit tests for (optional) Asian codecs
+
+import unittest
+from test_support import TestSkipped
+
+from email.Charset import Charset
+from email.Header import Header, decode_header
+
+
+# See if we have the Japanese codecs package installed
+try:
+    unicode('foo', 'japanese.iso-2022-jp')
+except LookupError:
+    raise TestSkipped, 'Optional Japanese codecs not installed'
+
+
+
+class TestEmailAsianCodecs(unittest.TestCase):
+    def test_japanese_codecs(self):
+        eq = self.assertEqual
+        j = Charset("euc-jp")
+        g = Charset("iso-8859-1")
+        h = Header("Hello World!")
+        jhello = '\xa5\xcf\xa5\xed\xa1\xbc\xa5\xef\xa1\xbc\xa5\xeb\xa5\xc9\xa1\xaa'
+        ghello = 'Gr\xfc\xdf Gott!'
+        h.append(jhello, j)
+        h.append(ghello, g)
+        eq(h.encode(), 'Hello World! =?iso-2022-jp?b?GyRCJU8lbSE8JW8hPCVrJUkhKhsoQg==?=\n =?iso-8859-1?q?Gr=FC=DF_Gott!?=')
+        eq(decode_header(h.encode()),
+           [('Hello World!', None),
+            ('\x1b$B%O%m!<%o!<%k%I!*\x1b(B', 'iso-2022-jp'),
+            ('Gr\xfc\xdf Gott!', 'iso-8859-1')])
+        subject = 'test-ja \xa4\xd8\xc5\xea\xb9\xc6\xa4\xb5\xa4\xec\xa4\xbf\xa5\xe1\xa1\xbc\xa5\xeb\xa4\xcf\xbb\xca\xb2\xf1\xbc\xd4\xa4\xce\xbe\xb5\xc7\xa7\xa4\xf2\xc2\xd4\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9'
+        h = Header(subject, j, header_name="Subject")
+        # test a very long header (named 'subject' to avoid shadowing long)
+        enc = h.encode()
+        eq(enc, '=?iso-2022-jp?b?dGVzdC1qYSAbJEIkWEVqOUYkNSRsJD8lYRsoQg==?=\n =?iso-2022-jp?b?GyRCITwlayRPO0oycTxUJE4+NRsoQg==?=\n =?iso-2022-jp?b?GyRCRyckckJUJEMkRiQkJF4kORsoQg==?=')
+        eq(decode_header(enc), [("test-ja \x1b$B$XEj9F$5$l$?%a\x1b(B\x1b$B!<%k$O;J2q<T$N>5\x1b(B\x1b$BG'$rBT$C$F$$$^$9\x1b(B", 'iso-2022-jp')])
+
+
+
+def suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(TestEmailAsianCodecs))
+    return suite
+
+
+
+if __name__ == '__main__':
+    unittest.main(defaultTest='suite')
commit	409a4c08b545aa064cf8fe3b8de51404756a301e	[log] [tgz]
author	Barry Warsaw <barry@python.org>	Wed Apr 10 21:01:31 2002 +0000
committer	Barry Warsaw <barry@python.org>	Wed Apr 10 21:01:31 2002 +0000
tree	06cf8fe44e1fe28fbc0147635ec41961f2df6515
parent	68e69338ae19c37bd3e69cb76e107bfa76231e06 [diff]