# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org
4
"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
characters encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.Header
module.
"""
26
# Public names exported by ``from <module> import *``.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'encode',
    'encodestring',
    'header_encode',
    'header_length',
    ]
37
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
Guido van Rossum9604e662007-08-30 03:46:43 +000039from base64 import b64encode
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from binascii import b2a_base64, a2b_base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Line-ending and joiner constants used by the encoders below.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
MISC_LEN = 7
48
49
50
51# Helpers
def header_length(bytearray):
    """Return the length of `bytearray` once it is encoded with base64.

    `bytearray` is any sized object (only ``len()`` is taken of it).  The
    parameter name shadows the builtin of the same name; it is kept so that
    existing keyword callers continue to work.
    """
    groups_of_3, leftover = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n
60
61
62
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    header_bytes is the header value, either as bytes or as a str; a str is
    first encoded to bytes using charset.  charset names the character set
    to use to encode the header and is written into the result.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    Returns a str of the form ``=?charset?b?<base64 data>?=``, or the empty
    string when header_bytes is empty.
    """
    if not header_bytes:
        return ""
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    # b64encode() returns bytes made up only of ASCII characters.
    encoded = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, encoded)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000075
76
77
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a bytes-like object with base64, wrapping the output in lines.

    s is the data to encode; it is handed in slices to
    binascii.b2a_base64().

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  A base64 line cannot be shorter than one 4-character
    group, so values of maxlinelen below 4 are treated as 4.

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Returns the encoded text as a str; empty input yields the empty string.
    """
    if not s:
        # Always hand back a str, even when s is an empty bytes object.
        return EMPTYSTRING

    # Base64 turns every 3 input bytes into 4 output characters.  Slice the
    # input in whole 3-byte groups so that no line exceeds maxlinelen and
    # '=' padding can only ever appear at the very end of the output.
    max_unencoded = max(maxlinelen // 4, 1) * 3
    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
101
102
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000103
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object.  A str is converted to bytes
    with the 'raw-unicode-escape' codec before decoding; anything else is
    passed to binascii.a2b_base64() unchanged.  Empty input yields an empty
    bytes object.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if not string:
        return bytes()
    if isinstance(string, str):
        return a2b_base64(string.encode('raw-unicode-escape'))
    # Already bytes-like: decode it directly.
    return a2b_base64(string)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000117
118
# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode
# __all__ lists 'encode' and 'encodestring' as well; bind them to the body
# encoder so that ``from <module> import *`` does not fail on missing names.
# NOTE(review): confirm these aliases are wanted rather than trimming __all__.
encode = body_encode
encodestring = body_encode