blob: e309f30a2bd35a55b40e8e489d0de979dcb4d571 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2002-2007 Python Software Foundation
2# Author: Ben Gertzfield
3# Contact: email-sig@python.org
4
5"""Base64 content transfer encoding per RFCs 2045-2047.
6
7This module handles the content transfer encoding method defined in RFC 2045
8to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
9characters encoding known as Base64.
10
11It is used in the MIME standards for email to attach images, audio, and text
12using some 8-bit character sets to messages.
13
14This module provides an interface to encode and decode both headers and bodies
15with Base64 encoding.
16
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20
This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
25"""
26
# Names exported by a star-import of this module.
__all__ = [
    'base64_len', 'body_decode', 'body_encode',
    'decode', 'decodestring',
    'encode', 'encodestring',
    'header_encode',
]
37
38import re
39
40from binascii import b2a_base64, a2b_base64
41from email.utils import fix_eols
42
# Line terminators and the empty joiner used when assembling encoded output.
NL = '\n'
CRLF = '\r' + NL
EMPTYSTRING = ''

# Number of delimiter characters header_encode() wraps around the charset
# name and the base64 payload of each chunk: '=?', '?b?' and '?='.
# See also Charset.py
MISC_LEN = len('=?') + len('?b?') + len('?=')
49
50
51
52# Helpers
def base64_len(s):
    """Return the number of characters s occupies once base64-encoded.

    Only len(s) is consulted.  Base64 emits four output characters for every
    group of three input bytes, and a trailing partial group is padded out to
    a full four characters as well.
    """
    # Ceiling division: count every started 3-byte group, then 4 chars each.
    started_groups = (len(s) + 2) // 3
    return started_groups * 4
62
63
64
def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    The result uses the `encoded-word' form described by RFC 2047.  The
    payload is ordinary Base64 (RFC 2045), except that each line must be
    intelligently wrapped (respecting the Base64 encoding), and subsequent
    lines must start with a space.

    header may be a str or a bytes-like object.  A str is converted to bytes
    with the codec named by charset before it is Base64 encoded; bytes are
    assumed to already be in that charset.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
     =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).  Every chunk carries at least one 4-character Base64 group,
    so a line can exceed maxlinelen when maxlinelen leaves no room for one.

    An empty header is returned unchanged; otherwise the result is a str.
    Raises LookupError for an unknown charset and UnicodeEncodeError when a
    str header cannot be represented in charset.
    """
    # Return empty headers unchanged
    if not header:
        return header

    # Base64 works on bytes, so settle on a bytes payload up front.
    if isinstance(header, str):
        if not keep_eols:
            header = fix_eols(header)
        header = header.encode(charset)
    elif not keep_eols:
        # fix_eols() is applied to text here; latin-1 maps every byte value
        # to exactly one character, so this round trip is lossless.
        header = fix_eols(header.decode('latin-1')).encode('latin-1')

    # Room left for the Base64 payload once the RFC chrome is added in.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Use whole 3-byte groups only: N groups encode to exactly 4*N characters,
    # which never exceeds max_encoded.  (max_encoded * 3 // 4 may not be a
    # multiple of 3, and the padded result then overshoots the limit.)
    # Clamp to one group so the loop always makes progress.
    max_unencoded = max(3, max_encoded // 4 * 3)

    # NOTE: chunks are cut on byte boundaries, so with a multi-byte charset a
    # character can end up split across two encoded-words.
    lines = []
    for i in range(0, len(header), max_unencoded):
        encoded = b2a_base64(header[i:i + max_unencoded]).decode('ascii')
        # Ignore the last character of each chunk if it is a newline
        if encoded.endswith(NL):
            encoded = encoded[:-1]
        # Add the chrome
        lines.append('=?%s?b?%s?=' % (charset, encoded))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
120
121
122
def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    s may be a bytes-like object or a str.  Base64 is defined over bytes, so
    a str is first converted with the 'raw-unicode-escape' codec, the same
    codec decode() applies to str input.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Every line carries at least one 4-character base64
    group, so lines are never shorter than that even for a smaller
    maxlinelen.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged; otherwise the result is a str.
    """
    if not s:
        return s

    if not binary:
        if isinstance(s, str):
            s = fix_eols(s)
        else:
            # fix_eols() is applied to text here; latin-1 maps every byte
            # value to exactly one character, so the round trip is lossless.
            s = fix_eols(s.decode('latin-1')).encode('latin-1')
    if isinstance(s, str):
        s = s.encode('raw-unicode-escape')

    # Use whole 3-byte groups only: N groups encode to exactly 4*N characters,
    # so a line never exceeds maxlinelen and no '=' padding appears before
    # the final line.  Clamp to one group so the loop always makes progress.
    max_unencoded = max(3, maxlinelen // 4 * 3)

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        # b2a_base64() returns bytes; decode them rather than calling str(),
        # which would produce the "b'...'" repr.
        enc = b2a_base64(s[i:i + max_unencoded]).decode('ascii')
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
153
154
# Alternate names for encode(), kept for convenience and for backwards
# compatibility w/ the standard base64 module.
body_encode = encodestring = encode
158
159
160
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object.  A str is converted to bytes
    with the 'raw-unicode-escape' codec before decoding; any false value
    (empty string, empty bytes, None) yields empty bytes.

    This function does not parse a full MIME header value encoded with base64
    (like =?iso-8859-1?b?bmloISBuaWgh?=) -- use the high level email header
    machinery for that functionality.
    """
    # Nothing to decode.
    if not string:
        return bytes()
    # a2b_base64() wants bytes; text is converted first.
    raw = string.encode('raw-unicode-escape') if isinstance(string, str) else string
    return a2b_base64(raw)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000174
175
# Alternate names for decode(), kept for convenience and for backwards
# compatibility w/ the standard base64 module.
body_decode = decodestring = decode