blob: f177586c8d8476058b0b8d975416174a58f4423b [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2002 Python Software Foundation
2# Author: che@debian.org (Ben Gertzfield)
3
4"""Base64 content transfer encoding per RFCs 2045-2047.
5
6This module handles the content transfer encoding method defined in RFC 2045
7to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
8characters encoding known as Base64.
9
10It is used in the MIME standards for email to attach images, audio, and text
11using some 8-bit character sets to messages.
12
13This module provides an interface to encode and decode both headers and bodies
14with Base64 encoding.
15
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
19
20This module does not do the line wrapping or end-of-line character conversion
21necessary for proper internationalized headers; it only does dumb encoding and
22decoding. To deal with the various line wrapping issues, use the email.Header
23module.
24"""
25
26import re
27from binascii import b2a_base64, a2b_base64
28from email.Utils import fix_eols
29
Barry Warsawd2b2e532002-06-02 19:08:31 +000030try:
31 from email._compat22 import _floordiv
32except SyntaxError:
33 # Python 2.1 spells integer division differently
34 from email._compat21 import _floordiv
35
36
# Line terminators and the empty-string joiner used by the functions below.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# Number of fixed "chrome" characters wrapped around the charset name and the
# base64 payload of an encoded-word: "=?" + "?b?" + "?=".  header_encode()
# subtracts this (plus the charset length) from the line length budget.
# See also Charset.py
MISC_LEN = 7
43
44
45
46# Helpers
def base64_len(s):
    """Return the number of characters base64 produces when encoding s.

    Only len(s) is consulted; nothing is actually encoded and no line
    terminators are counted.
    """
    # Every 3 input bytes become 4 output characters, and a trailing partial
    # group of 1 or 2 bytes is padded out to a full 4 characters as well.
    # Thanks, Tim!
    quads, partial = divmod(len(s), 3)
    if partial:
        quads += 1
    return quads * 4
Barry Warsaw409a4c02002-04-10 21:01:31 +000056
57
58
def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    The header is cut into chunks and every chunk is emitted as its own
    RFC 2047 encoded-word ("=?charset?b?...?=").  The encoded-words are joined
    with eol followed by a single space, so subsequent lines start with a
    space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
     =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each encoded-word being, at most, maxlinelen characters long
    (defaults to 76 characters).  The leading space of continuation lines is
    not counted against maxlinelen.  If maxlinelen is too small to hold the
    chrome plus one 4-character base64 group, one group per line is emitted
    anyway (exceeding maxlinelen) rather than dropping the header.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Characters left for the base64 payload once the "=?charset?b?...?="
    # chrome has been accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 turns every 3 input bytes (or nonzero fraction thereof) into 4
    # output characters, so only whole 4-character groups can be budgeted.
    # Computing max_encoded * 3 / 4 instead allows a trailing partial group
    # whose padded encoding overflows max_encoded (e.g. 44 bytes -> 60
    # characters when only 59 are available).
    max_unencoded = _floordiv(max_encoded, 4) * 3
    if max_unencoded < 3:
        # A zero or negative step would either raise in range() or silently
        # produce no output at all; fall back to one group per line.
        max_unencoded = 3

    # Base64 encode each chunk and wrap it in the RFC chrome
    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # b2a_base64() appends a newline, which must not end up inside the
        # encoded-word.
        if chunk[-1] == NL:
            chunk = chunk[:-1]
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
117
118
119
def encode(s, binary=1, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters), not counting the line terminator.  Lines always hold a
    whole number of 4-character base64 groups, so "=" padding can only appear
    at the very end of the output.  If maxlinelen is smaller than 4, one
    group per line is emitted anyway.

    If binary is false, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Base64 maps each 3 input bytes to 4 output characters.  Budget whole
    # groups only: a chunk size that is not a multiple of 3 would both
    # overflow maxlinelen and put padding in the middle of the stream.
    max_unencoded = _floordiv(maxlinelen, 4) * 3
    if max_unencoded < 3:
        # Avoid a zero/negative range() step for tiny maxlinelen values.
        max_unencoded = 3

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap the newline b2a_base64() appended for the requested eol.
        if enc[-1] == NL and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
150
151
# Aliases of encode(), provided for convenience and for backwards
# compatibility with the standard base64 module.
body_encode = encode
encodestring = encode
155
156
157
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    s is handed to binascii.a2b_base64() and the decoded data is returned.
    An empty s is returned unchanged.

    If convert_eols is set to a (non-empty) string value, all canonical email
    linefeeds, e.g. "\\r\\n", in the decoded text will be replaced by the
    value of convert_eols.  os.linesep is a good choice for convert_eols if
    you are decoding a text attachment.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if not s:
        return s

    decoded = a2b_base64(s)
    if not convert_eols:
        # No end-of-line translation requested; hand back the raw data.
        return decoded
    return decoded.replace(CRLF, convert_eols)
177
178
# Aliases of decode(), provided for convenience and for backwards
# compatibility with the standard base64 module.
body_decode = decode
decodestring = decode