blob: 08420b29b5a217124ed7e1885a3bfd0ed71e1b6e [file] [log] [blame]
Barry Warsaw409a4c02002-04-10 21:01:31 +00001# Copyright (C) 2002 Python Software Foundation
2# Author: che@debian.org (Ben Gertzfield)
3
4"""Base64 content transfer encoding per RFCs 2045-2047.
5
6This module handles the content transfer encoding method defined in RFC 2045
7to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
8characters encoding known as Base64.
9
10It is used in the MIME standards for email to attach images, audio, and text
11using some 8-bit character sets to messages.
12
13This module provides an interface to encode and decode both headers and bodies
14with Base64 encoding.
15
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
19
20This module does not do the line wrapping or end-of-line character conversion
21necessary for proper internationalized headers; it only does dumb encoding and
22decoding. To deal with the various line wrapping issues, use the email.Header
23module.
24"""
25
26import re
27from binascii import b2a_base64, a2b_base64
28from email.Utils import fix_eols
29
# Canonical email line terminator; decode() replaces it with convert_eols
# when asked to.
CRLF = '\r\n'
# Default line terminator for encoded output, and the trailing character
# that b2a_base64() output is checked for before it is stripped or replaced.
NL = '\n'
# Joiner used by encode() to concatenate the already-terminated lines.
EMPTYSTRING = ''

# See also Charset.py
# Number of characters of chrome wrapped around each encoded chunk in
# header_encode(), not counting the charset name: '=?' + '?b?' + '?='.
MISC_LEN = 7
36
37
38
39# Helpers
def base64_len(s):
    """Return the number of characters base64 needs to encode s.

    Base64 emits four output characters for every three input bytes, and a
    trailing partial group of one or two bytes is padded out to a full four
    characters as well.
    """
    # Ceiling-divide the input length into 3-byte groups, 4 chars per group.
    return (len(s) + 2) // 3 * 4
49
50
51
def header_encode(header, charset='iso-8859-1', keep_eols=0, maxlinelen=76,
                  eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    The header is split into chunks and each chunk is emitted as an RFC 2047
    `encoded-word' of the form =?charset?b?<base64 text>?=.  The base64 text
    itself is ordinary RFC 2045 Base64; continuation lines start with a
    space.

    charset names the character set to advertise in each encoded-word.  It
    defaults to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is set to true (the default is false).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?<base64 text>?=\\n
     =?charset?b?<base64 text>?="

    with each encoded-word at most maxlinelen characters long (defaults to
    76 characters).  If maxlinelen is too small to hold the chrome plus one
    4-character base64 group, one group per line is emitted anyway rather
    than dropping the header.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left on a line for base64 text once the '=?charset?b?' ... '?='
    # chrome has been accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 always emits whole 4-character groups, one per 3 input bytes (a
    # partial group is padded to 4).  Only whole groups may be counted, or
    # the padded tail pushes the line past maxlinelen.
    max_unencoded = (max_encoded // 4) * 3
    # A zero step makes range() raise and a negative one yields no chunks at
    # all (silently losing the header), so always take at least one group.
    if max_unencoded < 3:
        max_unencoded = 3

    # NOTE(review): chunks are cut by index, so a multi-byte character can be
    # split across two encoded-words; callers needing character-aware
    # splitting should use the higher level email.Header module.
    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # b2a_base64() terminates its output with a newline; drop it before
        # wrapping the chunk in the RFC chrome.
        if chunk[-1] == NL:
            chunk = chunk[:-1]
        lines.append('=?%s?b?%s?=' % (charset, chunk))

    # Glue the lines together.  BAW: should we be able to specify the
    # leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
110
111
112
def encode(s, binary=1, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters), not counting the line terminator.  If maxlinelen is
    smaller than one 4-character base64 group, one group per line is emitted.

    If binary is false, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Base64 emits whole 4-character groups, one per 3 input bytes, so the
    # number of raw bytes per line is the number of whole groups that fit in
    # maxlinelen times 3.  (For the default of 76 this is 57, as before.)
    max_unencoded = (maxlinelen // 4) * 3
    # range() rejects a zero step; always encode at least one group per line.
    if max_unencoded < 3:
        max_unencoded = 3

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap the newline b2a_base64() appended for the requested eol.
        if enc[-1] == NL and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
143
144
# Alternate names bound to the same function object as encode(), kept for
# convenience and for backwards compatibility with the standard base64 module.
body_encode = encode
encodestring = encode
148
149
150
def decode(s, convert_eols=None):
    """Decode a raw base64 string and return the decoded data.

    An empty s is handed back unchanged.

    When convert_eols is a non-empty string, every canonical email line
    ending ("\\r\\n") in the decoded data is replaced with it.  os.linesep is
    a good choice when decoding a text attachment.  With the default of None
    the decoded data is returned untouched.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- use the high level
    email.Header class for that.
    """
    # Nothing to decode: give empty input straight back.
    if not s:
        return s

    decoded = a2b_base64(s)
    # A false convert_eols (None, '') means leave line endings alone.
    if not convert_eols:
        return decoded
    return decoded.replace(CRLF, convert_eols)
170
171
# Alternate names bound to the same function object as decode(), kept for
# convenience and for backwards compatibility with the standard base64 module.
body_decode = decode
decodestring = decode