blob: 6ed1d535841b7a58046900717f038a93562b3678 [file] [log] [blame]
Barry Warsawbb113862004-10-03 03:16:19 +00001# Copyright (C) 2002-2004 Python Software Foundation
2# Author: Ben Gertzfield
3# Contact: email-sig@python.org
Barry Warsaw409a4c02002-04-10 21:01:31 +00004
5"""Base64 content transfer encoding per RFCs 2045-2047.
6
7This module handles the content transfer encoding method defined in RFC 2045
8to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
9characters encoding known as Base64.
10
11It is used in the MIME standards for email to attach images, audio, and text
12using some 8-bit character sets to messages.
13
14This module provides an interface to encode and decode both headers and bodies
15with Base64 encoding.
16
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20
21This module does not do the line wrapping or end-of-line character conversion
22necessary for proper internationalized headers; it only does dumb encoding and
23decoding. To deal with the various line wrapping issues, use the email.Header
24module.
25"""
26
27import re
28from binascii import b2a_base64, a2b_base64
29from email.Utils import fix_eols
30
# Line-ending sequences and the empty string, named for readability.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# Number of characters header_encode() adds around each encoded chunk in
# addition to the charset name: "=?" + "?b?" + "?=".
MISC_LEN = 7
37
38
39
40# Helpers
def base64_len(s):
    """Return the length of s when it is encoded with base64."""
    # Base64 emits 4 characters for every group of 3 input bytes, and a
    # trailing partial group still costs a full 4 characters, so round the
    # group count up.
    return 4 * ((len(s) + 2) // 3)
Barry Warsaw409a4c02002-04-10 21:01:31 +000050
51
52
def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    The header is cut into chunks; each chunk is Base64 encoded (RFC 2045)
    and wrapped as an RFC 2047 `encoded-word'.  The encoded words are joined
    with eol followed by a single space, so every line after the first
    starts with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?WW/5ciBtYXp66XLrIHf8eiBhIGhhbXBzdGHuciBBIFlv+XIgbWF6euly?=\\n
     =?charset?b?6yB3/HogYSBoYW1wc3Rh7nIgQkMgWW/5ciBtYXp66XLrIHf8eiBhIGhh?="

    with each line wrapped at, at most, maxlinelen characters (defaults to 76
    characters).  If maxlinelen is too small to hold the chrome plus a single
    4-character Base64 group, each line carries exactly one group and is
    therefore longer than maxlinelen.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left on each line for Base64 text once the RFC chrome
    # ("=?charset?b?" ... "?=") has been accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 turns every started group of 3 input bytes into 4 output
    # characters, so only whole groups may be charged against the budget.
    # Merely rounding max_encoded * 3 / 4 down can leave a partial trailing
    # group whose padding pushes the line past maxlinelen.  Always take at
    # least one group, so that a tiny or negative budget neither drops the
    # header text nor hands range() a zero step.
    max_unencoded = max(3, (max_encoded // 4) * 3)

    lines = []
    for i in range(0, len(header), max_unencoded):
        encoded = b2a_base64(header[i:i + max_unencoded])
        # b2a_base64() appends a newline which is not part of the encoded
        # word; drop it before adding the chrome.
        if encoded.endswith(NL):
            encoded = encoded[:-1]
        lines.append('=?%s?b?%s?=' % (charset, encoded))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
108
109
110
def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  If maxlinelen is smaller than 4, each line carries one
    4-character Base64 group, since nothing shorter can be emitted.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Base64 turns every started group of 3 input bytes into 4 output
    # characters, so feed b2a_base64() whole groups only; otherwise a
    # maxlinelen that is not a multiple of 4 yields lines longer than
    # maxlinelen.  Always take at least one group so range() never gets a
    # zero step.
    max_unencoded = max(3, (maxlinelen // 4) * 3)

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap the newline b2a_base64() appended for the requested eol.
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
141
142
# For convenience and backwards compatibility w/ standard base64 module:
# both names are plain aliases of encode().
body_encode = encode
encodestring = encode
146
147
148
def decode(s, convert_eols=None):
    """Decode a raw base64 string.

    If convert_eols is set to a string value, all canonical email linefeeds,
    e.g. "\\r\\n", in the decoded text will be converted to the value of
    convert_eols.  os.linesep is a good choice for convert_eols if you are
    decoding a text attachment.

    An empty s is returned unchanged.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.Header class for that functionality.
    """
    if s:
        decoded = a2b_base64(s)
        # Leave the decoded data untouched unless a replacement line ending
        # was requested.
        if not convert_eols:
            return decoded
        return decoded.replace(CRLF, convert_eols)
    # Nothing to decode; hand the empty value back as-is.
    return s
168
169
# For convenience and backwards compatibility w/ standard base64 module:
# both names are plain aliases of decode().
body_decode = decode
decodestring = decode