blob: f3bbac1cafb736e88f3533b98e6a84c2be39eb20 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2002-2007 Python Software Foundation
2# Author: Ben Gertzfield
3# Contact: email-sig@python.org
4
5"""Base64 content transfer encoding per RFCs 2045-2047.
6
7This module handles the content transfer encoding method defined in RFC 2045
8to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
9characters encoding known as Base64.
10
11It is used in the MIME standards for email to attach images, audio, and text
12using some 8-bit character sets to messages.
13
14This module provides an interface to encode and decode both headers and bodies
15with Base64 encoding.
16
RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20
21This module does not do the line wrapping or end-of-line character conversion
22necessary for proper internationalized headers; it only does dumb encoding and
Amaury Forgeot d'Arc1c25de62009-07-12 16:43:19 +000023decoding. To deal with the various line wrapping issues, use the email.header
Guido van Rossum8b3febe2007-08-30 01:15:14 +000024module.
25"""
26
# Public API of this module: the names exported by a star-import.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
35
Guido van Rossum8b3febe2007-08-30 01:15:14 +000036
Guido van Rossum9604e662007-08-30 03:46:43 +000037from base64 import b64encode
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038from binascii import b2a_base64, a2b_base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039
# Line-ending and joining constants.  NL and EMPTYSTRING are used by
# body_encode(); CRLF is not referenced in the visible part of this module.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): MISC_LEN is not referenced in the visible part of this module;
# presumably its meaning is defined by how Charset.py uses it -- confirm there.
MISC_LEN = 7
46
47
48
49# Helpers
def header_length(bytearray):
    """Return the number of characters base64 needs to encode the argument.

    Only the length of the argument is consulted.  Base64 emits four output
    characters for every three input bytes, and a trailing group of one or
    two bytes is padded out to a full four characters.
    """
    full_groups, remainder = divmod(len(bytearray), 3)
    # A partial trailing group still costs one whole 4-character quantum.
    quanta = full_groups + 1 if remainder else full_groups
    return 4 * quanta
58
59
60
def header_encode(header_bytes, charset='iso-8859-1'):
    """Return header_bytes wrapped as a base64 `encoded-word'.

    header_bytes may be bytes or str; a str is first encoded to bytes using
    charset.  charset (default iso-8859-1) is also the character set name
    written into the result, which has the form =?charset?b?<base64>?=.

    An empty (falsy) header_bytes yields the empty string.  No line wrapping
    is performed.
    """
    if not header_bytes:
        return ""
    # Text input has to become bytes before it can be base64-encoded.
    raw = (header_bytes.encode(charset)
           if isinstance(header_bytes, str) else header_bytes)
    b64_text = b64encode(raw).decode("ascii")
    return '=?%s?b?%s?=' % (charset, b64_text)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073
74
75
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode bytes with base64, wrapping the result into lines.

    s is the data to encode.  Slices of it are handed to
    binascii.b2a_base64(), so it must be a bytes-like object.  The return
    value is always a str; it is the empty string when s is empty.

    Each encoded line holds at most maxlinelen characters (defaults to 76
    characters), not counting the line ending.  Lines are built from whole
    3-byte input groups, so the effective width is maxlinelen rounded down
    to a multiple of 4 and "=" padding can only appear at the end of the
    last line.  A base64 group is never shorter than 4 characters, so a
    maxlinelen below 4 is treated as 4.

    Each line of encoded text, including the last, will end with eol, which
    defaults to "\n".  Set this to "\r\n" if you will be using the result of
    this function directly in an email.
    """
    if not s:
        # Every other path returns str, so do not hand back b'' or None.
        return ""

    # Input bytes per output line: a whole number of 3-byte groups (each
    # becomes exactly 4 characters), and at least one group so the range()
    # step below can never be zero.
    max_unencoded = max(maxlinelen // 4, 1) * 3

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # b2a_base64() appends "\n" to what it returns; swap it for the
        # requested line ending when that differs.
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
98 return EMPTYSTRING.join(encvec)
99
100
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000101
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be str or a bytes-like object.  A str is converted to bytes
    with the 'raw-unicode-escape' codec before being passed to
    binascii.a2b_base64().  An empty (falsy) argument yields b''.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return b''
    if isinstance(string, str):
        # a2b_base64() is given bytes here, so convert text input first.
        string = string.encode('raw-unicode-escape')
    return a2b_base64(string)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000115
116
# For convenience and backwards compatibility w/ standard base64 module.
# Both names are plain aliases: they are bound to the same function object
# as decode(), so they accept the same input and return the same bytes.
body_decode = decode
decodestring = decode