blob: 6cbfdf6ad8ad2aa63ff7c9b0c64bba5e2436ef8b [file] [log] [blame]
# Copyright (C) 2002-2007 Python Software Foundation
# Author: Ben Gertzfield
# Contact: email-sig@python.org
4
"""Base64 content transfer encoding per RFCs 2045-2047.

This module handles the content transfer encoding method defined in RFC 2045
to encode arbitrary 8-bit data by packing every three 8-bit bytes into four
7-bit characters, an encoding known as Base64.

It is used in the MIME standards for email to attach images, audio, and text
using some 8-bit character sets to messages.

This module provides an interface to encode and decode both headers and bodies
with Base64 encoding.

RFC 2047 defines a method for including character set information in an
`encoded-word' in a header.  This method is commonly used for 8-bit real names
in To:, From:, Cc:, etc. fields, as well as Subject: lines.

This module does not do the line wrapping or end-of-line character conversion
necessary for proper internationalized headers; it only does dumb encoding and
decoding.  To deal with the various line wrapping issues, use the email.header
module.
"""
26
# Public names exported by ``from <module> import *``.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
35
Guido van Rossum8b3febe2007-08-30 01:15:14 +000036
from base64 import b64encode
from binascii import b2a_base64, a2b_base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000039
# Line terminators and the empty-string joiner used by the encoders below.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): presumably the number of non-payload characters in an
# encoded-word ("=?" + "?b?" + "?=" is 7 characters) -- confirm against the
# charset module before relying on this.
MISC_LEN = 7
46
47
48
49# Helpers
def header_length(bytearray):
    """Return the length of bytearray when it is encoded with base64.

    Only len() is called on the argument, so any sized object is accepted.
    The result counts the padded base64 output: four characters for every
    group of three input bytes, with a trailing partial group rounded up to
    a full four characters.
    """
    # NOTE: the parameter name shadows the builtin ``bytearray``; it is kept
    # unchanged so that callers passing it by keyword keep working.
    groups_of_3, leftover = divmod(len(bytearray), 3)
    # 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
    n = groups_of_3 * 4
    if leftover:
        n += 4
    return n
58
59
60
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    header_bytes is the header value, either as bytes or as a str; a str is
    first encoded to bytes using charset.  An empty (falsy) value yields the
    empty string rather than an empty encoded-word.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    Returns a str of the form '=?charset?b?base64-data?='.
    """
    if not header_bytes:
        return ""
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    # b64encode() returns bytes; base64 output is pure ASCII by construction.
    encoded = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, encoded)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000073
74
75
def body_encode(s, maxlinelen=76, eol=NL):
    """Encode a bytes-like object with base64, returning a str.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty (falsy) s yields the empty string, so the return type is str
    regardless of the input.
    """
    if not s:
        # Always hand back a str; returning s itself would leak b'' to callers
        # that get a str for every non-empty input.
        return ""

    encvec = []
    # Number of input bytes whose base64 form fits within maxlinelen chars.
    max_unencoded = maxlinelen * 3 // 4
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        if enc.endswith(NL) and eol != NL:
            # Swap the newline appended by b2a_base64() for the requested eol.
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
99
100
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000101
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object.  A str is converted to bytes
    with the raw-unicode-escape codec before being decoded; anything else is
    passed to a2b_base64() unchanged.  An empty (falsy) value yields b''.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return bytes()
    elif isinstance(string, str):
        return a2b_base64(string.encode('raw-unicode-escape'))
    else:
        return a2b_base64(string)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000115
116
# For convenience and backwards compatibility w/ standard base64 module
body_decode = decode
decodestring = decode