blob: c60f8dbe253a2c525a483a29f3d84e8ab3b08d95 [file] [log] [blame]
Guido van Rossum8b3febe2007-08-30 01:15:14 +00001# Copyright (C) 2002-2007 Python Software Foundation
2# Author: Ben Gertzfield
3# Contact: email-sig@python.org
4
5"""Base64 content transfer encoding per RFCs 2045-2047.
6
7This module handles the content transfer encoding method defined in RFC 2045
8to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
9characters encoding known as Base64.
10
11It is used in the MIME standards for email to attach images, audio, and text
12using some 8-bit character sets to messages.
13
14This module provides an interface to encode and decode both headers and bodies
15with Base64 encoding.
16
17RFC 2047 defines a method for including character set information in an
18`encoded-word' in a header. This method is commonly used for 8-bit real names
19in To:, From:, Cc:, etc. fields, as well as Subject: lines.
20
21This module does not do the line wrapping or end-of-line character conversion
22necessary for proper internationalized headers; it only does dumb encoding and
23decoding. To deal with the various line wrapping issues, use the email.header
24module.
25"""
26
# Public names exported by ``from <module> import *``.  Every entry must be
# bound at module level: a name listed here without a matching definition
# makes the star-import fail with AttributeError, so only names this module
# actually defines are listed.
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
37
Guido van Rossum8b3febe2007-08-30 01:15:14 +000038
Guido van Rossum9604e662007-08-30 03:46:43 +000039from base64 import b64encode
Guido van Rossum8b3febe2007-08-30 01:15:14 +000040from binascii import b2a_base64, a2b_base64
Guido van Rossum8b3febe2007-08-30 01:15:14 +000041
# Line terminators and the empty string, named once for reuse in this module.
CRLF = "\r\n"
NL = "\n"
EMPTYSTRING = ""

# See also Charset.py
MISC_LEN = 7
48
49
50
51# Helpers
def header_length(bytearray):
    """Return the length of the input once it has been base64-encoded.

    Only ``len(bytearray)`` is consulted, so any sized object holding the
    raw data works.  Base64 emits 4 output characters for every 3 input
    bytes, and a trailing partial group still costs a full 4 characters.
    """
    # Ceiling-divide the input length into 3-byte groups; each group becomes
    # exactly 4 encoded characters.
    return 4 * ((len(bytearray) + 2) // 3)
60
61
62
def header_encode(header_bytes, charset='iso-8859-1'):
    """Encode a single header line with Base64 encoding in a given charset.

    header_bytes is the raw header value as a bytes-like object.  A str is
    also accepted and is first encoded to bytes using charset.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.  Base64 encoding is defined in RFC 2045.

    Returns a str of the form '=?charset?b?<base64 data>?=', or the empty
    string when header_bytes is empty.
    """
    # Nothing to encode.  Return a genuine empty string: str(header_bytes)
    # would turn b'' into the three-character text "b''".
    if not header_bytes:
        return ''
    if isinstance(header_bytes, str):
        header_bytes = header_bytes.encode(charset)
    # Base64 output is pure ASCII, so decoding it to str cannot fail.
    encoded = b64encode(header_bytes).decode("ascii")
    return '=?%s?b?%s?=' % (charset, encoded)
Guido van Rossum8b3febe2007-08-30 01:15:14 +000074
75
76
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode a bytes-like object with base64, one wrapped line at a time.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  A base64 line is built from whole 4-character groups, so
    the limit is effectively rounded down to a multiple of 4; values below 4
    cannot be honoured and produce 4-character lines.

    Each line of encoded text will end with eol, which defaults to "\n".  Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Returns the encoded text as a str; empty input yields the empty string.
    """
    if not s:
        # Always hand back a str, whatever empty object was passed in.
        return ''

    # Consume whole 3-byte groups per line.  A chunk size that is not a
    # multiple of 3 would make every line end in '=' padding and come out
    # longer than maxlinelen (e.g. 78 -> 58 bytes -> 80 characters).
    max_unencoded = max(maxlinelen // 4, 1) * 3
    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        # Swap the newline appended by b2a_base64() for the requested eol.
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
100
101
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000102
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    string may be a str or a bytes-like object holding base64 text.  Empty
    (or otherwise false) input yields an empty bytes object.  Errors raised
    by binascii.a2b_base64() for malformed data are not caught here.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header.Header class for that functionality.
    """
    if not string:
        return bytes()
    elif isinstance(string, str):
        # The decoder is fed bytes, so convert the text first.
        return a2b_base64(string.encode('raw-unicode-escape'))
    else:
        # Already bytes-like: decode the argument itself.
        return a2b_base64(string)
Guido van Rossum8b3febe2007-08-30 01:15:14 +0000116
117
# Alternate names for decode(), provided for convenience and for backwards
# compatibility with the standard base64 module.
decodestring = decode
body_decode = decode