Blame - Lib/email/base64MIME.py - platform/external/python/cpython3

blob: 56e44e1c2c62d5b0d86e7e015123452463575cab [file] [log] [blame]

Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	1	# Copyright (C) 2002 Python Software Foundation
				2	# Author: che@debian.org (Ben Gertzfield)
				3
				4	"""Base64 content transfer encoding per RFCs 2045-2047.
				5
				6	This module handles the content transfer encoding method defined in RFC 2045
				7	to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit
				8	characters encoding known as Base64.
				9
				10	It is used in the MIME standards for email to attach images, audio, and text
				11	using some 8-bit character sets to messages.
				12
				13	This module provides an interface to encode and decode both headers and bodies
				14	with Base64 encoding.
				15
				16	RFC 2047 defines a method for including character set information in an
				17	`encoded-word' in a header. This method is commonly used for 8-bit real names
				18	in To:, From:, Cc:, etc. fields, as well as Subject: lines.
				19
				20	This module does not do the line wrapping or end-of-line character conversion
				21	necessary for proper internationalized headers; it only does dumb encoding and
				22	decoding. To deal with the various line wrapping issues, use the email.Header
				23	module.
				24	"""
				25
				26	import re
				27	from binascii import b2a_base64, a2b_base64
				28	from email.Utils import fix_eols
				29
Barry Warsaw	d2b2e53	2002-06-02 19:08:31 +0000	[diff] [blame]	30	try:
				31	from email._compat22 import _floordiv
				32	except SyntaxError:
				33	# Python 2.1 spells integer division differently
				34	from email._compat21 import _floordiv
				35
				36
# Line separators and the empty-string joiner used throughout this module.
NL = '\n'
CRLF = '\r\n'
EMPTYSTRING = ''

# Number of fixed characters wrapped around the charset name and the base64
# text in an encoded header chunk: "=?" + "?b?" + "?=" is 7 characters.
# See also Charset.py
MISC_LEN = 7
				43
Barry Warsaw	c202d93	2002-09-28 21:02:51 +0000	[diff] [blame]	44	try:
				45	True, False
				46	except NameError:
				47	True = 1
				48	False = 0
				49
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	50
				51
				52	# Helpers
				53	def base64_len(s):
				54	"""Return the length of s when it is encoded with base64."""
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	55	groups_of_3, leftover = divmod(len(s), 3)
				56	# 4 bytes out for each 3 bytes (or nonzero fraction thereof) in.
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	57	# Thanks, Tim!
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	58	n = groups_of_3 * 4
				59	if leftover:
				60	n += 4
				61	return n
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	62
				63
				64
def header_encode(header, charset='iso-8859-1', keep_eols=False,
                  maxlinelen=76, eol=NL):
    """Encode a single header line with Base64 encoding in a given charset.

    The header is split into chunks and each chunk is emitted as its own
    RFC 2047 `encoded-word' using the Base64 alphabet of RFC 2045.  The
    chunks are joined with eol followed by a single space, so continuation
    lines start with a space.

    charset names the character set to use to encode the header.  It defaults
    to iso-8859-1.

    End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
    to the canonical email line separator \\r\\n unless the keep_eols
    parameter is True (the default is False).

    Each line of the header will be terminated in the value of eol, which
    defaults to "\\n".  Set this to "\\r\\n" if you are using the result of
    this function directly in email.

    The resulting string will be in the form:

    "=?charset?b?<base64 text>?=\\n
     =?charset?b?<base64 text>?="

    with each encoded chunk being, at most, maxlinelen characters long
    (defaults to 76 characters).  If maxlinelen is too small to hold the
    chrome plus a single 4-character base64 group, chunks of one group are
    emitted anyway (exceeding maxlinelen) rather than losing the header.

    An empty header is returned unchanged.
    """
    # Return empty headers unchanged
    if not header:
        return header

    if not keep_eols:
        header = fix_eols(header)

    # Room left for base64 text in each chunk once the encoded-word chrome
    # ("=?" + charset + "?b?" + "?=") has been accounted for.
    max_encoded = maxlinelen - len(charset) - MISC_LEN
    # Base64 turns every 3 input bytes into 4 characters and rounds a partial
    # group *up* to 4 characters, so only whole groups may be counted:
    # floor(max_encoded / 4) groups of 3 bytes.  Using floor(max_encoded*3/4)
    # here would let a trailing partial group push the chunk over the limit.
    max_unencoded = _floordiv(max_encoded, 4) * 3
    # Always make progress: a zero step would make range() raise and a
    # negative step would silently drop the whole header.
    if max_unencoded < 3:
        max_unencoded = 3

    # Base64 encode each chunk and add the RFC chrome to it
    lines = []
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])
        # Ignore the last character of the encoded chunk if it is a newline
        if chunk.endswith(NL):
            chunk = chunk[:-1]
        lines.append('=?%s?b?%s?=' % (charset, chunk))
    # Glue the lines together and return it.  BAW: should we be able to
    # specify the leading whitespace in the joiner?
    joiner = eol + ' '
    return joiner.join(lines)
				123
				124
				125
def encode(s, binary=True, maxlinelen=76, eol=NL):
    """Encode a string with base64.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Lines always carry a whole number of 4-character base64
    groups, so padding can only appear at the very end of the output.  If
    maxlinelen is smaller than 4, lines of a single group are emitted.

    If binary is False, end-of-line characters will be converted to the
    canonical email end-of-line sequence \\r\\n.  Otherwise they will be left
    verbatim (this is the default).

    Each line of encoded text will end with eol, which defaults to "\\n".  Set
    this to "\\r\\n" if you will be using the result of this function directly
    in an email.

    An empty s is returned unchanged.
    """
    if not s:
        return s

    if not binary:
        s = fix_eols(s)

    # Base64 maps 3 input bytes to 4 characters and pads a partial group up
    # to 4 characters.  Feeding each line a multiple of 3 bytes keeps every
    # line within maxlinelen and keeps '=' padding out of the middle of the
    # encoded body.  (For the default of 76 this is 57 bytes per line.)
    max_unencoded = _floordiv(maxlinelen, 4) * 3
    # Always make progress: a zero step would make range() raise and a
    # negative step would silently produce no output.
    if max_unencoded < 3:
        max_unencoded = 3

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # BAW: should encode() inherit b2a_base64()'s dubious behavior in
        # adding a newline to the encoded string?
        enc = b2a_base64(s[i:i + max_unencoded])
        # Swap the newline b2a_base64() appended for the requested eol
        if enc.endswith(NL) and eol != NL:
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
				156
				157
				158	# For convenience and backwards compatibility w/ standard base64 module
				159	body_encode = encode
				160	encodestring = encode
				161
				162
				163
				164	def decode(s, convert_eols=None):
				165	"""Decode a raw base64 string.
				166
				167	If convert_eols is set to a string value, all canonical email linefeeds,
				168	e.g. "\\r\\n", in the decoded text will be converted to the value of
				169	convert_eols. os.linesep is a good choice for convert_eols if you are
				170	decoding a text attachment.
				171
				172	This function does not parse a full MIME header value encoded with
				173	base64 (like =?iso-8895-1?b?bmloISBuaWgh?=) -- please use the high
				174	level email.Header class for that functionality.
				175	"""
				176	if not s:
				177	return s
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	178
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	179	dec = a2b_base64(s)
				180	if convert_eols:
				181	return dec.replace(CRLF, convert_eols)
				182	return dec
				183
				184
				185	# For convenience and backwards compatibility w/ standard base64 module
				186	body_decode = decode
				187	decodestring = decode