Blame - Lib/email/Header.py - platform/external/python/cpython3

blob: 714839ede5a4e574425105e375696a90bf195a1b [file] [log] [blame]

Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	1	# Copyright (C) 2002 Python Software Foundation
				2	# Author: che@debian.org (Ben Gertzfield)
				3
				4	"""Header encoding and decoding functionality."""
				5
				6	import re
				7	import email.quopriMIME
				8	import email.base64MIME
				9	from email.Charset import Charset
				10
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	11	try:
Barry Warsaw	1c30aa2	2002-06-01 05:49:17 +0000	[diff] [blame]	12	from email._compat22 import _floordiv
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	13	except SyntaxError:
				14	# Python 2.1 spells integer division differently
Barry Warsaw	1c30aa2	2002-06-01 05:49:17 +0000	[diff] [blame]	15	from email._compat21 import _floordiv
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	16
# Line-ending and continuation strings.  NLSPACE is the separator placed
# between the physical lines of an encoded header; CRLF and CRLFSPACE are
# not referenced by the code visible in this module.
CRLFSPACE = '\r\n '
CRLF = '\r\n'
NLSPACE = '\n '

# Default maximum length of one header line.
MAXLINELEN = 76

# NOTE(review): ENCODE/DECODE are not referenced by the code visible in this
# module -- presumably kept for external users; confirm before removing.
ENCODE = 1
DECODE = 2

# Match encoded-word strings in the form =?charset?q?Hello_World?=
# The pattern has three groups, so ecre.split() returns a flat list of the
# form [text, charset, encoding, encoded, text, charset, encoding, ...].
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<encoded>.*?)      # non-greedy up to the next ?= is the encoded string
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
				36
				37
				38
# Helpers
# Short module-level alias for the line-packing helper implemented in
# email.quopriMIME.  In this module it is only ever called as
# _max_append(list_of_lines, text, maxlinelen, ' ').
# NOTE(review): the helper's exact packing rules are defined in
# email.quopriMIME, not here -- consult that module before relying on them.
_max_append = email.quopriMIME._max_append
				41
				42
				43
				44	def decode_header(header):
				45	"""Decode a message header value without converting charset.
				46
				47	Returns a list of (decoded_string, charset) pairs containing each of the
				48	decoded parts of the header. Charset is None for non-encoded parts of the
				49	header, otherwise a lower-case string containing the name of the character
				50	set specified in the encoded string.
				51	"""
				52	# If no encoding, just return the header
				53	header = str(header)
				54	if not ecre.search(header):
				55	return [(header, None)]
				56
				57	decoded = []
				58	dec = ''
				59	for line in header.splitlines():
				60	# This line might not have an encoding in it
				61	if not ecre.search(line):
				62	decoded.append((line, None))
				63	continue
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	64
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	65	parts = ecre.split(line)
				66	while parts:
				67	unenc = parts.pop(0).strip()
				68	if unenc:
				69	# Should we continue a long line?
				70	if decoded and decoded[-1][1] is None:
				71	decoded[-1] = (decoded[-1][0] + dec, None)
				72	else:
				73	decoded.append((unenc, None))
				74	if parts:
				75	charset, encoding = [s.lower() for s in parts[0:2]]
				76	encoded = parts[2]
				77	dec = ''
				78	if encoding == 'q':
				79	dec = email.quopriMIME.header_decode(encoded)
				80	elif encoding == 'b':
				81	dec = email.base64MIME.decode(encoded)
				82	else:
				83	dec = encoded
				84
				85	if decoded and decoded[-1][1] == charset:
				86	decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
				87	else:
				88	decoded.append((dec, charset))
				89	del parts[0:3]
				90	return decoded
				91
				92
				93
				94	class Header:
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	95	def __init__(self, s, charset=None, maxlinelen=None, header_name=None):
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	96	"""Create a MIME-compliant header that can contain many languages.
				97
				98	Specify the initial header value in s. Specify its character set as a
				99	Charset object in the charset argument. If none, a default Charset
				100	instance will be used.
				101
				102	You can later append to the header with append(s, charset) below;
				103	charset does not have to be the same as the one initially specified
				104	here. In fact, it's optional, and if not given, defaults to the
				105	charset specified in the constructor.
				106
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	107	The maximum line length can be specified explicitly via maxlinelen.
				108	You can also pass None for maxlinelen and the name of a header field
				109	(e.g. "Subject") to let the constructor guess the best line length to
				110	use. The default maxlinelen is 76.
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	111	"""
				112	if charset is None:
				113	charset = Charset()
				114	self._charset = charset
				115	# BAW: I believe `chunks' and `maxlinelen' should be non-public.
				116	self._chunks = []
				117	self.append(s, charset)
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	118	if maxlinelen is None:
				119	if header_name is None:
				120	self._maxlinelen = MAXLINELEN
				121	else:
				122	self.guess_maxlinelen(header_name)
				123	else:
				124	self._maxlinelen = maxlinelen
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	125
				126	def __str__(self):
				127	"""A synonym for self.encode()."""
				128	return self.encode()
				129
				130	def guess_maxlinelen(self, s=None):
				131	"""Guess the maximum length to make each header line.
				132
				133	Given a header name (e.g. "Subject"), set this header's maximum line
				134	length to an appropriate length to avoid line wrapping. If s is not
				135	given, return the previous maximum line length and don't set it.
				136
				137	Returns the new maximum line length.
				138	"""
				139	# BAW: is this semantic necessary?
				140	if s is not None:
				141	self._maxlinelen = MAXLINELEN - len(s) - 2
				142	return self._maxlinelen
				143
				144	def append(self, s, charset=None):
				145	"""Append string s with Charset charset to the MIME header.
				146
				147	charset defaults to the one given in the class constructor.
				148	"""
				149	if charset is None:
				150	charset = self._charset
				151	self._chunks.append((s, charset))
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	152
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	153	def _split(self, s, charset):
				154	# Split up a header safely for use with encode_chunks. BAW: this
				155	# appears to be a private convenience method.
				156	splittable = charset.to_splittable(s)
				157	encoded = charset.from_splittable(splittable)
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	158	elen = charset.encoded_header_len(encoded)
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	159
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	160	if elen <= self._maxlinelen:
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	161	return [(encoded, charset)]
Barry Warsaw	812031b	2002-05-19 23:47:53 +0000	[diff] [blame]	162	# BAW: should we use encoded?
				163	elif elen == len(s):
				164	# We can split on _maxlinelen boundaries because we know that the
				165	# encoding won't change the size of the string
				166	splitpnt = self._maxlinelen
				167	first = charset.from_splittable(splittable[:splitpnt], 0)
				168	last = charset.from_splittable(splittable[splitpnt:], 0)
				169	return self._split(first, charset) + self._split(last, charset)
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	170	else:
Barry Warsaw	1c30aa2	2002-06-01 05:49:17 +0000	[diff] [blame]	171	# Divide and conquer.
				172	halfway = _floordiv(len(splittable), 2)
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	173	first = charset.from_splittable(splittable[:halfway], 0)
				174	last = charset.from_splittable(splittable[halfway:], 0)
				175	return self._split(first, charset) + self._split(last, charset)
				176
				177	def encode(self):
				178	"""Encode a message header, possibly converting charset and encoding.
				179
				180	There are many issues involved in converting a given string for use in
				181	an email header. Only certain character sets are readable in most
				182	email clients, and as header strings can only contain a subset of
				183	7-bit ASCII, care must be taken to properly convert and encode (with
				184	Base64 or quoted-printable) header strings. In addition, there is a
				185	75-character length limit on any given encoded header field, so
				186	line-wrapping must be performed, even with double-byte character sets.
Tim Peters	8ac1495	2002-05-23 15:15:30 +0000	[diff] [blame]	187
Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	188	This method will do its best to convert the string to the correct
				189	character set used in email, and encode and line wrap it safely with
				190	the appropriate scheme for that character set.
				191
				192	If the given charset is not known or an error occurs during
				193	conversion, this function will return the header untouched.
				194	"""
				195	newchunks = []
				196	for s, charset in self._chunks:
				197	newchunks += self._split(s, charset)
				198	self._chunks = newchunks
				199	return self.encode_chunks()
				200
				201	def encode_chunks(self):
				202	"""MIME-encode a header with many different charsets and/or encodings.
				203
				204	Given a list of pairs (string, charset), return a MIME-encoded string
				205	suitable for use in a header field. Each pair may have different
				206	charsets and/or encodings, and the resulting header will accurately
				207	reflect each setting.
				208
				209	Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like
				210	character sets like iso-8859-1), email.Utils.BASE64 (Base64, for
				211	non-ASCII like character sets like KOI8-R and iso-2022-jp), or None
				212	(no encoding).
				213
				214	Each pair will be represented on a separate line; the resulting string
				215	will be in the format:
				216
				217	"=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
				218	=?charset2?b?SvxyZ2VuIEL2aW5n?="
				219	"""
				220	chunks = []
				221	for header, charset in self._chunks:
				222	if charset is None:
				223	_max_append(chunks, header, self._maxlinelen, ' ')
				224	else:
				225	_max_append(chunks, charset.header_encode(header, 0),
				226	self._maxlinelen, ' ')
				227	return NLSPACE.join(chunks)