Blame - Lib/email/Header.py - platform/external/python/cpython3

blob: 097b9783335c38b41fc43b0dd2c623bb0d519009 [file] [log] [blame]

Barry Warsaw	409a4c0	2002-04-10 21:01:31 +0000	[diff] [blame]	1	# Copyright (C) 2002 Python Software Foundation
				2	# Author: che@debian.org (Ben Gertzfield)
				3
				4	"""Header encoding and decoding functionality."""
				5
				6	import re
				7	import email.quopriMIME
				8	import email.base64MIME
				9	from email.Charset import Charset
				10
				11	CRLFSPACE = '\r\n '
				12	CRLF = '\r\n'
				13	NLSPACE = '\n '
				14
				15	MAXLINELEN = 76
				16
				17	ENCODE = 1
				18	DECODE = 2
				19
				20	# Match encoded-word strings in the form =?charset?q?Hello_World?=
				21	ecre = re.compile(r'''
				22	=\? # literal =?
				23	(?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
				24	\? # literal ?
				25	(?P<encoding>[qb]) # either a "q" or a "b", case insensitive
				26	\? # literal ?
				27	(?P<encoded>.*?) # non-greedy up to the next ?= is the encoded string
				28	\?= # literal ?=
				29	''', re.VERBOSE \| re.IGNORECASE)
				30
				31
				32
# Helpers
# Module-level alias for email.quopriMIME._max_append; Header.encode_chunks()
# calls it to accumulate the pieces of the encoded header.
_max_append = email.quopriMIME._max_append
				35
				36
				37
				38	def decode_header(header):
				39	"""Decode a message header value without converting charset.
				40
				41	Returns a list of (decoded_string, charset) pairs containing each of the
				42	decoded parts of the header. Charset is None for non-encoded parts of the
				43	header, otherwise a lower-case string containing the name of the character
				44	set specified in the encoded string.
				45	"""
				46	# If no encoding, just return the header
				47	header = str(header)
				48	if not ecre.search(header):
				49	return [(header, None)]
				50
				51	decoded = []
				52	dec = ''
				53	for line in header.splitlines():
				54	# This line might not have an encoding in it
				55	if not ecre.search(line):
				56	decoded.append((line, None))
				57	continue
				58
				59	parts = ecre.split(line)
				60	while parts:
				61	unenc = parts.pop(0).strip()
				62	if unenc:
				63	# Should we continue a long line?
				64	if decoded and decoded[-1][1] is None:
				65	decoded[-1] = (decoded[-1][0] + dec, None)
				66	else:
				67	decoded.append((unenc, None))
				68	if parts:
				69	charset, encoding = [s.lower() for s in parts[0:2]]
				70	encoded = parts[2]
				71	dec = ''
				72	if encoding == 'q':
				73	dec = email.quopriMIME.header_decode(encoded)
				74	elif encoding == 'b':
				75	dec = email.base64MIME.decode(encoded)
				76	else:
				77	dec = encoded
				78
				79	if decoded and decoded[-1][1] == charset:
				80	decoded[-1] = (decoded[-1][0] + dec, decoded[-1][1])
				81	else:
				82	decoded.append((dec, charset))
				83	del parts[0:3]
				84	return decoded
				85
				86
				87
				88	class Header:
				89	def __init__(self, s, charset=None, maxlinelen=MAXLINELEN,
				90	header_name=None):
				91	"""Create a MIME-compliant header that can contain many languages.
				92
				93	Specify the initial header value in s. Specify its character set as a
				94	Charset object in the charset argument. If none, a default Charset
				95	instance will be used.
				96
				97	You can later append to the header with append(s, charset) below;
				98	charset does not have to be the same as the one initially specified
				99	here. In fact, it's optional, and if not given, defaults to the
				100	charset specified in the constructor.
				101
				102	The maximum line length can either be specified by maxlinelen, or you
				103	can pass in the name of the header field (e.g. "Subject") to let this
				104	class guess the best line length to use to prevent wrapping. The
				105	default maxlinelen is 76.
				106	"""
				107	if charset is None:
				108	charset = Charset()
				109	self._charset = charset
				110	# BAW: I believe `chunks' and `maxlinelen' should be non-public.
				111	self._chunks = []
				112	self.append(s, charset)
				113	self._maxlinelen = maxlinelen
				114	if header_name is not None:
				115	self.guess_maxlinelen(header_name)
				116
				117	def __str__(self):
				118	"""A synonym for self.encode()."""
				119	return self.encode()
				120
				121	def guess_maxlinelen(self, s=None):
				122	"""Guess the maximum length to make each header line.
				123
				124	Given a header name (e.g. "Subject"), set this header's maximum line
				125	length to an appropriate length to avoid line wrapping. If s is not
				126	given, return the previous maximum line length and don't set it.
				127
				128	Returns the new maximum line length.
				129	"""
				130	# BAW: is this semantic necessary?
				131	if s is not None:
				132	self._maxlinelen = MAXLINELEN - len(s) - 2
				133	return self._maxlinelen
				134
				135	def append(self, s, charset=None):
				136	"""Append string s with Charset charset to the MIME header.
				137
				138	charset defaults to the one given in the class constructor.
				139	"""
				140	if charset is None:
				141	charset = self._charset
				142	self._chunks.append((s, charset))
				143
				144	def _split(self, s, charset):
				145	# Split up a header safely for use with encode_chunks. BAW: this
				146	# appears to be a private convenience method.
				147	splittable = charset.to_splittable(s)
				148	encoded = charset.from_splittable(splittable)
				149
				150	if charset.encoded_header_len(encoded) < self._maxlinelen:
				151	return [(encoded, charset)]
				152	else:
				153	# Divide and conquer. BAW: halfway depends on integer division.
				154	# When porting to Python 2.2, use the // operator.
				155	halfway = len(splittable) // 2
				156	first = charset.from_splittable(splittable[:halfway], 0)
				157	last = charset.from_splittable(splittable[halfway:], 0)
				158	return self._split(first, charset) + self._split(last, charset)
				159
				160	def encode(self):
				161	"""Encode a message header, possibly converting charset and encoding.
				162
				163	There are many issues involved in converting a given string for use in
				164	an email header. Only certain character sets are readable in most
				165	email clients, and as header strings can only contain a subset of
				166	7-bit ASCII, care must be taken to properly convert and encode (with
				167	Base64 or quoted-printable) header strings. In addition, there is a
				168	75-character length limit on any given encoded header field, so
				169	line-wrapping must be performed, even with double-byte character sets.
				170
				171	This method will do its best to convert the string to the correct
				172	character set used in email, and encode and line wrap it safely with
				173	the appropriate scheme for that character set.
				174
				175	If the given charset is not known or an error occurs during
				176	conversion, this function will return the header untouched.
				177	"""
				178	newchunks = []
				179	for s, charset in self._chunks:
				180	newchunks += self._split(s, charset)
				181	self._chunks = newchunks
				182	return self.encode_chunks()
				183
				184	def encode_chunks(self):
				185	"""MIME-encode a header with many different charsets and/or encodings.
				186
				187	Given a list of pairs (string, charset), return a MIME-encoded string
				188	suitable for use in a header field. Each pair may have different
				189	charsets and/or encodings, and the resulting header will accurately
				190	reflect each setting.
				191
				192	Each encoding can be email.Utils.QP (quoted-printable, for ASCII-like
				193	character sets like iso-8859-1), email.Utils.BASE64 (Base64, for
				194	non-ASCII like character sets like KOI8-R and iso-2022-jp), or None
				195	(no encoding).
				196
				197	Each pair will be represented on a separate line; the resulting string
				198	will be in the format:
				199
				200	"=?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
				201	=?charset2?b?SvxyZ2VuIEL2aW5n?="
				202	"""
				203	chunks = []
				204	for header, charset in self._chunks:
				205	if charset is None:
				206	_max_append(chunks, header, self._maxlinelen, ' ')
				207	else:
				208	_max_append(chunks, charset.header_encode(header, 0),
				209	self._maxlinelen, ' ')
				210	return NLSPACE.join(chunks)