Blame - Lib/email/headerregistry.py - platform/external/python/cpython3

blob: 911a2afea7349ce206e83e1859c667379e0bb015 [file] [log] [blame]

R David Murray	0b6f6c8	2012-05-25 18:42:14 -0400	[diff] [blame]	1	"""Representing and manipulating email headers via custom objects.
				2
				3	This module provides an implementation of the HeaderRegistry API.
				4	The implementation is designed to flexibly follow RFC5322 rules.
				5
				6	Eventually HeaderRegistry will be a public API, but it isn't yet,
				7	and will probably change some before that happens.
				8
				9	"""
R David Murray	685b349	2014-10-17 19:30:13 -0400	[diff] [blame]	10	from types import MappingProxyType
R David Murray	0b6f6c8	2012-05-25 18:42:14 -0400	[diff] [blame]	11
				12	from email import utils
				13	from email import errors
				14	from email import _header_value_parser as parser
				15
				16	class Address:
				17
				18	def __init__(self, display_name='', username='', domain='', addr_spec=None):
				19	"""Create an object represeting a full email address.
				20
				21	An address can have a 'display_name', a 'username', and a 'domain'. In
				22	addition to specifying the username and domain separately, they may be
				23	specified together by using the addr_spec keyword instead of the
				24	username and domain keywords. If an addr_spec string is specified it
				25	must be properly quoted according to RFC 5322 rules; an error will be
				26	raised if it is not.
				27
				28	An Address object has display_name, username, domain, and addr_spec
				29	attributes, all of which are read-only. The addr_spec and the string
				30	value of the object are both quoted according to RFC5322 rules, but
				31	without any Content Transfer Encoding.
				32
				33	"""
				34	# This clause with its potential 'raise' may only happen when an
				35	# application program creates an Address object using an addr_spec
				36	# keyword. The email library code itself must always supply username
				37	# and domain.
				38	if addr_spec is not None:
				39	if username or domain:
				40	raise TypeError("addrspec specified when username and/or "
				41	"domain also specified")
				42	a_s, rest = parser.get_addr_spec(addr_spec)
				43	if rest:
				44	raise ValueError("Invalid addr_spec; only '{}' "
				45	"could be parsed from '{}'".format(
				46	a_s, addr_spec))
				47	if a_s.all_defects:
				48	raise a_s.all_defects[0]
				49	username = a_s.local_part
				50	domain = a_s.domain
				51	self._display_name = display_name
				52	self._username = username
				53	self._domain = domain
				54
				55	@property
				56	def display_name(self):
				57	return self._display_name
				58
				59	@property
				60	def username(self):
				61	return self._username
				62
				63	@property
				64	def domain(self):
				65	return self._domain
				66
				67	@property
				68	def addr_spec(self):
				69	"""The addr_spec (username@domain) portion of the address, quoted
				70	according to RFC 5322 rules, but with no Content Transfer Encoding.
				71	"""
				72	nameset = set(self.username)
				73	if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
				74	lp = parser.quote_string(self.username)
				75	else:
				76	lp = self.username
				77	if self.domain:
				78	return lp + '@' + self.domain
				79	if not lp:
				80	return '<>'
				81	return lp
				82
				83	def __repr__(self):
				84	return "Address(display_name={!r}, username={!r}, domain={!r})".format(
				85	self.display_name, self.username, self.domain)
				86
				87	def __str__(self):
				88	nameset = set(self.display_name)
				89	if len(nameset) > len(nameset-parser.SPECIALS):
				90	disp = parser.quote_string(self.display_name)
				91	else:
				92	disp = self.display_name
				93	if disp:
				94	addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
				95	return "{} <{}>".format(disp, addr_spec)
				96	return self.addr_spec
				97
				98	def __eq__(self, other):
				99	if type(other) != type(self):
				100	return False
				101	return (self.display_name == other.display_name and
				102	self.username == other.username and
				103	self.domain == other.domain)
				104
				105
				106	class Group:
				107
				108	def __init__(self, display_name=None, addresses=None):
				109	"""Create an object representing an address group.
				110
				111	An address group consists of a display_name followed by colon and an
				112	list of addresses (see Address) terminated by a semi-colon. The Group
				113	is created by specifying a display_name and a possibly empty list of
				114	Address objects. A Group can also be used to represent a single
				115	address that is not in a group, which is convenient when manipulating
				116	lists that are a combination of Groups and individual Addresses. In
				117	this case the display_name should be set to None. In particular, the
				118	string representation of a Group whose display_name is None is the same
				119	as the Address object, if there is one and only one Address object in
				120	the addresses list.
				121
				122	"""
				123	self._display_name = display_name
				124	self._addresses = tuple(addresses) if addresses else tuple()
				125
				126	@property
				127	def display_name(self):
				128	return self._display_name
				129
				130	@property
				131	def addresses(self):
				132	return self._addresses
				133
				134	def __repr__(self):
				135	return "Group(display_name={!r}, addresses={!r}".format(
				136	self.display_name, self.addresses)
				137
				138	def __str__(self):
				139	if self.display_name is None and len(self.addresses)==1:
				140	return str(self.addresses[0])
				141	disp = self.display_name
				142	if disp is not None:
				143	nameset = set(disp)
				144	if len(nameset) > len(nameset-parser.SPECIALS):
				145	disp = parser.quote_string(disp)
				146	adrstr = ", ".join(str(x) for x in self.addresses)
				147	adrstr = ' ' + adrstr if adrstr else adrstr
				148	return "{}:{};".format(disp, adrstr)
				149
				150	def __eq__(self, other):
				151	if type(other) != type(self):
				152	return False
				153	return (self.display_name == other.display_name and
				154	self.addresses == other.addresses)
				155
				156
				157	# Header Classes #
				158
				159	class BaseHeader(str):
				160
				161	"""Base class for message headers.
				162
				163	Implements generic behavior and provides tools for subclasses.
				164
				165	A subclass must define a classmethod named 'parse' that takes an unfolded
				166	value string and a dictionary as its arguments. The dictionary will
				167	contain one key, 'defects', initialized to an empty list. After the call
				168	the dictionary must contain two additional keys: parse_tree, set to the
				169	parse tree obtained from parsing the header, and 'decoded', set to the
				170	string value of the idealized representation of the data from the value.
				171	(That is, encoded words are decoded, and values that have canonical
				172	representations are so represented.)
				173
				174	The defects key is intended to collect parsing defects, which the message
				175	parser will subsequently dispose of as appropriate. The parser should not,
				176	insofar as practical, raise any errors. Defects should be added to the
				177	list instead. The standard header parsers register defects for RFC
				178	compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
				179	errors.
				180
				181	The parse method may add additional keys to the dictionary. In this case
				182	the subclass must define an 'init' method, which will be passed the
				183	dictionary as its keyword arguments. The method should use (usually by
				184	setting them as the value of similarly named attributes) and remove all the
				185	extra keys added by its parse method, and then use super to call its parent
				186	class with the remaining arguments and keywords.
				187
				188	The subclass should also make sure that a 'max_count' attribute is defined
				189	that is either None or 1. XXX: need to better define this API.
				190
				191	"""
				192
				193	def __new__(cls, name, value):
				194	kwds = {'defects': []}
				195	cls.parse(value, kwds)
				196	if utils._has_surrogates(kwds['decoded']):
				197	kwds['decoded'] = utils._sanitize(kwds['decoded'])
				198	self = str.__new__(cls, kwds['decoded'])
				199	del kwds['decoded']
				200	self.init(name, **kwds)
				201	return self
				202
				203	def init(self, name, *, parse_tree, defects):
				204	self._name = name
				205	self._parse_tree = parse_tree
				206	self._defects = defects
				207
				208	@property
				209	def name(self):
				210	return self._name
				211
				212	@property
				213	def defects(self):
				214	return tuple(self._defects)
				215
				216	def __reduce__(self):
				217	return (
				218	_reconstruct_header,
				219	(
				220	self.__class__.__name__,
				221	self.__class__.__bases__,
				222	str(self),
				223	),
				224	self.__dict__)
				225
				226	@classmethod
				227	def _reconstruct(cls, value):
				228	return str.__new__(cls, value)
				229
				230	def fold(self, *, policy):
				231	"""Fold header according to policy.
				232
				233	The parsed representation of the header is folded according to
				234	RFC5322 rules, as modified by the policy. If the parse tree
				235	contains surrogateescaped bytes, the bytes are CTE encoded using
				236	the charset 'unknown-8bit".
				237
				238	Any non-ASCII characters in the parse tree are CTE encoded using
				239	charset utf-8. XXX: make this a policy setting.
				240
				241	The returned value is an ASCII-only string possibly containing linesep
				242	characters, and ending with a linesep character. The string includes
				243	the header name and the ': ' separator.
				244
				245	"""
				246	# At some point we need to only put fws here if it was in the source.
				247	header = parser.Header([
				248	parser.HeaderLabel([
				249	parser.ValueTerminal(self.name, 'header-name'),
				250	parser.ValueTerminal(':', 'header-sep')]),
				251	parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]),
				252	self._parse_tree])
				253	return header.fold(policy=policy)
				254
				255
				256	def _reconstruct_header(cls_name, bases, value):
				257	return type(cls_name, bases, {})._reconstruct(value)
				258
				259
				260	class UnstructuredHeader:
				261
				262	max_count = None
				263	value_parser = staticmethod(parser.get_unstructured)
				264
				265	@classmethod
				266	def parse(cls, value, kwds):
				267	kwds['parse_tree'] = cls.value_parser(value)
				268	kwds['decoded'] = str(kwds['parse_tree'])
				269
				270
				271	class UniqueUnstructuredHeader(UnstructuredHeader):
				272
				273	max_count = 1
				274
				275
				276	class DateHeader:
				277
				278	"""Header whose value consists of a single timestamp.
				279
				280	Provides an additional attribute, datetime, which is either an aware
				281	datetime using a timezone, or a naive datetime if the timezone
				282	in the input string is -0000. Also accepts a datetime as input.
				283	The 'value' attribute is the normalized form of the timestamp,
				284	which means it is the output of format_datetime on the datetime.
				285	"""
				286
				287	max_count = None
				288
				289	# This is used only for folding, not for creating 'decoded'.
				290	value_parser = staticmethod(parser.get_unstructured)
				291
				292	@classmethod
				293	def parse(cls, value, kwds):
				294	if not value:
				295	kwds['defects'].append(errors.HeaderMissingRequiredValue())
				296	kwds['datetime'] = None
				297	kwds['decoded'] = ''
				298	kwds['parse_tree'] = parser.TokenList()
				299	return
				300	if isinstance(value, str):
				301	value = utils.parsedate_to_datetime(value)
				302	kwds['datetime'] = value
				303	kwds['decoded'] = utils.format_datetime(kwds['datetime'])
				304	kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
				305
				306	def init(self, args, *kw):
				307	self._datetime = kw.pop('datetime')
				308	super().init(args, *kw)
				309
				310	@property
				311	def datetime(self):
				312	return self._datetime
				313
				314
				315	class UniqueDateHeader(DateHeader):
				316
				317	max_count = 1
				318
				319
				320	class AddressHeader:
				321
				322	max_count = None
				323
				324	@staticmethod
				325	def value_parser(value):
				326	address_list, value = parser.get_address_list(value)
				327	assert not value, 'this should not happen'
				328	return address_list
				329
				330	@classmethod
				331	def parse(cls, value, kwds):
				332	if isinstance(value, str):
				333	# We are translating here from the RFC language (address/mailbox)
				334	# to our API language (group/address).
				335	kwds['parse_tree'] = address_list = cls.value_parser(value)
				336	groups = []
				337	for addr in address_list.addresses:
				338	groups.append(Group(addr.display_name,
				339	[Address(mb.display_name or '',
				340	mb.local_part or '',
				341	mb.domain or '')
				342	for mb in addr.all_mailboxes]))
				343	defects = list(address_list.all_defects)
				344	else:
				345	# Assume it is Address/Group stuff
				346	if not hasattr(value, '__iter__'):
				347	value = [value]
				348	groups = [Group(None, [item]) if not hasattr(item, 'addresses')
				349	else item
				350	for item in value]
				351	defects = []
				352	kwds['groups'] = groups
				353	kwds['defects'] = defects
				354	kwds['decoded'] = ', '.join([str(item) for item in groups])
				355	if 'parse_tree' not in kwds:
				356	kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
				357
				358	def init(self, args, *kw):
				359	self._groups = tuple(kw.pop('groups'))
				360	self._addresses = None
				361	super().init(args, *kw)
				362
				363	@property
				364	def groups(self):
				365	return self._groups
				366
				367	@property
				368	def addresses(self):
				369	if self._addresses is None:
				370	self._addresses = tuple([address for group in self._groups
				371	for address in group.addresses])
				372	return self._addresses
				373
				374
				375	class UniqueAddressHeader(AddressHeader):
				376
				377	max_count = 1
				378
				379
				380	class SingleAddressHeader(AddressHeader):
				381
				382	@property
				383	def address(self):
				384	if len(self.addresses)!=1:
				385	raise ValueError(("value of single address header {} is not "
				386	"a single address").format(self.name))
				387	return self.addresses[0]
				388
				389
				390	class UniqueSingleAddressHeader(SingleAddressHeader):
				391
				392	max_count = 1
				393
				394
class MIMEVersionHeader:
    """MIME-Version header exposing 'version', 'major' and 'minor'.

    All three attributes are None when the parser found no minor version.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and version parts."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # A major number without a minor number is not reported.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        # Consume the keys added by parse() before delegating to the parent.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        """The version as a 'major.minor' string, or None."""
        return self._version
				430
				431
				432	class ParameterizedMIMEHeader:
				433
				434	# Mixin that handles the params dict. Must be subclassed and
				435	# a property value_parser for the specific header provided.
				436
				437	max_count = 1
				438
				439	@classmethod
				440	def parse(cls, value, kwds):
				441	kwds['parse_tree'] = parse_tree = cls.value_parser(value)
				442	kwds['decoded'] = str(parse_tree)
				443	kwds['defects'].extend(parse_tree.all_defects)
				444	if parse_tree.params is None:
				445	kwds['params'] = {}
				446	else:
				447	# The MIME RFCs specify that parameter ordering is arbitrary.
				448	kwds['params'] = {utils._sanitize(name).lower():
				449	utils._sanitize(value)
				450	for name, value in parse_tree.params}
				451
				452	def init(self, args, *kw):
				453	self._params = kw.pop('params')
				454	super().init(args, *kw)
				455
				456	@property
				457	def params(self):
R David Murray	685b349	2014-10-17 19:30:13 -0400	[diff] [blame]	458	return MappingProxyType(self._params)
R David Murray	97f43c0	2012-06-24 05:03:27 -0400	[diff] [blame]	459
				460
				461	class ContentTypeHeader(ParameterizedMIMEHeader):
				462
				463	value_parser = staticmethod(parser.parse_content_type_header)
				464
				465	def init(self, args, *kw):
				466	super().init(args, *kw)
				467	self._maintype = utils._sanitize(self._parse_tree.maintype)
				468	self._subtype = utils._sanitize(self._parse_tree.subtype)
				469
				470	@property
				471	def maintype(self):
				472	return self._maintype
				473
				474	@property
				475	def subtype(self):
				476	return self._subtype
				477
				478	@property
				479	def content_type(self):
				480	return self.maintype + '/' + self.subtype
				481
				482
				483	class ContentDispositionHeader(ParameterizedMIMEHeader):
				484
				485	value_parser = staticmethod(parser.parse_content_disposition_header)
				486
				487	def init(self, args, *kw):
				488	super().init(args, *kw)
				489	cd = self._parse_tree.content_disposition
				490	self._content_disposition = cd if cd is None else utils._sanitize(cd)
				491
				492	@property
				493	def content_disposition(self):
				494	return self._content_disposition
				495
				496
				497	class ContentTransferEncodingHeader:
				498
				499	max_count = 1
				500
				501	value_parser = staticmethod(parser.parse_content_transfer_encoding_header)
				502
				503	@classmethod
				504	def parse(cls, value, kwds):
				505	kwds['parse_tree'] = parse_tree = cls.value_parser(value)
				506	kwds['decoded'] = str(parse_tree)
				507	kwds['defects'].extend(parse_tree.all_defects)
				508
				509	def init(self, args, *kw):
				510	super().init(args, *kw)
				511	self._cte = utils._sanitize(self._parse_tree.cte)
				512
				513	@property
				514	def cte(self):
				515	return self._cte
				516
				517
R David Murray	0b6f6c8	2012-05-25 18:42:14 -0400	[diff] [blame]	518	# The header factory #
				519
# Maps lowercased header names to the specialized class used for them.
# HeaderRegistry copies this mapping into its registry when it is created
# with use_default_map=True.
_default_header_map = {
    # Unstructured text
    'subject':                      UniqueUnstructuredHeader,
    # Dates
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Addresses
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME headers
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
				541
				542	class HeaderRegistry:
				543
				544	"""A header_factory and header registry."""
				545
				546	def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
				547	use_default_map=True):
				548	"""Create a header_factory that works with the Policy API.
				549
				550	base_class is the class that will be the last class in the created
				551	header class's __bases__ list. default_class is the class that will be
				552	used if "name" (see __call__) does not appear in the registry.
				553	use_default_map controls whether or not the default mapping of names to
				554	specialized classes is copied in to the registry when the factory is
				555	created. The default is True.
				556
				557	"""
				558	self.registry = {}
				559	self.base_class = base_class
				560	self.default_class = default_class
				561	if use_default_map:
				562	self.registry.update(_default_header_map)
				563
				564	def map_to_type(self, name, cls):
				565	"""Register cls as the specialized class for handling "name" headers.
				566
				567	"""
				568	self.registry[name.lower()] = cls
				569
				570	def __getitem__(self, name):
				571	cls = self.registry.get(name.lower(), self.default_class)
				572	return type('_'+cls.__name__, (cls, self.base_class), {})
				573
				574	def __call__(self, name, value):
				575	"""Create a header instance for header 'name' from 'value'.
				576
				577	Creates a header instance by creating a specialized class for parsing
				578	and representing the specified header by combining the factory
				579	base_class with a specialized class from the registry or the
				580	default_class, and passing the name and value to the constructed
				581	class's constructor.
				582
				583	"""
				584	return self[name](name, value)