"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
from types import MappingProxyType

from email import utils
from email import errors
from email import _header_value_parser as parser
15
class Address:
    """A single email address: display name, username and domain.

    The three parts are exposed through read-only properties.  Two
    Address objects compare equal when all three parts are equal, and
    equal objects hash equal, so addresses can be used in sets and as
    dictionary keys.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be parsed completely; raises TypeError if addr_spec is given
        together with username and/or domain; re-raises the first defect the
        parser recorded for addr_spec, if any.

        """

        # Reject CR/LF in any supplied part before doing anything else.
        inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        lp = self.username
        # Quote the local part if it contains any character from
        # parser.DOT_ATOM_ENDS.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither username nor domain: represent the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        disp = self.display_name
        # Quote the display name if it contains any character from
        # parser.SPECIALS.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # With a display name the null address is rendered as 'name <>',
            # not 'name <<>>'.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash the
        # same three fields that __eq__ compares.
        return hash((self.display_name, self.username, self.domain))
106
107
class Group:
    """An address group: an optional display name plus a tuple of addresses.

    Both parts are exposed through read-only properties.  Two Group objects
    compare equal when their display names and address tuples are equal.
    """

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the property cannot be used to mutate the group.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None."""
        return self._display_name

    @property
    def addresses(self):
        """The group's members, as a tuple."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from this format string, which
        # produced an unbalanced repr.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        # A nameless group holding exactly one address renders as that address.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        # NOTE(review): a display_name of None with zero or several addresses
        # is formatted literally as 'None:...;' here -- confirm that is intended.
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)

    def __hash__(self):
        # Hash the same fields __eq__ compares.  This requires every member
        # of the addresses tuple to be hashable.
        return hash((self.display_name, self.addresses))
156
157
# Header Classes #
159
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse value with cls.parse and build the header string from it."""
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        # The str value of the header is the decoded text; everything else
        # the parser produced is handed to init() as keyword arguments.
        self = str.__new__(cls, kwds['decoded'])
        del kwds['decoded']
        self.init(name, **kwds)
        return self

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """The collected defects, as a tuple."""
        return tuple(self._defects)

    def __reduce__(self):
        # Pickle by class name and bases rather than by class object; the
        # instance __dict__ is restored as state.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Build the str instance directly, bypassing __new__ (and therefore
        # parsing).
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            ])
        # Only add the separating space and the value when there is a value.
        if self._parse_tree:
            header.append(
                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
        header.append(self._parse_tree)
        return header.fold(policy=policy)
258
259
260def _reconstruct_header(cls_name, bases, value):
261 return type(cls_name, bases, {})._reconstruct(value)
262
263
class UnstructuredHeader:
    """Header whose value is parsed with parser.get_unstructured."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree and its string form in kwds."""
        kwds['parse_tree'] = cls.value_parser(value)
        kwds['decoded'] = str(kwds['parse_tree'])
273
274
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader with max_count set to 1."""

    max_count = 1
278
279
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the input is empty, or is a string that cannot be parsed as a date,
    a defect is recorded and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        value may be a date string or a datetime object.  Problems with the
        input are appended to kwds['defects'] rather than raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            raw = value
            try:
                value = utils.parsedate_to_datetime(raw)
            except (TypeError, ValueError):
                # Header parsers are expected to record defects instead of
                # raising.  Keep the raw text so that nothing is lost when
                # the header is read back or folded.
                kwds['defects'].append(errors.InvalidHeaderDefect(
                    'Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = raw
                kwds['parse_tree'] = cls.value_parser(raw)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' key and pass the rest up the MRO."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if there was no usable value."""
        return self._datetime
317
318
class UniqueDateHeader(DateHeader):
    """DateHeader with max_count set to 1."""

    max_count = 1
322
323
class AddressHeader:
    """Header whose value is a list of addresses and/or address groups.

    Provides two additional attributes: groups, a tuple of Group objects,
    and addresses, a flat tuple of every address in those groups.
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        address_list, value = parser.get_address_list(value)
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string, or an Address/Group object, or an
        iterable of such objects.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            kwds['parse_tree'] = address_list = cls.value_parser(value)
            groups = []
            for addr in address_list.addresses:
                groups.append(Group(addr.display_name,
                                    [Address(mb.display_name or '',
                                             mb.local_part or '',
                                             mb.domain or '')
                                     for mb in addr.all_mailboxes]))
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Anything without an 'addresses' attribute is treated as a
            # single address and wrapped in a nameless Group.
            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
                                          else item
                                    for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        # When built from objects there is no parse tree yet; derive one
        # from the decoded string.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' key and pass the rest up the MRO."""
        self._groups = tuple(kw.pop('groups'))
        # Computed lazily by the addresses property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """The groups in this header, as a tuple."""
        return self._groups

    @property
    def addresses(self):
        """Every address from every group, flattened into one tuple."""
        if self._addresses is None:
            self._addresses = tuple(address for group in self._groups
                                            for address in group.addresses)
        return self._addresses
377
378
class UniqueAddressHeader(AddressHeader):
    """AddressHeader with max_count set to 1."""

    max_count = 1
382
383
class SingleAddressHeader(AddressHeader):
    """AddressHeader that also exposes its sole address as 'address'."""

    @property
    def address(self):
        """The header's only address.

        Raises ValueError if the header does not contain exactly one address.
        """
        if len(self.addresses) != 1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return self.addresses[0]
392
393
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader with max_count set to 1."""

    max_count = 1
397
398
class MIMEVersionHeader:
    """Header parsed with parser.parse_mime_version.

    Provides three additional attributes: major, minor and version.  All
    three are None when the parse tree has no minor version number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and version parts."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # A major number without a minor number is not reported at all.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        """Consume the version keys and pass the rest up the MRO."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """The major version number, or None."""
        return self._major

    @property
    def minor(self):
        """The minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """The version as a 'major.minor' string, or None."""
        return self._version
434
435
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text, defects and params."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
        else:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            # Parameter names are lowercased; names and values are sanitized.
            kwds['params'] = {utils._sanitize(name).lower():
                                    utils._sanitize(value)
                               for name, value in parse_tree.params}

    def init(self, *args, **kw):
        """Consume the 'params' key and pass the rest up the MRO."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """A read-only mapping view of the header's parameters."""
        return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400463
464
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Header parsed with parser.parse_content_type_header.

    Provides maintype, subtype and content_type attributes in addition to
    the inherited params.
    """

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the inherited init, then record the sanitized type parts."""
        super().init(*args, **kw)
        self._maintype = utils._sanitize(self._parse_tree.maintype)
        self._subtype = utils._sanitize(self._parse_tree.subtype)

    @property
    def maintype(self):
        """The main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """The subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return self.maintype + '/' + self.subtype
485
486
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Header parsed with parser.parse_content_disposition_header.

    Provides a content_disposition attribute in addition to the inherited
    params.
    """

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the inherited init, then record the disposition."""
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        # None is kept as None; any other value is sanitized.
        self._content_disposition = cd if cd is None else utils._sanitize(cd)

    @property
    def content_disposition(self):
        """The disposition taken from the parse tree, or None."""
        return self._content_disposition
499
500
class ContentTransferEncodingHeader:
    """Header parsed with parser.parse_content_transfer_encoding_header.

    Provides an additional attribute, cte, taken from the parse tree.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and defects."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)

    def init(self, *args, **kw):
        """Run the next init in the MRO, then record the sanitized cte."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        """The content transfer encoding taken from the parse tree."""
        return self._cte
520
521
class MessageIDHeader:
    """Header parsed with parser.parse_message_id."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and defects."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
532
533
# The header factory #
535
# Lowercased header name -> specialized header class.  HeaderRegistry copies
# this mapping into each new registry unless use_default_map is false.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    'message-id':                   MessageIDHeader,
    }
558
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if use_default_map:
            self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        # Names are stored lowercased, so lookups are case-insensitive.
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new header class for "name".

        The class combines the registered specialized class (or
        default_class if "name" is not registered) with base_class, and is
        named after the specialized class with a leading underscore.
        """
        cls = self.registry.get(name.lower(), self.default_class)
        return type('_'+cls.__name__, (cls, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        return self[name](name, value)