blob: d8613ebf24e613743c94261f6f0a44857ca7c6ab [file] [log] [blame]
"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
R David Murray685b3492014-10-17 19:30:13 -040010from types import MappingProxyType
R David Murray0b6f6c82012-05-25 18:42:14 -040011
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:
    """A single email address: optional display name plus username@domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        The address is described by a 'display_name', a 'username' and a
        'domain'.  As an alternative to passing username and domain
        separately, the combined form may be given through the addr_spec
        keyword; the two styles are mutually exclusive.  An addr_spec must
        already be quoted according to RFC 5322 rules.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be parsed completely; raises TypeError if addr_spec is
        combined with username and/or domain.  If parsing addr_spec
        registered defects, the first of them is raised.

        The resulting display_name, username, domain and addr_spec
        attributes are read-only.
        """
        # Header injection guard: no supplied part may carry a line break.
        joined = ''.join(
            part for part in (display_name, username, domain, addr_spec)
            if part)
        if '\r' in joined or '\n' in joined:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # Only application code goes through this branch (and its potential
        # raises); the email package itself always passes username and
        # domain explicitly.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            found_defects = parsed.all_defects
            if found_defects:
                raise found_defects[0]
            username = parsed.local_part
            domain = parsed.domain

        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        An address with neither username nor domain is rendered as '<>'.
        """
        local = self.username
        needs_quoting = not parser.DOT_ATOM_ENDS.isdisjoint(local)
        if needs_quoting:
            local = parser.quote_string(local)
        if self.domain:
            return local + '@' + self.domain
        return local or '<>'

    def __repr__(self):
        template = "{}(display_name={!r}, username={!r}, domain={!r})"
        return template.format(
            self.__class__.__name__,
            self.display_name,
            self.username,
            self.domain)

    def __str__(self):
        name = self.display_name
        if not parser.SPECIALS.isdisjoint(name):
            name = parser.quote_string(name)
        if not name:
            return self.addr_spec
        # A null address is shown as an empty angle-addr after the name.
        spec = '' if self.addr_spec == '<>' else self.addr_spec
        return "{} <{}>".format(name, spec)

    def __eq__(self, other):
        if isinstance(other, Address):
            return (self.display_name == other.display_name and
                    self.username == other.username and
                    self.domain == other.domain)
        return NotImplemented
106
107
class Group:
    """An address group, or a wrapper around a single ungrouped address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The
        Group is created by specifying a display_name and a possibly empty
        iterable of Address objects, which is stored as a tuple.

        A Group can also stand in for a single address that is not in a
        group, which is convenient when manipulating lists that mix Groups
        and individual Addresses.  In that case display_name should be None:
        the string form of a Group whose display_name is None and which
        holds exactly one address is the string form of that address.
        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses contained in the group (read-only)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced repr such as "Group(display_name=..., ...".
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        # An anonymous group holding exactly one address renders as that
        # address alone.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        # Separate the address list from the colon only when it is non-empty.
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
156
157
# Header Classes #
159
class BaseHeader(str):

    """Base class for message headers.

    Supplies the generic machinery and helper tools used by subclasses.

    A subclass has to provide a classmethod called 'parse' accepting an
    unfolded value string and a dictionary.  On entry the dictionary holds
    the single key 'defects', bound to an empty list.  On return it must also
    hold 'parse_tree', the parse tree produced from the header, and
    'decoded', the string form of the idealized representation of the value
    (encoded words decoded, canonical representations applied).

    The 'defects' list collects parsing defects for the message parser to
    deal with later.  As far as practical the parse method should record
    defects there rather than raise.  The standard header parsers register
    defects for RFC compliance issues, for obsolete RFC syntax, and for
    unrecoverable parsing errors.

    If parse stores extra keys in the dictionary, the subclass must define
    an 'init' method.  It receives the dictionary as keyword arguments,
    consumes and removes the extra keys (usually storing them on similarly
    named attributes), and then delegates the remaining arguments to its
    parent via super.

    A subclass should also define a 'max_count' attribute that is either
    None or 1.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value itself; everything else goes to init.
        decoded = parsed.pop('decoded')
        if utils._has_surrogates(decoded):
            decoded = utils._sanitize(decoded)
        header = str.__new__(cls, decoded)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name this instance was created with."""
        return self._name

    @property
    def defects(self):
        """Tuple snapshot of the defects recorded while parsing."""
        return tuple(self._defects)

    def __reduce__(self):
        # The class name and bases are passed to the reconstructor so it can
        # build an equivalent class with type(); the instance dict is
        # supplied as the state.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypasses __new__, so no parsing and no init() call happen here.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing
        linesep characters, and ending with a linesep character.  The string
        includes the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        tree = parser.Header([label])
        if self._parse_tree:
            # Separate the label from a non-empty value with a single space.
            separator = parser.WhiteSpaceTerminal(' ', 'fws')
            tree.append(parser.CFWSList([separator]))
        tree.append(self._parse_tree)
        return tree.fold(policy=policy)
258
259
260def _reconstruct_header(cls_name, bases, value):
261 return type(cls_name, bases, {})._reconstruct(value)
262
263
class UnstructuredHeader:
    """Mixin for headers whose value is unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
273
274
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant with max_count set to 1."""

    max_count = 1
278
279
280class DateHeader:
281
282 """Header whose value consists of a single timestamp.
283
284 Provides an additional attribute, datetime, which is either an aware
285 datetime using a timezone, or a naive datetime if the timezone
286 in the input string is -0000. Also accepts a datetime as input.
287 The 'value' attribute is the normalized form of the timestamp,
288 which means it is the output of format_datetime on the datetime.
289 """
290
291 max_count = None
292
293 # This is used only for folding, not for creating 'decoded'.
294 value_parser = staticmethod(parser.get_unstructured)
295
296 @classmethod
297 def parse(cls, value, kwds):
298 if not value:
299 kwds['defects'].append(errors.HeaderMissingRequiredValue())
300 kwds['datetime'] = None
301 kwds['decoded'] = ''
302 kwds['parse_tree'] = parser.TokenList()
303 return
304 if isinstance(value, str):
Georges Toth303aac82020-10-27 01:31:06 +0100305 kwds['decoded'] = value
306 try:
307 value = utils.parsedate_to_datetime(value)
308 except ValueError:
309 kwds['defects'].append(errors.InvalidDateDefect('Invalid date value or format'))
310 kwds['datetime'] = None
311 kwds['parse_tree'] = parser.TokenList()
312 return
R David Murray0b6f6c82012-05-25 18:42:14 -0400313 kwds['datetime'] = value
314 kwds['decoded'] = utils.format_datetime(kwds['datetime'])
315 kwds['parse_tree'] = cls.value_parser(kwds['decoded'])
316
317 def init(self, *args, **kw):
318 self._datetime = kw.pop('datetime')
319 super().init(*args, **kw)
320
321 @property
322 def datetime(self):
323 return self._datetime
324
325
class UniqueDateHeader(DateHeader):
    """DateHeader variant with max_count set to 1."""

    max_count = 1
329
330
class AddressHeader:
    """Mixin for headers whose value is a list of addresses and groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the resulting tree."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string or Address/Group objects (a single
        object or an iterable of them).
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(addr.display_name,
                      [Address(mb.display_name or '',
                               mb.local_part or '',
                               mb.domain or '')
                       for mb in addr.all_mailboxes])
                for addr in address_list.addresses
            ]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            groups = []
            for item in value:
                # Anything without an 'addresses' attribute is treated as a
                # lone address and wrapped in an anonymous group.
                if hasattr(item, 'addresses'):
                    groups.append(item)
                else:
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(group) for group in groups])
        if 'parse_tree' not in kwds:
            # Object input: derive the tree from the rendered string.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword and pass the rest up the chain."""
        self._groups = tuple(kw.pop('groups'))
        # Computed on first access of the 'addresses' property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the Group objects making up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group, computed once."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
384
385
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant with max_count set to 1."""

    max_count = 1
389
390
class SingleAddressHeader(AddressHeader):
    """AddressHeader that additionally exposes a single 'address'."""

    @property
    def address(self):
        """The only address in the header.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
                          "a single address").format(self.name))
399
400
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant with max_count set to 1."""

    max_count = 1
404
405
class MIMEVersionHeader:
    """Mixin for the MIME-Version header, exposing major/minor/version."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, defects and version components.

        When the parse tree has no minor number, 'major' and 'version' are
        reported as None as well.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            major = None
            version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        """Consume the version keywords and pass the rest up the chain."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """'major.minor' as a string, or None."""
        return self._version
441
442
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, defects and a 'params' dict.

        Parameter names are sanitized and lowercased; values are sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        raw_params = tree.params
        collected = {}
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for param_name, param_value in raw_params:
                key = utils._sanitize(param_name).lower()
                collected[key] = utils._sanitize(param_value)
        kwds['params'] = collected

    def init(self, *args, **kw):
        """Consume the 'params' keyword and pass the rest up the chain."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400470
471
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized type parts."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
492
493
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header exposing content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized disposition."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # None is kept as-is; only an actual value is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Sanitized disposition from the parse tree, or None."""
        return self._content_disposition
506
507
class ContentTransferEncodingHeader:
    """Mixin for the Content-Transfer-Encoding header, exposing cte."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized cte value."""
        super().init(*args, **kw)
        raw_cte = self._parse_tree.cte
        self._cte = utils._sanitize(raw_cte)

    @property
    def cte(self):
        """Sanitized content transfer encoding from the parse tree."""
        return self._cte
527
528
class MessageIDHeader:
    """Mixin for the Message-ID header."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
539
540
# The header factory #
542
# Lowercased header name -> specialized header class, copied into each
# HeaderRegistry unless it is created with use_default_map=False.
_default_header_map = {
    # Unstructured text
    'subject':                      UniqueUnstructuredHeader,
    # Dates
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Addresses
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    # Identification
    'message-id':                   MessageIDHeader,
    }
565
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                 use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is placed last in the __bases__ of every header class
        this factory builds.  default_class handles any "name" (see
        __call__) that has no entry in the registry.  use_default_map
        (default True) decides whether the default mapping of names to
        specialized classes is copied into the registry at creation time.

        """
        # Copy rather than share, so map_to_type never touches the defaults.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # A new class is built on every lookup: the specialized class
        # first, base_class last.
        specialized = self.registry.get(name.lower(), self.default_class)
        class_name = '_' + specialized.__name__
        return type(class_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        The instance's class is built by combining the factory base_class
        with the specialized class registered for the name (or the
        default_class); name and value are then passed to that class's
        constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)