blob: cc1d1912918149e400ce3d4fbcec4eb77c1ee7c6 [file] [log] [blame]
"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
R David Murray685b3492014-10-17 19:30:13 -040010from types import MappingProxyType
R David Murray0b6f6c82012-05-25 18:42:14 -040011
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:
    """A single email address: optional display name plus username@domain.

    Instances expose read-only display_name, username, domain and addr_spec
    attributes.  Because __eq__ is defined without __hash__, instances are
    not hashable.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises:
            ValueError: if any supplied part contains CR or LF, or if
                addr_spec cannot be parsed completely.
            TypeError: if addr_spec is given together with username and/or
                domain.
            The first defect found by the parser is raised if the parsed
            addr_spec has defects.

        """
        # Reject CR/LF up front: str(Address) is written into header values,
        # so a line break in any part would let a caller inject extra headers.
        inputs = ''.join(filter(None, (display_name, username, domain,
                                       addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot "
                             "contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given to the constructor (may be empty)."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address, unquoted."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address (may be empty)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both username and domain are empty.
        """
        lp = self.username
        # Quote the local part when it contains any DOT_ATOM_ENDS character.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display_name <addr_spec>', or just addr_spec if unnamed."""
        disp = self.display_name
        # Quote the display name when it contains any SPECIALS character.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # An empty address is rendered as 'name <>' rather than 'name <<>>'.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
101
102
class Group:
    """A named group of addresses, or a wrapper around a lone address.

    Because __eq__ is defined without __hash__, instances are not hashable.
    """

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the addresses attribute is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None for a bare address."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses contained in the group (possibly empty)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from this format string.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'display_name: addr, addr;' or the single bare address."""
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name when it contains any SPECIALS character.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
151
152
153# Header Classes #
154
class BaseHeader(str):

    """Base class for message headers.

    Supplies the generic machinery; header-specific behavior comes from a
    subclass (or mixin) combined with this class.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse value with cls.parse and build the header string from it."""
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value; everything else is handed to init().
        decoded = parsed.pop('decoded')
        if utils._has_surrogates(decoded):
            decoded = utils._sanitize(decoded)
        header = str.__new__(cls, decoded)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Store the header name, its parse tree and its defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """Tuple snapshot of the defects collected while parsing."""
        return tuple(self._defects)

    def __reduce__(self):
        # Header classes are created dynamically, so pickling records the
        # class name and bases and lets _reconstruct_header rebuild the class.
        header_cls = self.__class__
        rebuild_args = (header_cls.__name__, header_cls.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__ (and therefore parsing); state is restored separately.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        header = parser.Header([label])
        if self._parse_tree:
            # Separate the label from a non-empty value with a single space.
            separator = parser.WhiteSpaceTerminal(' ', 'fws')
            header.append(parser.CFWSList([separator]))
        header.append(self._parse_tree)
        return header.fold(policy=policy)
253
254
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a dynamically created header class and an instance of it.

    A new class named cls_name with the given bases is created, and its
    _reconstruct classmethod is called with value.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
257
258
class UnstructuredHeader:
    """Header whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
268
269
class UniqueUnstructuredHeader(UnstructuredHeader):
    """Unstructured header with max_count set to 1."""

    max_count = 1
273
274
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty or cannot be parsed as a date, a defect is
    registered and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        value may be a date string or a datetime object.  Problems are
        reported by appending to kwds['defects'] rather than by raising.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            # Keep the raw text as the decoded value in case parsing fails.
            kwds['decoded'] = value
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # parsedate_to_datetime raises ValueError on bad input
                # (TypeError on older Python versions).  Header parsers are
                # expected to register a defect instead of raising.
                # InvalidDateDefect only exists on newer Python versions,
                # hence the fallback.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword and pass the rest up the MRO."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime
312
313
class UniqueDateHeader(DateHeader):
    """Date header with max_count set to 1."""

    max_count = 1
317
318
class AddressHeader:
    """Header whose value is a list of addresses and/or address groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string or Address/Group objects (a single
        object or an iterable of them).
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(addr.display_name,
                      [Address(mb.display_name or '',
                               mb.local_part or '',
                               mb.domain or '')
                       for mb in addr.all_mailboxes])
                for addr in address_list.addresses
            ]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Anything without an 'addresses' attribute is wrapped in an
            # anonymous Group.
            groups = [item if hasattr(item, 'addresses')
                      else Group(None, [item])
                      for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(group) for group in groups])
        if 'parse_tree' not in kwds:
            # Object input: build the tree from the rendered string.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword and pass the rest up the MRO."""
        self._groups = tuple(kw.pop('groups'))
        # Computed on first access of the addresses property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups making up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group, computed once."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                for address in group.addresses:
                    flattened.append(address)
            self._addresses = tuple(flattened)
        return self._addresses
372
373
class UniqueAddressHeader(AddressHeader):
    """Address header with max_count set to 1."""

    max_count = 1
377
378
class SingleAddressHeader(AddressHeader):
    """Address header that is expected to hold exactly one address."""

    @property
    def address(self):
        """The sole address; ValueError unless there is exactly one."""
        if len(self.addresses) == 1:
            return self.addresses[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
387
388
class UniqueSingleAddressHeader(SingleAddressHeader):
    """Single-address header with max_count set to 1."""

    max_count = 1
392
393
class MIMEVersionHeader:
    """MIME-Version header exposing major, minor and version attributes."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results plus 'major', 'minor', 'version'.

        When the parse tree has no minor number, all three are None.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number the version is treated as absent.
            major = None
            version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        """Consume the version keywords and pass the rest up the MRO."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """The 'major.minor' string, or None."""
        return self._version
429
430
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results plus a 'params' dict.

        Parameter names are sanitized and lower-cased; values are sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        params = {}
        raw_params = tree.params
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for name, param_value in raw_params:
                key = utils._sanitize(name).lower()
                params[key] = utils._sanitize(param_value)
        kwds['params'] = params

    def init(self, *args, **kw):
        """Consume the 'params' keyword and pass the rest up the MRO."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
R David Murray685b3492014-10-17 19:30:13 -0400457 return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400458
459
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized main/sub types."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
480
481
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header exposing content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized disposition."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # None is kept as-is; only an actual value is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Sanitized disposition from the parse tree, or None."""
        return self._content_disposition
494
495
class ContentTransferEncodingHeader:
    """Content-Transfer-Encoding header exposing the cte attribute."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized cte value."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        """Sanitized cte value taken from the parse tree."""
        return self._cte
515
516
class MessageIDHeader:
    """Message-ID header, parsed with parser.parse_message_id."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
527
528
R David Murray0b6f6c82012-05-25 18:42:14 -0400529# The header factory #
530
# Lower-cased header name -> specialized header class.  HeaderRegistry copies
# this mapping into its registry when use_default_map is true.
_default_header_map = {
    # Unstructured and date headers.
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Address headers.
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME headers.
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    # Identification headers.
    'message-id':                   MessageIDHeader,
    }
553
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        # Copy the defaults so per-registry changes never touch the shared map.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        """Return a new class combining the class for name with base_class.

        The lookup lower-cases name; names that are not registered use
        default_class.  The class is named after the specialized class with
        a leading underscore.
        """
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)