"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
from types import MappingProxyType

from email import utils
from email import errors
from email import _header_value_parser as parser
15
class Address:

    """A single email address: display name, username and domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be fully parsed; raises TypeError if addr_spec is combined
        with username and/or domain.  The first defect recorded while parsing
        addr_spec, if any, is raised as-is.

        """
        # Reject CR/LF in any supplied part (None/empty parts are skipped).
        inputs = ''.join(filter(None, (display_name, username, domain, addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The username (local part) of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both username and domain are empty.
        """
        # Quote the local part if it contains any DOT_ATOM_ENDS character.
        nameset = set(self.username)
        if len(nameset) > len(nameset-parser.DOT_ATOM_ENDS):
            lp = parser.quote_string(self.username)
        else:
            lp = self.username
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        # Quote the display name if it contains any SPECIALS character.
        nameset = set(self.display_name)
        if len(nameset) > len(nameset-parser.SPECIALS):
            disp = parser.quote_string(self.display_name)
        else:
            disp = self.display_name
        if disp:
            # An empty address renders as "name <>" rather than "name <<>>".
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Return NotImplemented for foreign types so that Python can try the
        # reflected comparison; '==' still evaluates to False if that fails.
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
110
111
class Group:

    """A named group of addresses, or a wrapper around bare addresses."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the group's address list is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses in the group (read-only)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the original format string.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        if disp is not None:
            # Quote the display name if it contains any SPECIALS character.
            nameset = set(disp)
            if len(nameset) > len(nameset-parser.SPECIALS):
                disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # Return NotImplemented for foreign types so that Python can try the
        # reflected comparison; '==' still evaluates to False if that fails.
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
162
163
# Header Classes #
165
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        # The string value of the header is the 'decoded' form produced by
        # the subclass's parse(); everything else parse() stored in kwds is
        # handed to init().
        kwds = {'defects': []}
        cls.parse(value, kwds)
        if utils._has_surrogates(kwds['decoded']):
            kwds['decoded'] = utils._sanitize(kwds['decoded'])
        self = str.__new__(cls, kwds['decoded'])
        del kwds['decoded']
        self.init(name, **kwds)
        return self

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list on the instance."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """Tuple of the defects collected while parsing the value."""
        return tuple(self._defects)

    def __reduce__(self):
        # Pickle by class name and bases rather than by class object; the
        # class is rebuilt with type() in _reconstruct_header on unpickling.
        return (
            _reconstruct_header,
            (
                self.__class__.__name__,
                self.__class__.__bases__,
                str(self),
            ),
            self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__ (and therefore parsing); used when unpickling.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        header = parser.Header([
            parser.HeaderLabel([
                parser.ValueTerminal(self.name, 'header-name'),
                parser.ValueTerminal(':', 'header-sep')]),
            ])
        # Only add the separating space and the value when there is a value.
        if self._parse_tree:
            header.append(
                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
        header.append(self._parse_tree)
        return header.fold(policy=policy)
264
265
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header instance from the data emitted by __reduce__.

    A class named cls_name with the given bases is created with type(), and
    its _reconstruct classmethod is called with value.
    """
    return type(cls_name, bases, {})._reconstruct(value)
268
269
class UnstructuredHeader:

    """Header mixin whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        kwds['parse_tree'] = cls.value_parser(value)
        kwds['decoded'] = str(kwds['parse_tree'])
279
280
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant with max_count set to 1."""

    max_count = 1
284
285
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value (a date string or a datetime) into kwds.

        Sets 'datetime', 'decoded' and 'parse_tree'.  An empty value or an
        unparseable date string is recorded in kwds['defects'] rather than
        raised; in both cases 'datetime' is None and the parse tree is empty.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers should register defects instead of raising.
                # InvalidDateDefect does not exist in every version of
                # email.errors, so fall back to InvalidHeaderDefect.
                defect_class = getattr(errors, 'InvalidDateDefect',
                                       errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_class('Invalid date value or format'))
                kwds['datetime'] = None
                # 'value' is still the original string here.
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword and pass the rest up the chain."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was missing or invalid."""
        return self._datetime
323
324
class UniqueDateHeader(DateHeader):

    """DateHeader variant with max_count set to 1."""

    max_count = 1
328
329
class AddressHeader:

    """Header mixin whose value is a list of addresses and/or groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Return the address-list parse tree for value."""
        address_list, value = parser.get_address_list(value)
        # Internal invariant: nothing should be left over after parsing.
        assert not value, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Parse value into kwds.

        value is either a string, or a Group/Address-like object, or an
        iterable of such objects.  Sets 'groups', 'defects', 'decoded' and
        'parse_tree'.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            kwds['parse_tree'] = address_list = cls.value_parser(value)
            groups = []
            for addr in address_list.addresses:
                groups.append(Group(addr.display_name,
                                    [Address(mb.display_name or '',
                                             mb.local_part or '',
                                             mb.domain or '')
                                     for mb in addr.all_mailboxes]))
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Anything without an 'addresses' attribute is treated as a
            # single address and wrapped in an unnamed Group.
            groups = [Group(None, [item]) if not hasattr(item, 'addresses')
                                          else item
                                    for item in value]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join([str(item) for item in groups])
        # For non-string input the parse tree is built from the decoded form.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword and pass the rest up the chain."""
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups parsed from the header value."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group, computed on first use."""
        if self._addresses is None:
            self._addresses = tuple(address for group in self._groups
                                            for address in group.addresses)
        return self._addresses
383
384
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant with max_count set to 1."""

    max_count = 1
388
389
class SingleAddressHeader(AddressHeader):

    """AddressHeader variant that exposes its one address as 'address'."""

    @property
    def address(self):
        """The header's only address.

        Raises ValueError if the header does not hold exactly one address.
        """
        if len(self.addresses)!=1:
            raise ValueError(("value of single address header {} is not "
                "a single address").format(self.name))
        return self.addresses[0]
398
399
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant with max_count set to 1."""

    max_count = 1
403
404
class MIMEVersionHeader:

    """Header mixin for MIME-Version; exposes version, major and minor."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value into kwds.

        Sets 'parse_tree', 'decoded', 'major', 'minor' and 'version', and
        extends kwds['defects'] with the parse tree's defects.
        """
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        # major is only reported when a minor number was parsed as well.
        kwds['major'] = None if parse_tree.minor is None else parse_tree.major
        kwds['minor'] = parse_tree.minor
        if parse_tree.minor is not None:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        """Consume the version keywords and pass the rest up the chain."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None if no complete version was parsed."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None if none was parsed."""
        return self._minor

    @property
    def version(self):
        """The version as a 'major.minor' string, or None."""
        return self._version
440
441
class ParameterizedMIMEHeader:

    """Mixin for MIME headers that carry a dictionary of parameters."""

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Parse value into kwds.

        Sets 'parse_tree', 'decoded' and 'params', and extends
        kwds['defects'] with the parse tree's defects.  Parameter names are
        sanitized and lowercased; parameter values are sanitized.
        """
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
        if parse_tree.params is None:
            kwds['params'] = {}
        else:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            kwds['params'] = {utils._sanitize(name).lower():
                                    utils._sanitize(value)
                               for name, value in parse_tree.params}

    def init(self, *args, **kw):
        """Consume the 'params' keyword and pass the rest up the chain."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header's parameters."""
        return MappingProxyType(self._params)
470
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Content-Type header mixin; exposes maintype, subtype, content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized main and sub types."""
        super().init(*args, **kw)
        self._maintype = utils._sanitize(self._parse_tree.maintype)
        self._subtype = utils._sanitize(self._parse_tree.subtype)

    @property
    def maintype(self):
        """The sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """The sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The maintype and subtype joined with '/'."""
        return self.maintype + '/' + self.subtype
491
492
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Content-Disposition header mixin; exposes content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the disposition value."""
        super().init(*args, **kw)
        cd = self._parse_tree.content_disposition
        # None is kept as None; any other value is sanitized.
        self._content_disposition = cd if cd is None else utils._sanitize(cd)

    @property
    def content_disposition(self):
        """The sanitized disposition from the parse tree, or None."""
        return self._content_disposition
505
506
class ContentTransferEncodingHeader:

    """Content-Transfer-Encoding header mixin; exposes cte."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized cte value."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        """The sanitized cte value taken from the parse tree."""
        return self._cte
526
527
class MessageIDHeader:

    """Message-ID header mixin."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
        kwds['decoded'] = str(parse_tree)
        kwds['defects'].extend(parse_tree.all_defects)
538
539
# The header factory #
541
# Lowercased header name -> specialized class; copied into each
# HeaderRegistry unless it is created with use_default_map=False.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    'message-id':                   MessageIDHeader,
    }
564
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        self.registry = {}
        self.base_class = base_class
        self.default_class = default_class
        if use_default_map:
            self.registry.update(_default_header_map)

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is lowercased before it is stored.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new class for "name" headers.

        The class is named '_' plus the specialized class's name and derives
        from the registered (or default) specialized class and base_class.
        """
        cls = self.registry.get(name.lower(), self.default_class)
        return type('_'+cls.__name__, (cls, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        return self[name](name, value)