blob: 452c6ad50846926db965f3ab5c3fb3a79dfd07f4 [file] [log] [blame]
"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
R David Murray685b3492014-10-17 19:30:13 -040010from types import MappingProxyType
R David Murray0b6f6c82012-05-25 18:42:14 -040011
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:
    """A single email address: optional display name plus username@domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises:
            ValueError: if any supplied part contains CR or LF, or if
                addr_spec cannot be parsed completely.
            TypeError: if addr_spec is combined with username and/or domain.
            The first parser defect is raised if addr_spec parses with
            defects.

        """
        # Reject line breaks up front: an embedded CR/LF would let a caller
        # smuggle additional header lines into a serialized message.
        inputs = ''.join(filter(None, (display_name, username, domain,
                                       addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot "
                             "contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                     a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both the username and the domain are empty.
        """
        lp = self.username
        # Quote the local part if it contains any character that may not
        # appear in a dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
            self.__class__.__name__,
            self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display_name <addr_spec>', or just addr_spec when there
        is no display name; the display name is quoted if it contains
        RFC 5322 specials.
        """
        disp = self.display_name
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # An empty address is rendered as 'name <>', not 'name <<>>'.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        """Compare display_name, username and domain with another Address.

        Returns NotImplemented for non-Address operands so that Python can
        try the reflected comparison before falling back to inequality.
        """
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
105
106
class Group:
    """A named group of Address objects, or a wrapper for a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the public attribute is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the group's members (read-only)."""
        return self._addresses

    def __repr__(self):
        return "{}(display_name={!r}, addresses={!r})".format(
            self.__class__.__name__,
            self.display_name, self.addresses)

    def __str__(self):
        """Return 'display_name: addr, addr;', or the bare address string
        when display_name is None and there is exactly one address.
        """
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name if it contains RFC 5322 specials.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        """Compare display_name and addresses with another Group.

        Returns NotImplemented for non-Group operands so that Python can
        try the reflected comparison before falling back to inequality.
        """
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
157
158
159# Header Classes #
160
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary will
    contain one key, 'defects', initialized to an empty list.  After the call
    the dictionary must contain two additional keys: parse_tree, set to the
    parse tree obtained from parsing the header, and 'decoded', set to the
    string value of the idealized representation of the data from the value.
    (That is, encoded words are decoded, and values that have canonical
    representations are so represented.)

    The defects key is intended to collect parsing defects, which the message
    parser will subsequently dispose of as appropriate.  The parser should not,
    insofar as practical, raise any errors.  Defects should be added to the
    list instead.  The standard header parsers register defects for RFC
    compliance issues, for obsolete RFC syntax, and for unrecoverable parsing
    errors.

    The parse method may add additional keys to the dictionary.  In this case
    the subclass must define an 'init' method, which will be passed the
    dictionary as its keyword arguments.  The method should use (usually by
    setting them as the value of similarly named attributes) and remove all the
    extra keys added by its parse method, and then use super to call its parent
    class with the remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1. XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        """Parse value with cls.parse and build the str-based header object."""
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the string value itself; everything else that
        # parse() produced is handed to init() as keyword arguments.
        decoded = parsed.pop('decoded')
        if utils._has_surrogates(decoded):
            decoded = utils._sanitize(decoded)
        header = str.__new__(cls, decoded)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        """The header name as passed to the constructor."""
        return self._name

    @property
    def defects(self):
        """Tuple snapshot of the defects recorded while parsing."""
        return tuple(self._defects)

    def __reduce__(self):
        """Pickle support: rebuild the class from its name and bases via
        _reconstruct_header, then restore the instance __dict__.
        """
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance holding value without running parse/init."""
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        tree = parser.Header([label])
        if self._parse_tree:
            # Only a non-empty value gets the separating space after the colon.
            space = parser.WhiteSpaceTerminal(' ', 'fws')
            tree.append(parser.CFWSList([space]))
        tree.append(self._parse_tree)
        return tree.fold(policy=policy)
259
260
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header: create a class named cls_name from bases and
    return the result of its _reconstruct(value) classmethod.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
263
264
class UnstructuredHeader:
    """Mixin for headers whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value and its string form in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
274
275
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant that sets max_count to 1."""

    max_count = 1
279
280
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty or cannot be parsed as a date, a defect is
    recorded and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        value may be a date string or a datetime object.  Problems with a
        string value are appended to kwds['defects'] instead of raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to register defects rather
                # than raise.  Depending on the Python version an
                # unparseable date surfaces as TypeError or ValueError.
                # InvalidDateDefect does not exist in older email.errors,
                # hence the fallback to the generic HeaderDefect.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.HeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword and delegate the rest upward."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime
318
319
class UniqueDateHeader(DateHeader):
    """DateHeader variant that sets max_count to 1."""

    max_count = 1
323
324
class AddressHeader:
    """Mixin for headers whose value is a list of addresses and groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        address_list, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string, or a Group/Address object, or an
        iterable of such objects.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses
            ]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything lacking an 'addresses' attribute is treated as a
            # single address and wrapped in an anonymous Group.
            groups = [
                item if hasattr(item, 'addresses') else Group(None, [item])
                for item in items
            ]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        if 'parse_tree' not in kwds:
            # Object input: derive the parse tree from the rendered string.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword and delegate the rest upward."""
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups making up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Tuple of every address in every group, computed on first use."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
378
379
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant that sets max_count to 1."""

    max_count = 1
383
384
class SingleAddressHeader(AddressHeader):
    """AddressHeader that additionally exposes a single 'address'."""

    @property
    def address(self):
        """The one address in the header.

        Raises ValueError if the header does not hold exactly one address.
        """
        if len(self.addresses) == 1:
            return self.addresses[0]
        raise ValueError(("value of single address header {} is not "
                          "a single address").format(self.name))
393
394
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant that sets max_count to 1."""

    max_count = 1
398
399
class MIMEVersionHeader:
    """Mixin for the MIME-Version header; exposes major, minor, version."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text and version parts.

        When the parse tree has no minor number, major, minor and version
        are all set to None.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        if tree.minor is None:
            kwds['major'] = None
            kwds['minor'] = tree.minor
            kwds['version'] = None
        else:
            kwds['major'] = tree.major
            kwds['minor'] = tree.minor
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])

    def init(self, *args, **kw):
        """Consume the version keywords and delegate the rest upward."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """Version as a 'major.minor' string, or None."""
        return self._version
435
436
class ParameterizedMIMEHeader:
    """Mixin that handles the params dict.

    Must be subclassed, and the subclass must provide a value_parser for
    the specific header.
    """

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, decoded text, defects and params.

        Parameter names are sanitized and lower-cased; parameter values are
        sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        raw_params = tree.params
        params = {}
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for pname, pvalue in raw_params:
                params[utils._sanitize(pname).lower()] = utils._sanitize(pvalue)
        kwds['params'] = params

    def init(self, *args, **kw):
        """Consume the 'params' keyword and delegate the rest upward."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400464
465
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header; exposes maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized main and sub type."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
486
487
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header; exposes content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then cache the disposition value."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # None is kept as-is; any other value is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Sanitized disposition from the parse tree, or None."""
        return self._content_disposition
500
501
class ContentTransferEncodingHeader:
    """Mixin for the Content-Transfer-Encoding header; exposes cte."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then cache the sanitized cte value."""
        super().init(*args, **kw)
        encoding = self._parse_tree.cte
        self._cte = utils._sanitize(encoding)

    @property
    def cte(self):
        """Sanitized content transfer encoding taken from the parse tree."""
        return self._cte
521
522
class MessageIDHeader:
    """Mixin for the Message-ID header."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse tree, its string form and its defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
533
534
R David Murray0b6f6c82012-05-25 18:42:14 -0400535# The header factory #
536
# Default mapping from lower-cased header name to the specialized class
# that handles it.  HeaderRegistry copies this into its own registry when
# use_default_map is true.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    'message-id':                   MessageIDHeader,
    }
559
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        # Always a private copy, so map_to_type never alters the defaults.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        """Return a new class combining the class registered for name (or
        default_class) with base_class; its name is the specialized class
        name prefixed with '_'.
        """
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)