blob: b590d69e8b744129a53c79f550a42e15c83e71e9 [file] [log] [blame]
"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.
"""
R David Murray685b3492014-10-17 19:30:13 -04006from types import MappingProxyType
R David Murray0b6f6c82012-05-25 18:42:14 -04007
8from email import utils
9from email import errors
10from email import _header_value_parser as parser
11
class Address:
    """A single email address: optional display name plus username@domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address is made of a 'display_name', a 'username' and a 'domain'.
        Rather than passing username and domain separately, the two may be
        given together through the addr_spec keyword, which is used *instead
        of* the username and domain keywords.  An addr_spec string must be
        properly quoted according to RFC 5322 rules; an error is raised if it
        is not.

        The resulting object exposes read-only display_name, username, domain
        and addr_spec attributes.  Both addr_spec and the string value of the
        object are quoted according to RFC5322 rules, but carry no Content
        Transfer Encoding.

        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be parsed completely; raises TypeError if addr_spec is combined
        with username and/or domain.

        """
        # Reject header-injection attempts: no part may carry a line break.
        parts = (display_name, username, domain, addr_spec)
        combined = ''.join(part for part in parts if part)
        if '\r' in combined or '\n' in combined:
            raise ValueError("invalid arguments; address parts cannot contain CR or LF")

        # Only application code passes addr_spec (and may therefore hit the
        # raises below); the email library itself always supplies username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            parsed, leftover = parser.get_addr_spec(addr_spec)
            if leftover:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    parsed, addr_spec))
            defects = parsed.all_defects
            if defects:
                raise defects[0]
            username, domain = parsed.local_part, parsed.domain

        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        local = self.username
        # Quote the local part when it holds characters a dot-atom cannot.
        if not parser.DOT_ATOM_ENDS.isdisjoint(local):
            local = parser.quote_string(local)
        domain = self.domain
        if domain:
            return '@'.join((local, domain))
        # No domain: a bare local part, or '<>' for a completely empty address.
        return local if local else '<>'

    def __repr__(self):
        template = "{}(display_name={!r}, username={!r}, domain={!r})"
        return template.format(self.__class__.__name__,
                               self.display_name,
                               self.username,
                               self.domain)

    def __str__(self):
        name = self.display_name
        if not parser.SPECIALS.isdisjoint(name):
            name = parser.quote_string(name)
        spec = self.addr_spec
        if not name:
            return spec
        # With a display name, an empty address renders as "name <>".
        if spec == '<>':
            spec = ''
        return "{} <{}>".format(name, spec)

    def __eq__(self, other):
        if isinstance(other, Address):
            return (self.display_name == other.display_name and
                    self.username == other.username and
                    self.domain == other.domain)
        return NotImplemented
102
103
class Group:
    """A named group of Address objects, or a wrapper for a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the group's membership is immutable; a missing
        # or empty 'addresses' argument becomes the empty tuple.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None for a bare-address wrapper."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the addresses that belong to this group."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis keeps the repr balanced, matching the
        # constructor-call shape that Address.__repr__ produces.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        # A nameless group holding exactly one address renders as that address.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name if it contains RFC 5322 special characters.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        # A single space separates the colon from a non-empty address list.
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
152
153
154# Header Classes #
155
class BaseHeader(str):

    """Base class for message headers.

    Implements generic behavior and provides tools for subclasses.

    A subclass must define a classmethod named 'parse' that takes an unfolded
    value string and a dictionary as its arguments.  The dictionary starts
    out with a single key, 'defects', holding an empty list.  When parse
    returns, the dictionary must hold two more keys: 'parse_tree', the parse
    tree obtained from parsing the header, and 'decoded', the string value of
    the idealized representation of the data from the value.  (That is,
    encoded words are decoded, and values that have canonical representations
    are so represented.)

    The defects key collects parsing defects, which the message parser will
    subsequently dispose of as appropriate.  The parser should not, insofar
    as practical, raise any errors; defects should be added to the list
    instead.  The standard header parsers register defects for RFC compliance
    issues, for obsolete RFC syntax, and for unrecoverable parsing errors.

    The parse method may add additional keys to the dictionary.  In that case
    the subclass must define an 'init' method, which receives the dictionary
    as its keyword arguments.  That method should consume (usually by storing
    them in similarly named attributes) and remove every extra key added by
    its parse method, then use super to call its parent class with the
    remaining arguments and keywords.

    The subclass should also make sure that a 'max_count' attribute is defined
    that is either None or 1.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value; everything else goes to init().
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        header = str.__new__(cls, text)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        self._defects = defects
        self._parse_tree = parse_tree
        self._name = name

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        return tuple(self._defects)

    def __reduce__(self):
        # The header class is built dynamically, so pickling records its name
        # and bases and lets _reconstruct_header rebuild the class.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep')])
        header = parser.Header([label])
        tree = self._parse_tree
        if tree:
            # Separate the label from a non-empty value with a single space.
            header.append(
                parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')]))
        header.append(tree)
        return header.fold(policy=policy)
254
255
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header: recreate its class from name and bases, then
    restore an instance holding the string 'value'."""
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
258
259
class UnstructuredHeader:
    """Mixin for headers whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        # The decoded value is simply the string form of the parse tree.
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
269
270
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant with max_count set to 1."""

    max_count = 1
274
275
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        # Empty value: record the defect and produce an empty header.
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds.update(datetime=None, decoded='',
                        parse_tree=parser.TokenList())
            return
        if isinstance(value, str):
            # Keep the raw text as 'decoded' in case it cannot be parsed.
            kwds['decoded'] = value
            try:
                value = utils.parsedate_to_datetime(value)
            except ValueError:
                kwds['defects'].append(
                    errors.InvalidDateDefect('Invalid date value or format'))
                kwds.update(datetime=None, parse_tree=parser.TokenList())
                return
        # 'value' is now a datetime (parsed here or supplied by the caller).
        kwds['datetime'] = value
        normalized = utils.format_datetime(value)
        kwds['decoded'] = normalized
        kwds['parse_tree'] = cls.value_parser(normalized)

    def init(self, *args, **kw):
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
320
321
class UniqueDateHeader(DateHeader):
    """DateHeader variant with max_count set to 1."""

    max_count = 1
325
326
class AddressHeader:
    """Mixin for headers whose value is a list of addresses and/or groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses
            ]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff; a lone object becomes a
            # one-item list.
            items = value if hasattr(value, '__iter__') else [value]
            groups = []
            for item in items:
                if hasattr(item, 'addresses'):
                    # Already group-like: use it unchanged.
                    groups.append(item)
                else:
                    groups.append(Group(None, [item]))
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        decoded = ', '.join(map(str, groups))
        kwds['decoded'] = decoded
        # Objects were given rather than text: derive the tree from 'decoded'.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(decoded)

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        # Flattened lazily on first access, then cached.
        cached = self._addresses
        if cached is None:
            flat = []
            for group in self._groups:
                flat.extend(group.addresses)
            cached = self._addresses = tuple(flat)
        return cached
380
381
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant with max_count set to 1."""

    max_count = 1
385
386
class SingleAddressHeader(AddressHeader):
    """AddressHeader that also exposes its sole address as 'address'."""

    @property
    def address(self):
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
395
396
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant with max_count set to 1."""

    max_count = 1
400
401
class MIMEVersionHeader:
    """Mixin for the MIME-Version header; exposes version, major and minor."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number the whole version is reported as None.
            kwds['major'] = None
            kwds['minor'] = None
            kwds['version'] = None
        else:
            major = tree.major
            kwds['major'] = major
            kwds['minor'] = minor
            kwds['version'] = '{}.{}'.format(major, minor)

    def init(self, *args, **kw):
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
437
438
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        raw_params = tree.params
        params = {}
        if raw_params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for name, val in raw_params:
                # Parameter names are stored lower-cased.
                key = utils._sanitize(name).lower()
                params[key] = utils._sanitize(val)
        kwds['params'] = params

    def init(self, *args, **kw):
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Hand out a read-only view of the parameter mapping.
        return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400466
467
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header: exposes maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parse tree is available once the parent init has run.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        return '/'.join((self.maintype, self.subtype))
488
489
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header: exposes content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # A missing disposition stays None; anything else is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
502
503
class ContentTransferEncodingHeader:
    """Mixin for the Content-Transfer-Encoding header; exposes cte."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte
523
524
class MessageIDHeader:
    """Mixin for the Message-ID header."""

    max_count = 1
    value_parser = staticmethod(parser.parse_message_id)

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
535
536
R David Murray0b6f6c82012-05-25 18:42:14 -0400537# The header factory #
538
# Lower-cased header name -> specialized header class used for that header.
_default_header_map = {
    'subject': UniqueUnstructuredHeader,
    'date': UniqueDateHeader,
    'resent-date': DateHeader,
    'orig-date': UniqueDateHeader,
    'sender': UniqueSingleAddressHeader,
    'resent-sender': SingleAddressHeader,
    'to': UniqueAddressHeader,
    'resent-to': AddressHeader,
    'cc': UniqueAddressHeader,
    'resent-cc': AddressHeader,
    'bcc': UniqueAddressHeader,
    'resent-bcc': AddressHeader,
    'from': UniqueAddressHeader,
    'resent-from': AddressHeader,
    'reply-to': UniqueAddressHeader,
    'mime-version': MIMEVersionHeader,
    'content-type': ContentTypeHeader,
    'content-disposition': ContentDispositionHeader,
    'content-transfer-encoding': ContentTransferEncodingHeader,
    'message-id': MessageIDHeader,
}
561
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        # Start from a private copy of the default map so later registrations
        # never touch the shared module-level mapping.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look the name up case-insensitively, falling back to default_class,
        # then combine the result with base_class into a new class.
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)