blob: 0fc2231e5cbd2949033523b101bfd493ed440537 [file] [log] [blame]
"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
R David Murray685b3492014-10-17 19:30:13 -040010from types import MappingProxyType
R David Murray0b6f6c82012-05-25 18:42:14 -040011
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:
    """A single email address: an optional display name plus username@domain.

    The display_name, username, domain and addr_spec attributes are exposed
    as read-only properties.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises:
            TypeError: addr_spec was given together with username and/or
                domain.
            ValueError: any string argument contains a CR or LF character,
                or addr_spec could not be parsed completely.  A defect found
                while parsing addr_spec is raised as-is.

        """
        # A carriage return or line feed inside any part of the address would
        # let a caller smuggle extra header lines into a serialized message,
        # so such input is refused outright.
        for part in (display_name, username, domain, addr_spec):
            if isinstance(part, str) and ('\r' in part or '\n' in part):
                raise ValueError("invalid arguments; address parts cannot "
                                 "contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction time."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address, unquoted."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both username and domain are empty.
        """
        lp = self.username
        # Quote the local part if it contains any character that cannot
        # appear in a dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display name <addr_spec>', or just addr_spec if unnamed."""
        disp = self.display_name
        # A display name containing RFC 5322 specials must be a quoted-string.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # other operand's reflected comparison before falling back to an
        # identity test, which still yields False for unrelated objects.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
105
106
class Group:
    """A named group of addresses, or a wrapper around one bare address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the read-only property cannot be mutated.
        self._addresses = tuple(addresses) if addresses else ()

    @property
    def display_name(self):
        """The group's display name, or None for a bare address."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the members supplied at construction time."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from this format string, which
        # produced an unbalanced repr.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'name: addr, addr;' or the lone address when unnamed.

        Note: a display_name of None combined with zero or several addresses
        is formatted with the literal text 'None' as the group name.
        """
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # A display name containing RFC 5322 specials must be a quoted-string.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # NotImplemented gives the other operand a chance to compare; the
        # final result for unrelated types is still False.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
157
158
# Header Classes #
160
class BaseHeader(str):

    """Common str-based foundation for all message header objects.

    Supplies the generic machinery; the header-specific behavior comes from
    a class it is combined with.

    That companion class has to provide a classmethod called 'parse' which
    receives the unfolded value string and a dictionary.  On entry the
    dictionary holds a single key, 'defects', bound to an empty list.  When
    'parse' returns, the dictionary must additionally hold 'parse_tree' (the
    tree produced by parsing the value) and 'decoded' (the idealized string
    form of the value: encoded words decoded, canonical representations
    applied).

    The 'defects' list gathers parsing problems so that the message parser
    can deal with them afterwards.  As far as is practical, 'parse' should
    append defects to that list instead of raising.  The standard parsers
    record defects for RFC compliance problems, obsolete RFC syntax, and
    unrecoverable parse errors.

    'parse' may place further keys in the dictionary.  If it does, the class
    must also define an 'init' method, which receives the dictionary as
    keyword arguments.  That method consumes and removes every extra key
    (typically storing each on a like-named attribute) and then delegates
    the remaining arguments to its parent through super.

    The class should also ensure a 'max_count' attribute exists whose value
    is None or 1.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        # Let the header-specific parser fill in the construction arguments.
        parsed = {'defects': []}
        cls.parse(value, parsed)
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        # The string value of the header object is the decoded text.
        header = str.__new__(cls, text)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Store the header name, its parse tree, and the defect list."""
        self._defects = defects
        self._parse_tree = parse_tree
        self._name = name

    @property
    def name(self):
        """The header name supplied when the header was created."""
        return self._name

    @property
    def defects(self):
        """Immutable snapshot (tuple) of the recorded defects."""
        return tuple(self._defects)

    def __reduce__(self):
        # The concrete class is built dynamically, so it is described by its
        # name and bases and re-created through _reconstruct_header; the
        # instance __dict__ is passed along as the state.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance from its string value without parsing."""
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to only put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep')])
        separator = parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])
        tree = parser.Header([label, separator, self._parse_tree])
        return tree.fold(policy=policy)
256
257
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header object from the pieces emitted by __reduce__.

    A class named cls_name is created on the fly from bases, and its
    _reconstruct classmethod is asked to produce the instance for value.
    """
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
260
261
class UnstructuredHeader:
    """Header whose value is parsed as RFC 5322 unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and store 'parse_tree' and 'decoded' in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        # The decoded form is simply the string rendering of the tree.
        kwds['decoded'] = str(tree)
271
272
class UniqueUnstructuredHeader(UnstructuredHeader):
    """UnstructuredHeader variant whose max_count is 1 instead of None."""

    max_count = 1
276
277
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty, or is a string that cannot be parsed as a date,
    datetime is None and a defect is recorded instead of raising.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Populate kwds with 'datetime', 'decoded' and 'parse_tree'.

        value may be a date string or a datetime object.  Problems with the
        value are appended to kwds['defects'].
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to register defects rather than
                # raise.  Keep the raw text as the decoded value so nothing
                # is lost, and parse it as unstructured text for folding.
                # InvalidDateDefect is not available on every Python version,
                # hence the fallback to the generic header defect.
                defect_cls = getattr(errors, 'InvalidDateDefect',
                                     errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_cls('Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword, then defer to the parent init."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if the value was empty or invalid."""
        return self._datetime
315
316
class UniqueDateHeader(DateHeader):
    """DateHeader variant whose max_count is 1 instead of None."""

    max_count = 1
320
321
class AddressHeader:
    """Header whose value is a list of addresses and/or address groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        tree, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return tree

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string or Address/Group object(s).
        """
        if not isinstance(value, str):
            # Assume it is Address/Group stuff
            if not hasattr(value, '__iter__'):
                value = [value]
            # Anything lacking an 'addresses' attribute is treated as a
            # single address and wrapped in an unnamed Group.
            groups = [item if hasattr(item, 'addresses')
                      else Group(None, [item])
                      for item in value]
            defects = []
        else:
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = []
            for entry in tree.addresses:
                members = [Address(mailbox.display_name or '',
                                   mailbox.local_part or '',
                                   mailbox.domain or '')
                           for mailbox in entry.all_mailboxes]
                groups.append(Group(entry.display_name, members))
            defects = list(tree.all_defects)
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        # Object input has no parse tree yet; derive one from the rendering.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword, then defer to the parent init."""
        self._groups = tuple(kw.pop('groups'))
        # Filled in lazily by the 'addresses' property.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups that make up the header value."""
        return self._groups

    @property
    def addresses(self):
        """Flat tuple of every address in every group (computed once)."""
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                for address in group.addresses:
                    flattened.append(address)
            self._addresses = tuple(flattened)
        return self._addresses
375
376
class UniqueAddressHeader(AddressHeader):
    """AddressHeader variant whose max_count is 1 instead of None."""

    max_count = 1
380
381
class SingleAddressHeader(AddressHeader):
    """AddressHeader that offers direct access to its one address."""

    @property
    def address(self):
        """The sole address of the header.

        Raises ValueError unless the header holds exactly one address.
        """
        if len(self.addresses) == 1:
            return self.addresses[0]
        message = ("value of single address header {} is not "
                   "a single address")
        raise ValueError(message.format(self.name))
390
391
class UniqueSingleAddressHeader(SingleAddressHeader):
    """SingleAddressHeader variant with max_count set to 1."""

    max_count = 1
395
396
class MIMEVersionHeader:
    """MIME-Version header exposing version, major and minor attributes."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results plus 'major', 'minor', 'version'.

        When the parse tree has no minor number, all three are None.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        has_minor = tree.minor is not None
        # Without a minor number the major number is not reported either.
        kwds['major'] = tree.major if has_minor else None
        kwds['minor'] = tree.minor
        if has_minor:
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])
        else:
            kwds['version'] = None

    def init(self, *args, **kw):
        """Consume the version-related keywords, then defer to the parent."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        """Major version number, or None."""
        return self._major

    @property
    def minor(self):
        """Minor version number, or None."""
        return self._minor

    @property
    def version(self):
        """Version as a 'major.minor' string, or None."""
        return self._version
432
433
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results and a 'params' dictionary.

        Parameter names are sanitized and lower-cased; values are sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for name, value in tree.params:
                collected[utils._sanitize(name).lower()] = utils._sanitize(value)
        kwds['params'] = collected

    def init(self, *args, **kw):
        """Consume the 'params' keyword, then defer to the parent init."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        """Read-only mapping view of the header parameters."""
        return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400461
462
class ContentTypeHeader(ParameterizedMIMEHeader):
    """Content-Type header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized main/sub types."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        """Sanitized main type taken from the parse tree."""
        return self._maintype

    @property
    def subtype(self):
        """Sanitized subtype taken from the parse tree."""
        return self._subtype

    @property
    def content_type(self):
        """The 'maintype/subtype' string."""
        return '/'.join((self.maintype, self.subtype))
483
484
class ContentDispositionHeader(ParameterizedMIMEHeader):
    """Content-Disposition header exposing content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the disposition value."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # A missing disposition stays None; anything else is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        """Sanitized disposition value, or None if the tree had none."""
        return self._content_disposition
497
498
class ContentTransferEncodingHeader:
    """Content-Transfer-Encoding header exposing the cte attribute."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store 'parse_tree' and 'decoded' in kwds and collect defects."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized encoding name."""
        super().init(*args, **kw)
        raw_cte = self._parse_tree.cte
        self._cte = utils._sanitize(raw_cte)

    @property
    def cte(self):
        """Sanitized content transfer encoding from the parse tree."""
        return self._cte
518
519
# The header factory #
521
# Lower-cased header name -> specialized header class.  HeaderRegistry copies
# this mapping into its registry when created with use_default_map=True.
_default_header_map = {
    # Unstructured text
    'subject':                      UniqueUnstructuredHeader,
    # Dates
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Addresses
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
543
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is placed last in the __bases__ of every header class the
        factory creates.  default_class is the specialized class used when
        "name" (see __call__) has no entry in the registry.  use_default_map
        decides whether the default name-to-class mapping is copied into the
        registry at creation time; it defaults to True.

        """
        # Start from a private copy of the defaults so that later
        # registrations never touch the module-level mapping.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # Look the name up case-insensitively, falling back to the default
        # class, and build a fresh combined class on every call.
        specialized = self.registry.get(name.lower(), self.default_class)
        combined_name = '_' + specialized.__name__
        return type(combined_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        A class suited to the named header is assembled from the registered
        specialized class (or default_class when none is registered) and the
        factory's base_class; it is then instantiated with name and value.

        """
        header_cls = self[name]
        return header_cls(name, value)