blob: 2bdae6cdeb4592eff6ff19102a559f7b07f0437e [file] [log] [blame]
R David Murray0b6f6c82012-05-25 18:42:14 -04001"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5
6Eventually HeaderRegistry will be a public API, but it isn't yet,
7and will probably change some before that happens.
8
9"""
10
11from email import utils
12from email import errors
13from email import _header_value_parser as parser
14
class Address:

    """A single email address: optional display name plus an addr-spec.

    The display_name, username, domain and addr_spec attributes are all
    read-only properties.
    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is combined with username or domain,
        ValueError if only part of addr_spec could be parsed, and the first
        parser defect if the parsed addr_spec carries any defects.

        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        lp = self.username
        # Quote the local part if it contains any character that may not
        # appear in a dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither username nor domain: the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "{}(display_name={!r}, username={!r}, domain={!r})".format(
                        self.__class__.__name__,
                        self.display_name, self.username, self.domain)

    def __str__(self):
        disp = self.display_name
        # A display name containing RFC 5322 specials must be quoted.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # Avoid rendering the null address as "name <<>>".
            addr_spec = '' if self.addr_spec=='<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on 'other'; '==' still evaluates to False
        # when neither side knows how to compare.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
104
105
class Group:

    """A named address group, or a wrapper around a single ungrouped address.

    The display_name and addresses attributes are read-only properties;
    addresses is always a tuple.
    """

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced repr.
        return "{}(display_name={!r}, addresses={!r})".format(
                 self.__class__.__name__,
                 self.display_name, self.addresses)

    def __str__(self):
        # A nameless group holding exactly one address renders as that
        # address alone.
        if self.display_name is None and len(self.addresses)==1:
            return str(self.addresses[0])
        disp = self.display_name
        # A display name containing RFC 5322 specials must be quoted.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on 'other'; '==' still evaluates to False
        # when neither side knows how to compare.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
156
157
158# Header Classes #
159
class BaseHeader(str):

    """Base class for message headers.

    Supplies the generic machinery; a concrete header class combines this
    base with a specialized class that knows how to parse one header type.

    The specialized class must provide a classmethod 'parse' taking the
    unfolded value string and a dictionary.  On entry the dictionary holds
    a single key, 'defects', bound to an empty list.  On return it must
    also hold 'parse_tree' (the parse tree produced from the value) and
    'decoded' (the string value of the idealized representation of the
    data: encoded words decoded, canonical forms applied).

    Parsing problems are to be reported by adding defects to the 'defects'
    list rather than by raising, insofar as that is practical; the message
    parser disposes of the collected defects afterwards.  The standard
    parsers register defects for RFC compliance issues, for obsolete RFC
    syntax, and for unrecoverable parsing errors.

    If 'parse' stores any further keys in the dictionary, the class must
    also define an 'init' method.  It receives the dictionary as keyword
    arguments, must consume and remove every extra key (usually by storing
    the value on a similarly named attribute), and must then delegate the
    remaining arguments to its parent via super.

    A 'max_count' attribute that is either None or 1 should be defined as
    well.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value itself; everything else left in
        # the dictionary is handed to init().
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        header = str.__new__(cls, text)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        return tuple(self._defects)

    def __reduce__(self):
        # The header class is rebuilt from its name and bases on
        # unpickling, then the instance dictionary is restored as state.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit".

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        # At some point we need to only put fws here if it was in the source.
        separator = parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])
        whole = parser.Header([label, separator, self._parse_tree])
        return whole.fold(policy=policy)
255
256
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header: recreate its class from name and bases, then
    delegate to that class's _reconstruct with the string value."""
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
259
260
class UnstructuredHeader:

    """Header whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        # Store the parse tree and its string form as the decoded value.
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
270
271
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant whose max_count is 1 instead of None."""

    # NOTE(review): presumably the maximum number of times the header may
    # appear in one message -- confirm against the code that reads max_count.
    max_count = 1
275
276
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty, or is a string that cannot be parsed as a date,
    the datetime attribute is None and a defect is registered.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree'.

        value may be a date string or a datetime object.  Problems are
        reported by appending to kwds['defects']; a malformed date string
        does not raise.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to record defects rather
                # than raise.  Depending on the Python version,
                # parsedate_to_datetime signals an unparsable date with
                # either TypeError or ValueError.  Use the dedicated
                # defect class where email.errors provides one.
                defect_class = getattr(errors, 'InvalidDateDefect',
                                       errors.InvalidHeaderDefect)
                kwds['defects'].append(
                    defect_class('Invalid date value or format'))
                kwds['datetime'] = None
                # Keep the original text so it survives re-folding.
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
314
315
class UniqueDateHeader(DateHeader):

    """DateHeader variant whose max_count is 1 instead of None."""

    # NOTE(review): presumably the maximum number of times the header may
    # appear in one message -- confirm against the code that reads max_count.
    max_count = 1
319
320
class AddressHeader:

    """Header whose value is a list of addresses and/or address groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        address_list, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses
            ]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is wrapped in a
            # nameless Group.
            groups = [entry if hasattr(entry, 'addresses')
                      else Group(None, [entry])
                      for entry in items]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        # Only the non-string path reaches here without a parse tree.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple, computed on first access.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        if self._addresses is None:
            self._addresses = tuple(member
                                    for group in self._groups
                                    for member in group.addresses)
        return self._addresses
374
375
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant whose max_count is 1 instead of None."""

    # NOTE(review): presumably the maximum number of times the header may
    # appear in one message -- confirm against the code that reads max_count.
    max_count = 1
379
380
class SingleAddressHeader(AddressHeader):

    """AddressHeader that additionally exposes its sole address."""

    @property
    def address(self):
        """The only address in the header.

        Raises ValueError unless the header holds exactly one address.
        """
        if len(self.addresses) == 1:
            return self.addresses[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
389
390
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant whose max_count is 1 instead of None."""

    # NOTE(review): presumably the maximum number of times the header may
    # appear in one message -- confirm against the code that reads max_count.
    max_count = 1
394
395
class MIMEVersionHeader:

    """MIME-Version header exposing version, major and minor attributes."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        # Without a minor number the version counts as missing altogether,
        # so major is reported as None too.
        if tree.minor is None:
            kwds['major'] = None
            kwds['minor'] = tree.minor
            kwds['version'] = None
        else:
            kwds['major'] = tree.major
            kwds['minor'] = tree.minor
            kwds['version'] = '{}.{}'.format(kwds['major'], kwds['minor'])

    def init(self, *args, **kw):
        # Consume the extra keys added by parse() before delegating.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
431
432
class ParameterizedMIMEHeader:

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        cleaned = {}
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for raw_name, raw_value in tree.params:
                key = utils._sanitize(raw_name).lower()
                cleaned[key] = utils._sanitize(raw_value)
        kwds['params'] = cleaned

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating.
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Hand out a copy so callers cannot mutate the stored parameters.
        return self._params.copy()
460
461
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Content-Type header exposing maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parse tree is only available once the parent init has run.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        # "maintype/subtype"
        main, sub = self.maintype, self.subtype
        return main + '/' + sub
482
483
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Content-Disposition header exposing content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # A missing disposition stays None; anything else is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
496
497
class ContentTransferEncodingHeader:

    """Content-Transfer-Encoding header exposing the cte attribute."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parse tree is only available once the parent init has run.
        raw_cte = self._parse_tree.cte
        self._cte = utils._sanitize(raw_cte)

    @property
    def cte(self):
        return self._cte
517
518
R David Murray0b6f6c82012-05-25 18:42:14 -0400519# The header factory #
520
# Default mapping from lower-case header name to the specialized header
# class.  HeaderRegistry copies it into its own registry when created with
# use_default_map true, and looks names up in lower case.
_default_header_map = {
    'subject':                      UniqueUnstructuredHeader,
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
542
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is placed last in the __bases__ of every header class
        this factory builds.  default_class is the specialized class used
        when "name" (see __call__) has no entry in the registry.  When
        use_default_map is true (the default) the registry starts out as a
        copy of the default name-to-class mapping; otherwise it starts
        empty.

        """
        self.base_class = base_class
        self.default_class = default_class
        self.registry = dict(_default_header_map) if use_default_map else {}

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        # Names are stored lower-cased; lookups lower-case them as well.
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        specialized = self.registry.get(name.lower(), self.default_class)
        # A fresh class is built on each lookup: the specialized class
        # first, the factory's base_class last.
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        The class for 'name' is built by combining the specialized class
        found in the registry (or default_class when there is none) with
        the factory's base_class; name and value are then passed to that
        class's constructor.

        """
        header_class = self[name]
        return header_class(name, value)