blob: 911a2afea7349ce206e83e1859c667379e0bb015 [file] [log] [blame]
R David Murray0b6f6c82012-05-25 18:42:14 -04001"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5
6Eventually HeaderRegistry will be a public API, but it isn't yet,
7and will probably change some before that happens.
8
9"""
R David Murray685b3492014-10-17 19:30:13 -040010from types import MappingProxyType
R David Murray0b6f6c82012-05-25 18:42:14 -040011
12from email import utils
13from email import errors
14from email import _header_value_parser as parser
15
class Address:

    """A single email address: optional display name plus an addr-spec."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is combined with username and/or domain.
        Raises ValueError if any part contains CR or LF, or if addr_spec
        cannot be fully parsed; a defect found while parsing addr_spec is
        raised as-is.

        """
        # Reject line breaks up front: an embedded CR or LF would let a caller
        # smuggle extra header lines into a serialized message.
        inputs = ''.join(filter(None, (display_name, username, domain,
                                       addr_spec)))
        if '\r' in inputs or '\n' in inputs:
            raise ValueError("invalid arguments; address parts cannot "
                             "contain CR or LF")

        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        lp = self.username
        # Quote the local part if it contains any DOT_ATOM_ENDS character.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither username nor domain: the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        disp = self.display_name
        # Quote the display name if it contains any SPECIALS character.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # The null address renders as 'Name <>', not 'Name <<>>'.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on the other operand before falling back.
        if not isinstance(other, Address):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
104
105
class Group:

    """A named group of addresses, or a wrapper around a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the public attribute is read-only in practice.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # The closing parenthesis keeps the repr balanced.
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        # An unnamed group holding exactly one address prints as that address.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name if it contains any SPECIALS character.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # Returning NotImplemented (rather than False) lets Python try the
        # reflected comparison on the other operand before falling back.
        if not isinstance(other, Group):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
155
156
157# Header Classes #
158
class BaseHeader(str):

    """Common machinery shared by every header class.

    Supplies the generic behavior and the hooks that subclasses plug into.

    Each subclass has to provide a classmethod called 'parse' accepting the
    unfolded value string and a dictionary.  On entry the dictionary holds a
    single key, 'defects', bound to an empty list.  On return it must hold
    two more keys: 'parse_tree', the tree produced by parsing the header, and
    'decoded', the string form of the idealized data from the value (encoded
    words decoded, canonical representations applied where they exist).

    The 'defects' list gathers parsing defects for the message parser to
    dispose of later.  As far as practical the parse method should not raise;
    it should append defects instead.  The standard header parsers record
    defects for RFC compliance problems, obsolete RFC syntax, and
    unrecoverable parsing errors.

    If parse stores extra keys in the dictionary, the subclass must also
    define an 'init' method.  It receives the dictionary as keyword arguments,
    consumes and removes every extra key (normally by saving each one on a
    like-named attribute), and then delegates to its parent via super with
    whatever arguments and keywords remain.

    Subclasses should additionally ensure a 'max_count' attribute exists and
    is either None or 1.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value itself; everything left over is
        # handed to init() as keyword arguments.
        decoded = parsed.pop('decoded')
        if utils._has_surrogates(decoded):
            decoded = utils._sanitize(decoded)
        header = str.__new__(cls, decoded)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        self._defects = defects
        self._parse_tree = parse_tree
        self._name = name

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        # Hand out an immutable snapshot of the defect list.
        return tuple(self._defects)

    def __reduce__(self):
        # The concrete header class is created on the fly, so it is
        # described by name and bases and rebuilt by _reconstruct_header.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypasses __new__, and therefore parsing; used when rebuilding.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Return this header folded as directed by policy.

        Folding is applied to the parsed representation following RFC5322
        rules as adjusted by the policy.  Surrogateescaped bytes in the parse
        tree are CTE encoded with the charset 'unknown-8bit'.

        Non-ASCII characters in the parse tree are CTE encoded with charset
        utf-8.  XXX: make this a policy setting.

        The result is an ASCII-only string, possibly containing linesep
        characters and ending with one.  It includes the header name and the
        ': ' separator.

        """
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        # At some point we need to only put fws here if it was in the source.
        separator = parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])
        whole_header = parser.Header([label, separator, self._parse_tree])
        return whole_header.fold(policy=policy)
254
255
256def _reconstruct_header(cls_name, bases, value):
257 return type(cls_name, bases, {})._reconstruct(value)
258
259
class UnstructuredHeader:

    """Mixin for headers whose value is parsed as unstructured text."""

    value_parser = staticmethod(parser.get_unstructured)
    max_count = None

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value, and its string form, in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
269
270
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant whose max_count is 1."""

    max_count = 1
274
275
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the input is empty or is a string that cannot be parsed as a date,
    a defect is registered and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        value may be a date string or a datetime object.  Problems with the
        value are appended to kwds['defects'] rather than raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to record defects instead of
                # raising.  Depending on the Python version an unparseable
                # date surfaces as TypeError or ValueError.
                # InvalidDateDefect only exists in newer email.errors, so
                # fall back to the generic HeaderDefect when it is absent.
                defect_class = getattr(errors, 'InvalidDateDefect',
                                       errors.HeaderDefect)
                kwds['defects'].append(
                    defect_class('Invalid date value or format'))
                kwds['datetime'] = None
                # Keep the raw text as the header's string value.
                kwds['decoded'] = value
                kwds['parse_tree'] = parser.TokenList()
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating upward.
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
313
314
class UniqueDateHeader(DateHeader):

    """DateHeader variant whose max_count is 1."""

    max_count = 1
318
319
class AddressHeader:

    """Mixin for headers whose value is a list of addresses and groups.

    The value may be a string to parse, a single Address/Group-like object,
    or an iterable of such objects.
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        tree, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return tree

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'."""
        if not isinstance(value, str):
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is treated as a lone
            # address and wrapped in an unnamed Group.
            groups = [
                entry if hasattr(entry, 'addresses') else Group(None, [entry])
                for entry in items
            ]
            defects = []
        else:
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(
                    rfc_address.display_name,
                    [
                        Address(
                            mailbox.display_name or '',
                            mailbox.local_part or '',
                            mailbox.domain or '',
                        )
                        for mailbox in rfc_address.all_mailboxes
                    ],
                )
                for rfc_address in tree.addresses
            ]
            defects = list(tree.all_defects)
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(str(group) for group in groups)
        # Objects were supplied rather than text: derive the tree from the
        # rendered string.
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating upward.
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        # Flattened lazily on first access, then cached.
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
373
374
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant whose max_count is 1."""

    max_count = 1
378
379
class SingleAddressHeader(AddressHeader):

    """AddressHeader that additionally exposes a single 'address'."""

    @property
    def address(self):
        """The only address in the header.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
388
389
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant whose max_count is 1."""

    max_count = 1
393
394
class MIMEVersionHeader:

    """Mixin for the MIME-Version header; exposes version, major and minor."""

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and record version information in kwds.

        When the parse tree has no minor number, 'major', 'minor' and
        'version' are all set to None.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            major = None
            version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        # Consume the extra keys added by parse() before delegating upward.
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
430
431
class ParameterizedMIMEHeader:

    """Mixin that manages the params dict of a parameterized MIME header."""

    # Must be subclassed, and the subclass must supply a value_parser
    # appropriate to the specific header.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Parse value and store the tree, its string form and its params.

        Parameter names are sanitized and lower-cased; values are sanitized.
        """
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            for raw_name, raw_value in tree.params:
                key = utils._sanitize(raw_name).lower()
                collected[key] = utils._sanitize(raw_value)
        kwds['params'] = collected

    def init(self, *args, **kw):
        # Consume the extra key added by parse() before delegating upward.
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # A read-only view over the stored parameter dict.
        return MappingProxyType(self._params)
R David Murray97f43c02012-06-24 05:03:27 -0400459
460
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Content-Type header; exposes maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parse tree is only available once the parent init has run.
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The maintype and subtype joined by '/'."""
        return '/'.join((self.maintype, self.subtype))
481
482
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Content-Disposition header; exposes content_disposition."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # None is stored unchanged; any other value is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
495
496
class ContentTransferEncodingHeader:

    """Mixin for the Content-Transfer-Encoding header; exposes cte."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        super().init(*args, **kw)
        # The parse tree is only available once the parent init has run.
        tree = self._parse_tree
        self._cte = utils._sanitize(tree.cte)

    @property
    def cte(self):
        return self._cte
516
517
R David Murray0b6f6c82012-05-25 18:42:14 -0400518# The header factory #
519
# Lower-cased header name -> specialized class.  HeaderRegistry copies this
# table into each new registry unless told not to.
_default_header_map = {
    # Unstructured text.
    'subject':                      UniqueUnstructuredHeader,
    # Dates.
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Addresses.
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME.
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
541
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Build a header_factory suitable for use with the Policy API.

        base_class becomes the final entry in the __bases__ of every header
        class this factory creates.  default_class is used for any "name"
        (see __call__) that has no entry in the registry.  use_default_map
        decides whether the default name-to-class mapping is copied into the
        registry at creation time; it defaults to True.

        """
        # Copy the defaults so later map_to_type calls never touch the
        # module-level table.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Make cls the specialized class used for "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        # A new combined class is built on every lookup: the specialized
        # class first, base_class last.
        specialized = self.registry.get(name.lower(), self.default_class)
        combined_name = '_' + specialized.__name__
        return type(combined_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Return a header instance for header 'name' built from 'value'.

        A specialized class for parsing and representing the header is made
        by combining the factory base_class with the registry's class for
        the name (or default_class when there is none); name and value are
        then passed to that class's constructor.

        """
        header_class = self[name]
        return header_class(name, value)