blob: 1fae950820a7b37608a424f425beaa3bfdcd33de [file] [log] [blame]
R David Murray0b6f6c82012-05-25 18:42:14 -04001"""Representing and manipulating email headers via custom objects.
2
3This module provides an implementation of the HeaderRegistry API.
4The implementation is designed to flexibly follow RFC5322 rules.
5
6Eventually HeaderRegistry will be a public API, but it isn't yet,
7and will probably change some before that happens.
8
9"""
10
11from email import utils
12from email import errors
13from email import _header_value_parser as parser
14
class Address:

    """A single email address: display name plus username@domain.

    The display_name, username, domain and addr_spec attributes are all
    read-only.  str() of an instance is the address quoted according to
    RFC 5322 rules, without any Content Transfer Encoding.

    """

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is given together with username or
        domain, ValueError if addr_spec cannot be parsed in full, and the
        first defect reported by the parser if the parsed addr_spec has any.

        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        return self._display_name

    @property
    def username(self):
        return self._username

    @property
    def domain(self):
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.
        """
        # Quote the local part only if it contains a character that may not
        # appear in a dot-atom.
        if parser.DOT_ATOM_ENDS.isdisjoint(self.username):
            lp = self.username
        else:
            lp = parser.quote_string(self.username)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            # Neither username nor domain: the null address.
            return '<>'
        return lp

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        # Quote the display name only if it contains an RFC 5322 special.
        if parser.SPECIALS.isdisjoint(self.display_name):
            disp = self.display_name
        else:
            disp = parser.quote_string(self.display_name)
        if disp:
            # With a display name the null address is rendered as "name <>",
            # so do not nest the '<>' produced by addr_spec.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        # Return NotImplemented (not False) for foreign types so that Python
        # gives the other operand's __eq__ a chance before falling back to
        # identity comparison.  Results between Address objects are unchanged.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
103
104
class Group:

    """A named group of addresses, or a wrapper around a lone address.

    The display_name and addresses attributes are read-only; addresses is
    always a tuple.

    """

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        return self._display_name

    @property
    def addresses(self):
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string, which
        # produced an unbalanced repr.
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        # A nameless group holding exactly one address renders as that address.
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # Quote the display name only if it contains an RFC 5322 special.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        # Return NotImplemented (not False) for foreign types so that Python
        # gives the other operand's __eq__ a chance before falling back to
        # identity comparison.  Results between Group objects are unchanged.
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
154
155
156# Header Classes #
157
class BaseHeader(str):

    """Common machinery shared by all message header classes.

    A header object is a str whose value is the decoded form of the header,
    with the header name, parse tree and parsing defects attached.

    Subclasses supply a classmethod 'parse' taking the unfolded value string
    and a dictionary.  On entry the dictionary holds a single key, 'defects',
    bound to an empty list.  On return it must also hold 'parse_tree' (the
    tree produced by parsing the value) and 'decoded' (the idealized string
    form of the value: encoded words decoded, canonical representations
    applied where they exist).

    Parsing problems belong in the 'defects' list, which the message parser
    disposes of later; parse should avoid raising wherever practical.  The
    standard parsers record defects for RFC compliance issues, obsolete RFC
    syntax, and unrecoverable parsing errors.

    If parse stores any further keys, the subclass must also define an 'init'
    method.  It receives the dictionary as keyword arguments, must consume
    and remove the extra keys (normally by storing them on similarly named
    attributes), and must then delegate to its parent's init via super with
    whatever remains.

    Subclasses should also define a 'max_count' attribute that is either None
    or 1.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        # Let the subclass parse the value; it fills in the dictionary.
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value; everything else goes to init().
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        header = str.__new__(cls, text)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Store the header name, parse tree and defect list on the instance."""
        self._name = name
        self._parse_tree = parse_tree
        self._defects = defects

    @property
    def name(self):
        return self._name

    @property
    def defects(self):
        # Hand out an immutable snapshot of the defect list.
        return tuple(self._defects)

    def __reduce__(self):
        # The concrete class is built dynamically, so pickle it as the
        # ingredients needed to rebuild that class plus the instance state.
        klass = self.__class__
        rebuild_args = (klass.__name__, klass.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        # Bypass __new__ (and therefore parsing) when unpickling.
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit'.

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        # At some point we need to only put fws here if it was in the source.
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep')])
        separator = parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])
        whole_header = parser.Header([label, separator, self._parse_tree])
        return whole_header.fold(policy=policy)
253
254
def _reconstruct_header(cls_name, bases, value):
    """Unpickling helper: rebuild the dynamic header class, then the header.

    A new class named cls_name is created from bases, and its _reconstruct
    classmethod is asked to produce the instance for value.
    """
    header_class = type(cls_name, bases, {})
    return header_class._reconstruct(value)
257
258
class UnstructuredHeader:

    """Mixin for headers whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree of value, and its string form, in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
268
269
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader variant whose max_count is 1 instead of None."""

    max_count = 1
273
274
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the input is empty, or is a string that cannot be parsed as a date,
    a defect is recorded and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree' for value.

        value may be a date string or a datetime object.  Problems are
        appended to kwds['defects'] rather than raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # Header parsers are expected to record defects instead of
                # raising.  Keep the original text so that it survives
                # decoding and folding, but expose no datetime.
                kwds['defects'].append(errors.InvalidHeaderDefect(
                    'Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' keyword, then delegate to the parent init."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        return self._datetime
312
313
class UniqueDateHeader(DateHeader):

    """DateHeader variant whose max_count is 1 instead of None."""

    max_count = 1
317
318
class AddressHeader:

    """Mixin for headers whose value is a list of addresses and groups.

    Adds a 'groups' attribute (a tuple of Group objects) and an 'addresses'
    attribute (a tuple of every address in every group, in order).
    """

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse value as an address list and return the parse tree."""
        address_list, remainder = parser.get_address_list(value)
        assert not remainder, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        value is either a header string, or a single Address/Group-like
        object, or an iterable of such objects.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            tree = cls.value_parser(value)
            kwds['parse_tree'] = tree
            groups = [
                Group(entry.display_name,
                      [Address(box.display_name or '',
                               box.local_part or '',
                               box.domain or '')
                       for box in entry.all_mailboxes])
                for entry in tree.addresses]
            defects = list(tree.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is treated as a bare
            # address and wrapped in a nameless Group.
            groups = [item if hasattr(item, 'addresses') else Group(None, [item])
                      for item in items]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(map(str, groups))
        if 'parse_tree' not in kwds:
            # Built from objects: derive the parse tree from the string form.
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' keyword, then delegate to the parent init."""
        self._groups = tuple(kw.pop('groups'))
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        return self._groups

    @property
    def addresses(self):
        # Flatten the groups on first access and cache the result.
        if self._addresses is None:
            flattened = []
            for group in self._groups:
                flattened.extend(group.addresses)
            self._addresses = tuple(flattened)
        return self._addresses
372
373
class UniqueAddressHeader(AddressHeader):

    """AddressHeader variant whose max_count is 1 instead of None."""

    max_count = 1
377
378
class SingleAddressHeader(AddressHeader):

    """AddressHeader that also offers its one address as 'address'."""

    @property
    def address(self):
        """The header's only address; ValueError unless there is exactly one."""
        candidates = self.addresses
        if len(candidates) == 1:
            return candidates[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
387
388
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader variant whose max_count is 1 instead of None."""

    max_count = 1
392
393
class MIMEVersionHeader:

    """Mixin for the MIME-Version header.

    Adds 'major', 'minor' and 'version' attributes; all three are None when
    the parse tree reports no minor version number.
    """

    max_count = 1

    value_parser = staticmethod(parser.parse_mime_version)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results and the version components."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        minor = tree.minor
        if minor is None:
            # Without a minor number the version as a whole is unusable, so
            # the major number is not reported either.
            major = version = None
        else:
            major = tree.major
            version = '{}.{}'.format(major, minor)
        kwds['major'] = major
        kwds['minor'] = minor
        kwds['version'] = version

    def init(self, *args, **kw):
        """Consume the version keywords, then delegate to the parent init."""
        self._version = kw.pop('version')
        self._major = kw.pop('major')
        self._minor = kw.pop('minor')
        super().init(*args, **kw)

    @property
    def major(self):
        return self._major

    @property
    def minor(self):
        return self._minor

    @property
    def version(self):
        return self._version
429
430
class ParameterizedMIMEHeader:

    """Mixin for MIME headers that carry a set of name=value parameters."""

    # Mixin that handles the params dict.  Must be subclassed and
    # a property value_parser for the specific header provided.

    max_count = 1

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with the parse results and a 'params' dictionary."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)
        collected = {}
        if tree.params is not None:
            # The MIME RFCs specify that parameter ordering is arbitrary.
            # Names are lower-cased; names and values are both sanitized.
            for key, val in tree.params:
                collected[utils._sanitize(key).lower()] = utils._sanitize(val)
        kwds['params'] = collected

    def init(self, *args, **kw):
        """Consume the 'params' keyword, then delegate to the parent init."""
        self._params = kw.pop('params')
        super().init(*args, **kw)

    @property
    def params(self):
        # Return a copy so callers cannot modify the stored parameters.
        return self._params.copy()
458
459
class ContentTypeHeader(ParameterizedMIMEHeader):

    """Content-Type header: adds maintype, subtype and content_type."""

    value_parser = staticmethod(parser.parse_content_type_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized main and sub type."""
        super().init(*args, **kw)
        tree = self._parse_tree
        self._maintype = utils._sanitize(tree.maintype)
        self._subtype = utils._sanitize(tree.subtype)

    @property
    def maintype(self):
        return self._maintype

    @property
    def subtype(self):
        return self._subtype

    @property
    def content_type(self):
        """The full type in 'maintype/subtype' form."""
        return '/'.join((self.maintype, self.subtype))
480
481
class ContentDispositionHeader(ParameterizedMIMEHeader):

    """Content-Disposition header: adds the content_disposition attribute."""

    value_parser = staticmethod(parser.parse_content_disposition_header)

    def init(self, *args, **kw):
        """Run the parent init, then record the disposition from the tree."""
        super().init(*args, **kw)
        disposition = self._parse_tree.content_disposition
        # A missing disposition stays None; anything else is sanitized.
        if disposition is not None:
            disposition = utils._sanitize(disposition)
        self._content_disposition = disposition

    @property
    def content_disposition(self):
        return self._content_disposition
494
495
class ContentTransferEncodingHeader:

    """Mixin for the Content-Transfer-Encoding header: adds 'cte'."""

    max_count = 1

    value_parser = staticmethod(parser.parse_content_transfer_encoding_header)

    @classmethod
    def parse(cls, value, kwds):
        """Store the parse tree, its string form and its defects in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
        kwds['defects'].extend(tree.all_defects)

    def init(self, *args, **kw):
        """Run the parent init, then record the sanitized encoding name."""
        super().init(*args, **kw)
        self._cte = utils._sanitize(self._parse_tree.cte)

    @property
    def cte(self):
        return self._cte
515
516
R David Murray0b6f6c82012-05-25 18:42:14 -0400517# The header factory #
518
# Maps lower-case header names to the specialized class that handles them.
# HeaderRegistry copies this mapping into its registry by default.
_default_header_map = {
    # Unstructured text
    'subject':                      UniqueUnstructuredHeader,
    # Dates
    'date':                         UniqueDateHeader,
    'resent-date':                  DateHeader,
    'orig-date':                    UniqueDateHeader,
    # Addresses
    'sender':                       UniqueSingleAddressHeader,
    'resent-sender':                SingleAddressHeader,
    'to':                           UniqueAddressHeader,
    'resent-to':                    AddressHeader,
    'cc':                           UniqueAddressHeader,
    'resent-cc':                    AddressHeader,
    'bcc':                          UniqueAddressHeader,
    'resent-bcc':                   AddressHeader,
    'from':                         UniqueAddressHeader,
    'resent-from':                  AddressHeader,
    'reply-to':                     UniqueAddressHeader,
    # MIME
    'mime-version':                 MIMEVersionHeader,
    'content-type':                 ContentTypeHeader,
    'content-disposition':          ContentDispositionHeader,
    'content-transfer-encoding':    ContentTransferEncodingHeader,
    }
540
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is the class that will be the last class in the created
        header class's __bases__ list.  default_class is the class that will be
        used if "name" (see __call__) does not appear in the registry.
        use_default_map controls whether or not the default mapping of names to
        specialized classes is copied in to the registry when the factory is
        created.  The default is True.

        """
        # Always a private dict, so later registrations never touch the
        # shared default mapping.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        The name is stored lower-cased, so registration is case-insensitive.

        """
        self.registry[name.lower()] = cls

    def __getitem__(self, name):
        """Return a new class for "name": specialized class plus base_class."""
        specialized = self.registry.get(name.lower(), self.default_class)
        bases = (specialized, self.base_class)
        return type('_' + specialized.__name__, bases, {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        Creates a header instance by creating a specialized class for parsing
        and representing the specified header by combining the factory
        base_class with a specialized class from the registry or the
        default_class, and passing the name and value to the constructed
        class's constructor.

        """
        header_class = self[name]
        return header_class(name, value)