blob: 658854630fcb96a10e4ba151cf6c6f9ad3684c4f [file] [log] [blame]
"""Representing and manipulating email headers via custom objects.

This module provides an implementation of the HeaderRegistry API.
The implementation is designed to flexibly follow RFC5322 rules.

Eventually HeaderRegistry will be a public API, but it isn't yet,
and will probably change some before that happens.

"""
10
11from email import utils
12from email import errors
13from email import _header_value_parser as parser
14
class Address:

    """A single email address: display name, username and domain."""

    def __init__(self, display_name='', username='', domain='', addr_spec=None):
        """Create an object representing a full email address.

        An address can have a 'display_name', a 'username', and a 'domain'.  In
        addition to specifying the username and domain separately, they may be
        specified together by using the addr_spec keyword *instead of* the
        username and domain keywords.  If an addr_spec string is specified it
        must be properly quoted according to RFC 5322 rules; an error will be
        raised if it is not.

        An Address object has display_name, username, domain, and addr_spec
        attributes, all of which are read-only.  The addr_spec and the string
        value of the object are both quoted according to RFC5322 rules, but
        without any Content Transfer Encoding.

        Raises TypeError if addr_spec is combined with username and/or domain,
        ValueError if addr_spec cannot be parsed completely, and the first
        defect found by the parser if the parsed addr_spec has any defects.

        """
        # This clause with its potential 'raise' may only happen when an
        # application program creates an Address object using an addr_spec
        # keyword.  The email library code itself must always supply username
        # and domain.
        if addr_spec is not None:
            if username or domain:
                raise TypeError("addrspec specified when username and/or "
                                "domain also specified")
            a_s, rest = parser.get_addr_spec(addr_spec)
            if rest:
                # Unconsumed trailing text means the input was not a
                # single, well-formed addr-spec.
                raise ValueError("Invalid addr_spec; only '{}' "
                                 "could be parsed from '{}'".format(
                                    a_s, addr_spec))
            if a_s.all_defects:
                raise a_s.all_defects[0]
            username = a_s.local_part
            domain = a_s.domain
        self._display_name = display_name
        self._username = username
        self._domain = domain

    @property
    def display_name(self):
        """The display name given at construction (read-only)."""
        return self._display_name

    @property
    def username(self):
        """The local part of the address (read-only)."""
        return self._username

    @property
    def domain(self):
        """The domain part of the address (read-only)."""
        return self._domain

    @property
    def addr_spec(self):
        """The addr_spec (username@domain) portion of the address, quoted
        according to RFC 5322 rules, but with no Content Transfer Encoding.

        Returns '<>' when both the username and the domain are empty.
        """
        lp = self.username
        # Quote the local part if it contains any character that cannot
        # appear in a bare dot-atom.
        if not parser.DOT_ATOM_ENDS.isdisjoint(lp):
            lp = parser.quote_string(lp)
        if self.domain:
            return lp + '@' + self.domain
        if not lp:
            return '<>'
        return lp

    def __repr__(self):
        return "Address(display_name={!r}, username={!r}, domain={!r})".format(
                        self.display_name, self.username, self.domain)

    def __str__(self):
        """Return 'display name <addr_spec>', or just the addr_spec when
        there is no display name."""
        disp = self.display_name
        # A display name containing RFC 5322 specials must be quoted.
        if not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        if disp:
            # The null address is rendered as 'name <>', not 'name <<>>'.
            addr_spec = '' if self.addr_spec == '<>' else self.addr_spec
            return "{} <{}>".format(disp, addr_spec)
        return self.addr_spec

    def __eq__(self, other):
        """Compare by display_name, username and domain.

        Objects of a different type are not compared here: NotImplemented
        is returned so that Python can try the other operand's __eq__
        before falling back to "not equal".
        """
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.username == other.username and
                self.domain == other.domain)
103
104
class Group:

    """A named group of Address objects, or a wrapper for a lone address."""

    def __init__(self, display_name=None, addresses=None):
        """Create an object representing an address group.

        An address group consists of a display_name followed by colon and a
        list of addresses (see Address) terminated by a semi-colon.  The Group
        is created by specifying a display_name and a possibly empty list of
        Address objects.  A Group can also be used to represent a single
        address that is not in a group, which is convenient when manipulating
        lists that are a combination of Groups and individual Addresses.  In
        this case the display_name should be set to None.  In particular, the
        string representation of a Group whose display_name is None is the same
        as the Address object, if there is one and only one Address object in
        the addresses list.

        """
        self._display_name = display_name
        # Stored as a tuple so the public attribute is immutable.
        self._addresses = tuple(addresses) if addresses else tuple()

    @property
    def display_name(self):
        """The group's display name, or None (read-only)."""
        return self._display_name

    @property
    def addresses(self):
        """Tuple of the group's member addresses (read-only)."""
        return self._addresses

    def __repr__(self):
        # The closing parenthesis was missing from the format string,
        # which produced an unbalanced repr.
        return "Group(display_name={!r}, addresses={!r})".format(
                 self.display_name, self.addresses)

    def __str__(self):
        """Return 'display_name: addr, addr;', or the bare address when the
        group has no display name and exactly one member."""
        if self.display_name is None and len(self.addresses) == 1:
            return str(self.addresses[0])
        disp = self.display_name
        # A display name containing RFC 5322 specials must be quoted.
        if disp is not None and not parser.SPECIALS.isdisjoint(disp):
            disp = parser.quote_string(disp)
        adrstr = ", ".join(str(x) for x in self.addresses)
        adrstr = ' ' + adrstr if adrstr else adrstr
        return "{}:{};".format(disp, adrstr)

    def __eq__(self, other):
        """Compare by display_name and addresses.

        Objects of a different type are not compared here: NotImplemented
        is returned so that Python can try the other operand's __eq__
        before falling back to "not equal".
        """
        if type(other) is not type(self):
            return NotImplemented
        return (self.display_name == other.display_name and
                self.addresses == other.addresses)
154
155
156# Header Classes #
157
class BaseHeader(str):

    """Common machinery shared by all header classes.

    Supplies the generic behavior and the hooks that specialized header
    classes plug in to.

    Every subclass has to provide a classmethod called 'parse' accepting an
    unfolded value string and a dictionary.  On entry the dictionary holds a
    single key, 'defects', whose value is an empty list.  On return it must
    additionally hold 'parse_tree' (the tree produced by parsing the header)
    and 'decoded' (the string form of the idealized data: encoded words
    decoded, canonical representations applied where they exist).

    The 'defects' list is where parsing problems are collected so that the
    message parser can deal with them afterwards.  As far as practical the
    parse method should record defects rather than raise.  The standard
    header parsers record defects for RFC compliance problems, obsolete RFC
    syntax, and unrecoverable parsing errors.

    A parse method is free to put further keys into the dictionary.  When it
    does, the subclass must also define an 'init' method; it receives the
    dictionary as keyword arguments, must consume and remove every extra key
    (normally by storing each as a like-named attribute), and must then
    delegate to its parent class through super with what is left.

    Subclasses should also define a 'max_count' attribute that is either
    None or 1.  XXX: need to better define this API.

    """

    def __new__(cls, name, value):
        parsed = {'defects': []}
        cls.parse(value, parsed)
        # 'decoded' becomes the str value; everything else goes to init().
        text = parsed.pop('decoded')
        if utils._has_surrogates(text):
            text = utils._sanitize(text)
        header = str.__new__(cls, text)
        header.init(name, **parsed)
        return header

    def init(self, name, *, parse_tree, defects):
        """Store the header name and the results of parsing."""
        self._defects = defects
        self._parse_tree = parse_tree
        self._name = name

    @property
    def name(self):
        """The header name this instance was created with."""
        return self._name

    @property
    def defects(self):
        """The recorded defects, as a tuple."""
        return tuple(self._defects)

    def __reduce__(self):
        # The class is built dynamically, so pickling records its name and
        # bases and lets _reconstruct_header rebuild it.
        header_cls = self.__class__
        rebuild_args = (header_cls.__name__, header_cls.__bases__, str(self))
        return (_reconstruct_header, rebuild_args, self.__dict__)

    @classmethod
    def _reconstruct(cls, value):
        """Create an instance from 'value' without running parse/init."""
        return str.__new__(cls, value)

    def fold(self, *, policy):
        """Fold header according to policy.

        The parsed representation of the header is folded according to
        RFC5322 rules, as modified by the policy.  If the parse tree
        contains surrogateescaped bytes, the bytes are CTE encoded using
        the charset 'unknown-8bit".

        Any non-ASCII characters in the parse tree are CTE encoded using
        charset utf-8. XXX: make this a policy setting.

        The returned value is an ASCII-only string possibly containing linesep
        characters, and ending with a linesep character.  The string includes
        the header name and the ': ' separator.

        """
        label = parser.HeaderLabel([
            parser.ValueTerminal(self.name, 'header-name'),
            parser.ValueTerminal(':', 'header-sep'),
        ])
        # At some point we need to only put fws here if it was in the source.
        separator = parser.CFWSList([parser.WhiteSpaceTerminal(' ', 'fws')])
        header = parser.Header([label, separator, self._parse_tree])
        return header.fold(policy=policy)
253
254
def _reconstruct_header(cls_name, bases, value):
    """Rebuild a header: create a class named cls_name with the given bases
    and return the result of its _reconstruct(value)."""
    header_cls = type(cls_name, bases, {})
    return header_cls._reconstruct(value)
257
258
class UnstructuredHeader:

    """Header whose value is parsed as unstructured text."""

    max_count = None
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Parse 'value' and store 'parse_tree' and 'decoded' in kwds."""
        tree = cls.value_parser(value)
        kwds['parse_tree'] = tree
        kwds['decoded'] = str(tree)
268
269
class UniqueUnstructuredHeader(UnstructuredHeader):

    """UnstructuredHeader with max_count set to 1."""

    # Presumably the maximum number of occurrences allowed in a message;
    # nothing in this module enforces it.
    max_count = 1
273
274
class DateHeader:

    """Header whose value consists of a single timestamp.

    Provides an additional attribute, datetime, which is either an aware
    datetime using a timezone, or a naive datetime if the timezone
    in the input string is -0000.  Also accepts a datetime as input.
    The 'value' attribute is the normalized form of the timestamp,
    which means it is the output of format_datetime on the datetime.

    If the value is empty or cannot be parsed as a date, a defect is
    recorded and the datetime attribute is None.
    """

    max_count = None

    # This is used only for folding, not for creating 'decoded'.
    value_parser = staticmethod(parser.get_unstructured)

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'datetime', 'decoded' and 'parse_tree'.

        'value' may be a date string or a datetime object.  Problems are
        appended to kwds['defects'] instead of being raised.
        """
        if not value:
            kwds['defects'].append(errors.HeaderMissingRequiredValue())
            kwds['datetime'] = None
            kwds['decoded'] = ''
            kwds['parse_tree'] = parser.TokenList()
            return
        if isinstance(value, str):
            try:
                value = utils.parsedate_to_datetime(value)
            except (TypeError, ValueError):
                # An unparsable date must not abort header creation:
                # record a defect and keep the raw text so that it is
                # still available for display and folding.
                kwds['defects'].append(errors.InvalidHeaderDefect(
                    'Invalid date value or format'))
                kwds['datetime'] = None
                kwds['decoded'] = value
                kwds['parse_tree'] = cls.value_parser(value)
                return
        kwds['datetime'] = value
        kwds['decoded'] = utils.format_datetime(kwds['datetime'])
        kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'datetime' key added by parse, then delegate."""
        self._datetime = kw.pop('datetime')
        super().init(*args, **kw)

    @property
    def datetime(self):
        """The parsed datetime, or None if there was no usable value."""
        return self._datetime
312
313
class UniqueDateHeader(DateHeader):

    """DateHeader with max_count set to 1."""

    # Presumably the maximum number of occurrences allowed in a message;
    # nothing in this module enforces it.
    max_count = 1
317
318
class AddressHeader:

    """Header whose value is a list of addresses and/or address groups."""

    max_count = None

    @staticmethod
    def value_parser(value):
        """Parse 'value' as an address list and return the parse tree."""
        address_list, leftover = parser.get_address_list(value)
        assert not leftover, 'this should not happen'
        return address_list

    @classmethod
    def parse(cls, value, kwds):
        """Fill kwds with 'groups', 'defects', 'decoded' and 'parse_tree'.

        'value' is either a header string, a single Address/Group-like
        object, or an iterable of such objects.
        """
        if isinstance(value, str):
            # We are translating here from the RFC language (address/mailbox)
            # to our API language (group/address).
            address_list = cls.value_parser(value)
            kwds['parse_tree'] = address_list
            groups = [
                Group(entry.display_name,
                      [Address(mailbox.display_name or '',
                               mailbox.local_part or '',
                               mailbox.domain or '')
                       for mailbox in entry.all_mailboxes])
                for entry in address_list.addresses
            ]
            defects = list(address_list.all_defects)
        else:
            # Assume it is Address/Group stuff
            items = value if hasattr(value, '__iter__') else [value]
            # Anything without an 'addresses' attribute is treated as a
            # lone address and wrapped in an anonymous Group.
            groups = [item if hasattr(item, 'addresses')
                      else Group(None, [item])
                      for item in items]
            defects = []
        kwds['groups'] = groups
        kwds['defects'] = defects
        kwds['decoded'] = ', '.join(map(str, groups))
        if 'parse_tree' not in kwds:
            kwds['parse_tree'] = cls.value_parser(kwds['decoded'])

    def init(self, *args, **kw):
        """Consume the 'groups' key added by parse, then delegate."""
        self._groups = tuple(kw.pop('groups'))
        # Flattened address tuple; computed on first access.
        self._addresses = None
        super().init(*args, **kw)

    @property
    def groups(self):
        """Tuple of the groups making up this header."""
        return self._groups

    @property
    def addresses(self):
        """Tuple of every address from every group, in order."""
        if self._addresses is None:
            self._addresses = tuple(
                member
                for grp in self._groups
                for member in grp.addresses)
        return self._addresses
372
373
class UniqueAddressHeader(AddressHeader):

    """AddressHeader with max_count set to 1."""

    # Presumably the maximum number of occurrences allowed in a message;
    # nothing in this module enforces it.
    max_count = 1
377
378
class SingleAddressHeader(AddressHeader):

    """Address header that exposes its one address via 'address'."""

    @property
    def address(self):
        """The header's only address.

        Raises ValueError unless the header holds exactly one address.
        """
        found = self.addresses
        if len(found) == 1:
            return found[0]
        raise ValueError(("value of single address header {} is not "
            "a single address").format(self.name))
387
388
class UniqueSingleAddressHeader(SingleAddressHeader):

    """SingleAddressHeader with max_count set to 1."""

    # Presumably the maximum number of occurrences allowed in a message;
    # nothing in this module enforces it.
    max_count = 1
392
393
394# The header factory #
395
# Lower-cased header name -> specialized class used to parse and represent
# that header.  HeaderRegistry copies this in when use_default_map is true.
_default_header_map = dict((
    ('subject', UniqueUnstructuredHeader),
    ('date', UniqueDateHeader),
    ('resent-date', DateHeader),
    ('orig-date', UniqueDateHeader),
    ('sender', UniqueSingleAddressHeader),
    ('resent-sender', SingleAddressHeader),
    ('to', UniqueAddressHeader),
    ('resent-to', AddressHeader),
    ('cc', UniqueAddressHeader),
    ('resent-cc', AddressHeader),
    ('bcc', UniqueAddressHeader),
    ('resent-bcc', AddressHeader),
    ('from', UniqueAddressHeader),
    ('resent-from', AddressHeader),
    ('reply-to', UniqueAddressHeader),
))
413
class HeaderRegistry:

    """A header_factory and header registry."""

    def __init__(self, base_class=BaseHeader, default_class=UnstructuredHeader,
                       use_default_map=True):
        """Create a header_factory that works with the Policy API.

        base_class is placed last in the __bases__ of every header class
        the factory creates.  default_class is the specialized class used
        when a header name (see __call__) has no entry in the registry.
        use_default_map (default True) decides whether the default mapping
        of names to specialized classes is copied into the registry at
        creation time.

        """
        # Always a fresh dict, so later map_to_type calls never touch the
        # module-level default map.
        self.registry = dict(_default_header_map) if use_default_map else {}
        self.base_class = base_class
        self.default_class = default_class

    def map_to_type(self, name, cls):
        """Register cls as the specialized class for handling "name" headers.

        """
        key = name.lower()
        self.registry[key] = cls

    def __getitem__(self, name):
        """Return a new class combining the specialized class registered
        for 'name' (or default_class) with base_class."""
        specialized = self.registry.get(name.lower(), self.default_class)
        combined_name = '_' + specialized.__name__
        return type(combined_name, (specialized, self.base_class), {})

    def __call__(self, name, value):
        """Create a header instance for header 'name' from 'value'.

        A class for the header is obtained by combining the factory's
        base_class with the specialized class registered for 'name' (or
        the default_class when none is registered); 'name' and 'value' are
        then passed to that class's constructor.

        """
        header_cls = self[name]
        return header_cls(name, value)