blob: 6bc298b945c850a218be884b773d58b71fe4fe8f [file] [log] [blame]
R David Murrayc27e5222012-05-25 15:01:48 -04001"""Policy framework for the email package.
2
3Allows fine grained feature control of how the package parses and emits data.
4"""
5
6import abc
7from email import header
8from email import charset as _charset
9from email.utils import _has_surrogates
10
# Names exported by a star-import of this module.
__all__ = [
    'Policy',
    'Compat32',
    'compat32',
    ]
16
17
18class _PolicyBase:
19
20 """Policy Object basic framework.
21
22 This class is useless unless subclassed. A subclass should define
23 class attributes with defaults for any values that are to be
24 managed by the Policy object. The constructor will then allow
25 non-default values to be set for these attributes at instance
26 creation time. The instance will be callable, taking these same
27 attributes keyword arguments, and returning a new instance
28 identical to the called instance except for those values changed
29 by the keyword arguments. Instances may be added, yielding new
30 instances with any non-default values from the right hand
31 operand overriding those in the left hand operand. That is,
32
33 A + B == A(<non-default values of B>)
34
35 The repr of an instance can be used to reconstruct the object
36 if and only if the repr of the values can be used to reconstruct
37 those values.
38
39 """
40
41 def __init__(self, **kw):
42 """Create new Policy, possibly overriding some defaults.
43
44 See class docstring for a list of overridable attributes.
45
46 """
47 for name, value in kw.items():
48 if hasattr(self, name):
49 super(_PolicyBase,self).__setattr__(name, value)
50 else:
51 raise TypeError(
52 "{!r} is an invalid keyword argument for {}".format(
53 name, self.__class__.__name__))
54
55 def __repr__(self):
56 args = [ "{}={!r}".format(name, value)
57 for name, value in self.__dict__.items() ]
58 return "{}({})".format(self.__class__.__name__, ', '.join(args))
59
60 def clone(self, **kw):
61 """Return a new instance with specified attributes changed.
62
63 The new instance has the same attribute values as the current object,
64 except for the changes passed in as keyword arguments.
65
66 """
R David Murray0b6f6c82012-05-25 18:42:14 -040067 newpolicy = self.__class__.__new__(self.__class__)
R David Murrayc27e5222012-05-25 15:01:48 -040068 for attr, value in self.__dict__.items():
R David Murray0b6f6c82012-05-25 18:42:14 -040069 object.__setattr__(newpolicy, attr, value)
70 for attr, value in kw.items():
71 if not hasattr(self, attr):
72 raise TypeError(
73 "{!r} is an invalid keyword argument for {}".format(
74 attr, self.__class__.__name__))
75 object.__setattr__(newpolicy, attr, value)
76 return newpolicy
R David Murrayc27e5222012-05-25 15:01:48 -040077
78 def __setattr__(self, name, value):
79 if hasattr(self, name):
80 msg = "{!r} object attribute {!r} is read-only"
81 else:
82 msg = "{!r} object has no attribute {!r}"
83 raise AttributeError(msg.format(self.__class__.__name__, name))
84
85 def __add__(self, other):
86 """Non-default values from right operand override those from left.
87
88 The object returned is a new instance of the subclass.
89
90 """
91 return self.clone(**other.__dict__)
92
93
# Conceptually this isn't a subclass of ABCMeta, but since we want Policy to
# use ABCMeta as a metaclass *and* we want it to use this one as well, we have
# to make this one a subclass of ABCMeta.
97class _DocstringExtenderMetaclass(abc.ABCMeta):
98
99 def __new__(meta, classname, bases, classdict):
100 if classdict.get('__doc__') and classdict['__doc__'].startswith('+'):
101 classdict['__doc__'] = meta._append_doc(bases[0].__doc__,
102 classdict['__doc__'])
103 for name, attr in classdict.items():
104 if attr.__doc__ and attr.__doc__.startswith('+'):
105 for cls in (cls for base in bases for cls in base.mro()):
106 doc = getattr(getattr(cls, name), '__doc__')
107 if doc:
108 attr.__doc__ = meta._append_doc(doc, attr.__doc__)
109 break
110 return super().__new__(meta, classname, bases, classdict)
111
112 @staticmethod
113 def _append_doc(doc, added_doc):
114 added_doc = added_doc.split('\n', 1)[1]
115 return doc + '\n' + added_doc
116
117
class Policy(_PolicyBase, metaclass=_DocstringExtenderMetaclass):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object bundles a set of values
    and functions that govern how input is interpreted and how output is
    rendered.  For example, 'raise_on_defect' decides whether an RFC
    violation results in an error being raised, while 'max_line_length'
    sets the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden at creation time by passing it as
    a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object with only certain values changed can be
    obtained by calling the clone method with keyword arguments.  Policy
    objects can also be added, producing a new Policy object in which the
    non-default attributes set in the right hand operand overwrite those
    specified in the left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    """

    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78

    def handle_defect(self, obj, defect):
        """Raise 'defect' or register it on 'obj', as the policy dictates.

        handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect is
        recorded when it is not raised.  When raise_on_defect is true the
        defect is raised as an error; otherwise obj and defect are handed
        to register_defect.

        This method is intended to be called by parsers that discover
        defects.  The email package parsers always call it with Defect
        instances.

        """
        if not self.raise_on_defect:
            self.register_defect(obj, defect)
            return
        raise defect

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect when raise_on_defect is False.  It is part
        of the Policy API so that Policy subclasses can implement custom
        defect handling.  This default implementation appends the defect to
        the defects attribute of obj.  The objects the email package passes
        to this method by default always have a defects attribute with an
        append method.

        """
        defect_list = obj.defects
        defect_list.append(defect)

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines
        of a single header, return the (name, value) tuple that should be
        stored in the model.  The input lines should retain their
        terminating linesep characters.  The lines passed in by the email
        package may contain surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the
        model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the
        value to be handed back to the application program that asked for
        the header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a
        BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a
        string containing linesep characters that implement the folding of
        the header according to the policy controls.  The value passed in
        by the email package may contain surrogateescaped binary data if
        the lines were parsed by a BytesParser.  The returned value should
        not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return
        binary data containing linesep characters that implement the
        folding of the header according to the policy controls.  The value
        passed in by the email package may contain surrogateescaped binary
        data.

        """
        raise NotImplementedError
246
247
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy.  It
    replicates the behavior of the email package version 5.1.
    """

    def _sanitize_header(self, name, value):
        """Return 'value', wrapped in a Header if it holds surrogates.

        Non-str values are assumed to already be header objects and are
        returned as is.  A str containing surrogates is returned as a
        Header using the unknown-8bit charset so the bytes are emitted as
        encoded words; any other str is returned unmodified.

        """
        if not isinstance(value, str):
            # Assume it is already a header object
            return value
        if _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        else:
            return value

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name and value are returned unmodified.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value contains binary data, it is converted into a Header object
        using the unknown-8bit charset.  Otherwise it is returned unmodified.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  Non-ASCII binary data are CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Headers are folded using the Header folding algorithm, which preserves
        existing line breaks in the value, and wraps each resulting line to the
        max_line_length.  If cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset.  Otherwise the original source
        header is used, with its existing line breaks and/or binary data.

        """
        folded = self._fold(name, value, sanitize=self.cte_type=='7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        """Return 'name: value' folded per the policy, ending in linesep.

        value may be a str or a Header-like object with an encode method.
        When value is a str containing surrogates, sanitize selects between
        encoding it with the unknown-8bit charset (true) and emitting it
        unfolded exactly as given (false).

        """
        parts = []
        parts.append('%s: ' % name)
        if isinstance(value, str):
            if _has_surrogates(value):
                if sanitize:
                    h = header.Header(value,
                                      charset=_charset.UNKNOWN8BIT,
                                      header_name=name)
                else:
                    # If we have raw 8bit data in a byte string, we have no idea
                    # what the encoding is.  There is no safe way to split this
                    # string.  If it's ascii-subset, then we could do a normal
                    # ascii split, but if it's multibyte then we could break the
                    # string.  There's no way to know so the least harm seems to
                    # be to not split the string and risk it being too long.
                    parts.append(value)
                    h = None
            else:
                h = header.Header(value, header_name=name)
        else:
            # Assume it is a Header-like object.
            h = value
        if h is not None:
            # Header.encode interprets maxlinelen=None as "use the Header's
            # own default length", whereas a max_line_length of None is
            # documented to mean "no wrapping", the same as 0.  Translate
            # None to 0 so both values disable wrapping.
            maxlinelen = 0
            if self.max_line_length is not None:
                maxlinelen = self.max_line_length
            parts.append(h.encode(linesep=self.linesep,
                                  maxlinelen=maxlinelen))
        parts.append(self.linesep)
        return ''.join(parts)
342
343
# Module-level Compat32 instance created with no overrides (all defaults).
compat32 = Compat32()