blob: c9cbadd2a80c48e7ce1c9247a22fd3024f365249 [file] [log] [blame]
Jingwen Chen475b3cc2021-01-05 21:45:16 -05001"""Policy framework for the email package.
2
3Allows fine grained feature control of how the package parses and emits data.
4"""
5
6import abc
7from email import header
8from email import charset as _charset
9from email.utils import _has_surrogates
10
# Names exported by ``from <this module> import *``.
__all__ = ['Policy', 'Compat32', 'compat32']
16
17
class _PolicyBase:

    """Immutable bag of settings that underlies every Policy class.

    On its own this class does nothing useful.  A subclass declares class
    attributes, each with a default value, and those attribute names become
    the only settings the policy knows about.  The constructor accepts those
    same names as keyword arguments to override the defaults for a single
    instance.

    Once built, an instance rejects attribute assignment.  To obtain a
    variation use clone(), passing the settings to change as keyword
    arguments.  Two instances may also be added; the result is the left
    operand with every per-instance (non-default) setting of the right
    operand applied on top.  That is,

        A + B == A.clone(<non-default values of B>)

    The repr of an instance can be used to reconstruct the object if and
    only if the repr of each stored value can be used to reconstruct that
    value.

    """

    def __init__(self, **kw):
        """Create a policy, optionally overriding class-level defaults.

        Every keyword must name an attribute that already exists on the
        class; anything else raises TypeError.

        """
        for key, setting in kw.items():
            if not hasattr(self, key):
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        key, self.__class__.__name__))
            # Our own __setattr__ always raises, so defer to the next
            # implementation in the MRO to actually store the value.
            super().__setattr__(key, setting)

    def __repr__(self):
        # Only per-instance overrides live in __dict__, so class defaults
        # are deliberately absent from the output.
        pairs = ', '.join(
            "{}={!r}".format(key, setting)
            for key, setting in self.__dict__.items())
        return "{}({})".format(self.__class__.__name__, pairs)

    def clone(self, **kw):
        """Return a copy of this policy with the given settings replaced.

        The copy is of the same class and starts with this instance's
        overrides; each keyword argument then replaces one setting.  A
        keyword that does not name an existing attribute raises TypeError.

        """
        cls = self.__class__
        # Skip __init__: the overrides are copied across verbatim below.
        duplicate = cls.__new__(cls)
        assign = object.__setattr__
        for key, setting in self.__dict__.items():
            assign(duplicate, key, setting)
        for key, setting in kw.items():
            if hasattr(self, key):
                assign(duplicate, key, setting)
            else:
                raise TypeError(
                    "{!r} is an invalid keyword argument for {}".format(
                        key, cls.__name__))
        return duplicate

    def __setattr__(self, name, value):
        # Instances are immutable; the message only distinguishes a known
        # setting from an unknown one.
        template = ("{!r} object attribute {!r} is read-only"
                    if hasattr(self, name)
                    else "{!r} object has no attribute {!r}")
        raise AttributeError(template.format(self.__class__.__name__, name))

    def __add__(self, other):
        """Combine two policies; the right operand's overrides win.

        The result is a new instance of the left operand's class.

        """
        overrides = other.__dict__
        return self.clone(**overrides)
92
93
def _append_doc(doc, added_doc):
    """Return 'doc' extended with the body of 'added_doc'.

    Everything after the last newline of 'doc' (normally the indentation
    before the closing quotes) is dropped, as is the first line of
    'added_doc' (normally the '+' marker).  The two remainders are joined
    with a single newline.

    """
    head = doc.rsplit('\n', 1)[0]
    tail = added_doc.split('\n', 1)[1]
    return '\n'.join((head, tail))
98
def _extend_docstrings(cls):
    """Class decorator that expands docstrings starting with '+'.

    A docstring whose first character is '+' is treated as a continuation
    of an inherited docstring:

      * for the class itself, the docstring of its first base class is used;
      * for each attribute defined directly on the class, the first
        non-empty docstring of the same-named attribute found while walking
        the MRO of each base class is used.

    The inherited text and the continuation are merged with _append_doc.
    If there is no inherited text to extend (the first base has no
    docstring, or no ancestor defines the attribute) the docstring is left
    as written instead of raising.

    Returns 'cls' so that the function can be used as a decorator.

    """
    # Sentinel, so an ancestor that lacks the attribute can be told apart
    # from one that defines it with any value, including None.
    missing = object()

    parent_doc = cls.__bases__[0].__doc__
    if cls.__doc__ and cls.__doc__.startswith('+') and parent_doc:
        cls.__doc__ = _append_doc(parent_doc, cls.__doc__)

    for name, attr in cls.__dict__.items():
        if attr.__doc__ and attr.__doc__.startswith('+'):
            for c in (c for base in cls.__bases__ for c in base.mro()):
                inherited = getattr(c, name, missing)
                if inherited is missing:
                    # This ancestor (for instance 'object') has no such
                    # attribute; keep looking further up.
                    continue
                doc = getattr(inherited, '__doc__', None)
                if doc:
                    attr.__doc__ = _append_doc(doc, attr.__doc__)
                    break
    return cls
110
111
class Policy(_PolicyBase, metaclass=abc.ABCMeta):

    r"""Controls for how messages are interpreted and formatted.

    Most of the classes and many of the methods in the email package accept
    Policy objects as parameters.  A Policy object contains a set of values and
    functions that control how input is interpreted and how output is rendered.
    For example, the parameter 'raise_on_defect' controls whether or not an RFC
    violation results in an error being raised or not, while 'max_line_length'
    controls the maximum length of output lines when a Message is serialized.

    Any valid attribute may be overridden when a Policy is created by passing
    it as a keyword argument to the constructor.  Policy objects are immutable,
    but a new Policy object can be created with only certain values changed by
    calling the clone method with keyword arguments.  Policy objects can
    also be added, producing a new Policy object in which the non-default
    attributes set in the right hand operand overwrite those specified in the
    left operand.

    Settable attributes:

    raise_on_defect     -- If true, then defects should be raised as errors.
                           Default: False.

    linesep             -- string containing the value to use as separation
                           between output lines.  Default '\n'.

    cte_type            -- Type of allowed content transfer encodings

                           7bit  -- ASCII only
                           8bit  -- Content-Transfer-Encoding: 8bit is allowed

                           Default: 8bit.  Also controls the disposition of
                           (RFC invalid) binary data in headers; see the
                           documentation of the fold_binary method.

    max_line_length     -- maximum length of lines, excluding 'linesep',
                           during serialization.  None or 0 means no line
                           wrapping is done.  Default is 78.

    mangle_from_        -- a flag that, when True escapes From_ lines in the
                           body of the message by putting a '>' in front of
                           them. This is used when the message is being
                           serialized by a generator. Default: False.

    message_factory     -- the class to use to create new message objects.
                           If the value is None, the default is Message.

    """

    # Defaults for the settable attributes described in the class docstring.
    raise_on_defect = False
    linesep = '\n'
    cte_type = '8bit'
    max_line_length = 78
    mangle_from_ = False
    message_factory = None

    def handle_defect(self, obj, defect):
        """Based on policy, either raise defect or call register_defect.

            handle_defect(obj, defect)

        defect should be a Defect subclass, but in any case must be an
        Exception subclass.  obj is the object on which the defect should be
        registered if it is not raised.  If the raise_on_defect is True, the
        defect is raised as an error, otherwise the object and the defect are
        passed to register_defect.

        This method is intended to be called by parsers that discover defects.
        The email package parsers always call it with Defect instances.

        """
        if self.raise_on_defect:
            raise defect
        self.register_defect(obj, defect)

    def register_defect(self, obj, defect):
        """Record 'defect' on 'obj'.

        Called by handle_defect if raise_on_defect is False.  This method is
        part of the Policy API so that Policy subclasses can implement custom
        defect handling.  The default implementation calls the append method of
        the defects attribute of obj.  The objects used by the email package by
        default that get passed to this method will always have a defects
        attribute with an append method.

        """
        obj.defects.append(defect)

    def header_max_count(self, name):
        """Return the maximum allowed number of headers named 'name'.

        Called when a header is added to a Message object.  If the returned
        value is not 0 or None, and there are already a number of headers with
        the name 'name' equal to the value returned, a ValueError is raised.

        Because the default behavior of Message's __setitem__ is to append the
        value to the list of headers, it is easy to create duplicate headers
        without realizing it.  This method allows certain headers to be limited
        in the number of instances of that header that may be added to a
        Message programmatically.  (The limit is not observed by the parser,
        which will faithfully produce as many headers as exist in the message
        being parsed.)

        The default implementation returns None for all header names.
        """
        return None

    @abc.abstractmethod
    def header_source_parse(self, sourcelines):
        """Given a list of linesep terminated strings constituting the lines of
        a single header, return the (name, value) tuple that should be stored
        in the model.  The input lines should retain their terminating linesep
        characters.  The lines passed in by the email package may contain
        surrogateescaped binary data.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_store_parse(self, name, value):
        """Given the header name and the value provided by the application
        program, return the (name, value) that should be stored in the model.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def header_fetch_parse(self, name, value):
        """Given the header name and the value from the model, return the value
        to be returned to the application program that is requesting that
        header.  The value passed in by the email package may contain
        surrogateescaped binary data if the lines were parsed by a BytesParser.
        The returned value should not contain any surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold(self, name, value):
        """Given the header name and the value from the model, return a string
        containing linesep characters that implement the folding of the header
        according to the policy controls.  The value passed in by the email
        package may contain surrogateescaped binary data if the lines were
        parsed by a BytesParser.  The returned value should not contain any
        surrogateescaped data.

        """
        raise NotImplementedError

    @abc.abstractmethod
    def fold_binary(self, name, value):
        """Given the header name and the value from the model, return binary
        data containing linesep characters that implement the folding of the
        header according to the policy controls.  The value passed in by the
        email package may contain surrogateescaped binary data.

        """
        raise NotImplementedError
269
270
@_extend_docstrings
class Compat32(Policy):

    """+
    This particular policy is the backward compatibility Policy.  It
    replicates the behavior of the email package version 5.1.
    """

    mangle_from_ = True

    def _sanitize_header(self, name, value):
        # A str carrying surrogate escapes is wrapped in a Header that uses
        # the unknown-8bit charset, so the bytes come out as encoded words.
        # Anything that is not a str is assumed to be a Header already and,
        # like a clean str, is handed back untouched.
        if isinstance(value, str) and _has_surrogates(value):
            return header.Header(value, charset=_charset.UNKNOWN8BIT,
                                 header_name=name)
        return value

    def header_source_parse(self, sourcelines):
        """+
        The name is everything before the first ':' and is returned as is.
        The value is what follows on the first line with leading spaces and
        tabs removed, followed by all remaining lines, with any trailing
        carriage return or linefeed characters stripped from the end.

        """
        name, remainder = sourcelines[0].split(':', 1)
        pieces = [remainder.lstrip(' \t')]
        pieces.extend(sourcelines[1:])
        return (name, ''.join(pieces).rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        Both the name and the value are passed through unchanged.
        """
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        A value containing binary data is converted into a Header object
        that uses the unknown-8bit charset.  Any other value is returned
        unchanged.
        """
        return self._sanitize_header(name, value)

    def fold(self, name, value):
        """+
        Folding is done by the Header folding algorithm: line breaks already
        present in the value are kept and each resulting line is wrapped to
        max_line_length.  Non-ASCII binary data is CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, sanitize=True)

    def fold_binary(self, name, value):
        """+
        Folding is done by the Header folding algorithm: line breaks already
        present in the value are kept and each resulting line is wrapped to
        max_line_length.  When cte_type is 7bit, non-ascii binary data is CTE
        encoded using the unknown-8bit charset.  Otherwise the original
        source header is emitted with its line breaks and binary data intact.

        """
        seven_bit_only = self.cte_type == '7bit'
        text = self._fold(name, value, sanitize=seven_bit_only)
        return text.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, sanitize):
        chunks = ['%s: ' % name]
        if not isinstance(value, str):
            # Assume it is a Header-like object.
            hdr = value
        elif not _has_surrogates(value):
            hdr = header.Header(value, header_name=name)
        elif sanitize:
            hdr = header.Header(value,
                                charset=_charset.UNKNOWN8BIT,
                                header_name=name)
        else:
            # Raw 8bit data of unknown encoding.  Splitting as ASCII could
            # cut a multibyte sequence in half, and there is no way to tell
            # whether that would happen, so the value is emitted unsplit at
            # the risk of producing an over-long line.
            chunks.append(value)
            hdr = None
        if hdr is not None:
            # Header treats maxlinelen=None as "use the default of 78", so
            # a policy value of None has to be passed on as 0 instead.
            limit = 0 if self.max_line_length is None else self.max_line_length
            chunks.append(hdr.encode(linesep=self.linesep, maxlinelen=limit))
        chunks.append(self.linesep)
        return ''.join(chunks)
372
373
# Module-level Compat32 instance built with every setting at its default.
compat32 = Compat32()