blob: 611deb50bb5290e193942236f691be6979a1f73c [file] [log] [blame]
R David Murrayc27e5222012-05-25 15:01:48 -04001"""This will be the home for the policy that hooks in the new
2code that adds all the email6 features.
R David Murray3edd22a2011-04-18 13:59:37 -04003"""
4
R David Murraydc1650c2016-09-07 17:44:34 -04005import re
Abhilash Rajfeac6cd2019-05-17 12:28:44 -07006import sys
R David Murray1be413e2012-05-31 18:00:45 -04007from email._policybase import Policy, Compat32, compat32, _extend_docstrings
R David Murray0b6f6c82012-05-25 18:42:14 -04008from email.utils import _has_surrogates
R David Murrayea976682012-05-27 15:03:38 -04009from email.headerregistry import HeaderRegistry as HeaderRegistry
R David Murray3da240f2013-10-16 22:48:40 -040010from email.contentmanager import raw_data_manager
R David Murray06ed2182016-09-09 18:39:18 -040011from email.message import EmailMessage
R David Murray3edd22a2011-04-18 13:59:37 -040012
# Public names exported by ``from email.policy import *``.  Every ready-made
# policy instance defined at the bottom of this module is listed here,
# including SMTPUTF8.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
R David Murray3edd22a2011-04-18 13:59:37 -040023
# Matches a single LF or a single CR.  EmailPolicy.header_fetch_parse splits
# on this pattern and rejoins the pieces to strip line separators from a
# header value; str.splitlines() is not used there because it splits on more
# than \r and \n.
linesep_splitter = re.compile(r'\n|\r')
25
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).  The attribute is
        # set through object.__setattr__ rather than by plain assignment.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines at CR, LF and CRLF only; no other
        character is treated as a line boundary.  If the value is not to be
        refolded, the lines are rejoined using the linesep from the policy and
        returned.  The exception is lines containing non-ascii binary data.  In
        that case the value is refolded regardless of the refold_source
        setting, which causes the binary data to be CTE encoded using the
        unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation behind fold and fold_binary.

        Header objects (values with a 'name' attribute) fold themselves.  A
        source string is either refolded through header_factory or returned
        with its original line structure, each line terminated by the
        policy's linesep.  When refold_binary is true, a value containing
        surrogateescaped bytes is always refolded.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else sys.maxsize
        # Split on CRLF, CR and LF only.  str.splitlines() also breaks on
        # characters such as VT, FF, NEL, U+2028 and U+2029, which are
        # ordinary data in a header value; splitting there would replace
        # them with linesep (or drop them when refolding).
        lines = re.split(r'\r\n|\r|\n', value)
        if not lines[-1]:
            # Match splitlines(): a trailing line terminator does not start
            # a new (empty) line, and an empty value has no lines at all.
            lines.pop()
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or (refold_binary and _has_surrogates(value)):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
216
217
# Ready-made policy instances.  Statement order matters here: the instance
# attribute on 'default' is deleted before any clone is derived from it.
default = EmailPolicy()
# Make the default policy use the class default header_factory
# (EmailPolicy.__init__ gave this instance its own HeaderRegistry; deleting
# the instance attribute makes lookups fall back to the class attribute).
del default.header_factory
# Same as default, but with raise_on_defect=True.
strict = default.clone(raise_on_defect=True)
# Same as default, but with '\r\n' as the line separator.
SMTP = default.clone(linesep='\r\n')
# '\r\n' line separator and max_line_length=None; EmailPolicy._fold treats a
# false max_line_length as sys.maxsize.
HTTP = default.clone(linesep='\r\n', max_line_length=None)
# Same as SMTP, but with utf8=True.
SMTPUTF8 = SMTP.clone(utf8=True)