blob: 35d0e699c6de212f4ccd41ccc4b24ae65129fe2b [file] [log] [blame]
R David Murrayc27e5222012-05-25 15:01:48 -04001"""This will be the home for the policy that hooks in the new
2code that adds all the email6 features.
R David Murray3edd22a2011-04-18 13:59:37 -04003"""
4
R David Murraydc1650c2016-09-07 17:44:34 -04005import re
R David Murray1be413e2012-05-31 18:00:45 -04006from email._policybase import Policy, Compat32, compat32, _extend_docstrings
R David Murray0b6f6c82012-05-25 18:42:14 -04007from email.utils import _has_surrogates
R David Murrayea976682012-05-27 15:03:38 -04008from email.headerregistry import HeaderRegistry as HeaderRegistry
R David Murray3da240f2013-10-16 22:48:40 -04009from email.contentmanager import raw_data_manager
R David Murray3edd22a2011-04-18 13:59:37 -040010
# Names exported by ``from email.policy import *``.  Every ready-made policy
# instance created at the bottom of this module is listed, including
# SMTPUTF8.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]

# Matches one CR or one LF character.  It is used in place of
# str.splitlines(), which splits on more than \r and \n.  Because each
# character matches on its own, splitting a CRLF pair yields an empty string
# between the two characters; that is harmless when the pieces are joined
# back together with ''.
linesep_splitter = re.compile(r'\n|\r')
23
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    utf8 = False
    refold_source = 'long'
    # Class-level registry; instances normally get their own in __init__.
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is used directly so the attribute is stored
            # without going through the policy's own attribute-setting hook.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines()) > 1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines at CR, LF and CRLF sequences only;
        other characters that str.splitlines treats as line boundaries are
        left in place.  If the value is not to be refolded, the lines are
        rejoined using the linesep from the policy and returned.  The
        exception is lines containing non-ascii binary data.  In that case the
        value is refolded regardless of the refold_source setting, which
        causes the binary data to be CTE encoded using the unknown-8bit
        charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Shared implementation of fold() and fold_binary(): returns the
        # complete folded header ("name: value" plus line separator) as a str.
        # refold_binary selects whether a source value containing
        # surrogateescaped bytes is refolded regardless of refold_source.
        if hasattr(value, 'name'):
            # Already a header object; it knows how to fold itself.
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no length limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        # Split on CR, LF and CRLF only.  str.splitlines() must not be used
        # here: it also breaks on VT, FF, FS/GS/RS, NEL, U+2028 and U+2029,
        # none of which end a header line, so a value containing one of them
        # would be rewritten with a real line break in its place.
        lines = value.replace('\r\n', '\n').replace('\r', '\n').split('\n')
        if lines[-1] == '':
            # Like splitlines(), produce no trailing empty line for a final
            # line break, and no lines at all for an empty value.
            del lines[-1]
        refold = (self.refold_source == 'all' or
                  (self.refold_source == 'long' and
                   # The first line also carries "name: " in the output.
                   (lines and len(lines[0]) + len(name) + 2 > maxlen or
                    any(len(x) > maxlen for x in lines[1:]))))
        if refold or (refold_binary and _has_surrogates(value)):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
213
214
# Ready-made policy instances.  ``default`` is built first and the others are
# derived from it (or from one another) with clone(), overriding only the
# attributes named in each call.
default = EmailPolicy()
# EmailPolicy.__init__ stored a fresh HeaderRegistry on this instance; remove
# that instance attribute so lookups fall back to the class-level
# header_factory.
del default.header_factory

strict = default.clone(raise_on_defect=True)

# Policies that use CRLF as the line separator.
SMTP = default.clone(linesep='\r\n')
SMTPUTF8 = SMTP.clone(utf8=True)
HTTP = default.clone(linesep='\r\n', max_line_length=None)