blob: 6ac64a56831d261abe3842eac1376e5b8e85bc58 [file] [log] [blame]
"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""
4
R David Murray1be413e2012-05-31 18:00:45 -04005from email._policybase import Policy, Compat32, compat32, _extend_docstrings
R David Murray0b6f6c82012-05-25 18:42:14 -04006from email.utils import _has_surrogates
R David Murrayea976682012-05-27 15:03:38 -04007from email.headerregistry import HeaderRegistry as HeaderRegistry
R David Murray3da240f2013-10-16 22:48:40 -04008from email.contentmanager import raw_data_manager
R David Murray3edd22a2011-04-18 13:59:37 -04009
# Names exported by ``from email.policy import *``.  Every ready-made policy
# instance defined at the bottom of this module is listed, including
# SMTPUTF8, so that the star-import surface matches the public instances.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
R David Murray3edd22a2011-04-18 13:59:37 -040020
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # Plain attribute assignment is bypassed here by going straight
            # to object.__setattr__.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        # NOTE: the check is "more than one line", so a value whose only line
        # break is a single trailing one splits into one line and is passed on
        # to the header factory rather than rejected here.
        if isinstance(value, str) and len(value.splitlines()) > 1:
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        return self.header_factory(name, ''.join(value.splitlines()))

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        A source value that was not refolded but cannot be encoded with the
        selected charset (for example non-ASCII text when utf8 is false) is
        refolded through header_factory and the result is encoded instead.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        try:
            return folded.encode(charset, 'surrogateescape')
        except UnicodeEncodeError:
            if hasattr(value, 'name'):
                # Header objects were already folded by their own fold();
                # there is nothing further to try for them.
                raise
            # _fold only forces a refold for surrogate-escaped data, so a
            # short source value holding ordinary non-ASCII characters comes
            # back verbatim and cannot be written with this charset.  Build a
            # header object from it and fold that instead of failing.
            header = self.header_factory(name, ''.join(value.splitlines()))
            return header.fold(policy=self).encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Return the folded text form of header 'name' with 'value'.

        Header objects (anything with a 'name' attribute) fold themselves.
        Source strings are refolded through header_factory when refold_source
        asks for it, or when refold_binary is true and the value contains
        surrogate-escaped data; otherwise their lines are rejoined with the
        policy linesep.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        # The first line shares its row with "name: ", hence the extra
        # len(name) + 2 when measuring it.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
206
207
# Ready-made policy instances.
default = EmailPolicy()
# __init__ stored a fresh registry on the instance; removing it makes the
# default policy use the class default header_factory.
del default.header_factory

# Variants derived from ``default`` by cloning with overrides.
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(max_line_length=None, linesep='\r\n')
SMTPUTF8 = SMTP.clone(utf8=True)