blob: f0b20f4b198ac34d58552fb415fef16f9bd8c26d [file] [log] [blame]
"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""
4
R David Murray1be413e2012-05-31 18:00:45 -04005from email._policybase import Policy, Compat32, compat32, _extend_docstrings
R David Murray0b6f6c82012-05-25 18:42:14 -04006from email.utils import _has_surrogates
R David Murrayea976682012-05-27 15:03:38 -04007from email.headerregistry import HeaderRegistry as HeaderRegistry
R David Murray3da240f2013-10-16 22:48:40 -04008from email.contentmanager import raw_data_manager
R David Murray3edd22a2011-04-18 13:59:37 -04009
# Public names exported by this module.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    ]
R David Murray3edd22a2011-04-18 13:59:37 -040020
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is used instead of plain assignment;
            # presumably the base policy restricts normal attribute
            # assignment -- confirm against email._policybase.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        # NOTE(review): the check is "splits into more than one line", so a
        # single trailing line break is accepted, and str.splitlines also
        # treats characters other than CR/LF as line boundaries; the error
        # message is therefore not exact in every case.  Behaviour is kept
        # as is so that existing callers are unaffected.
        if isinstance(value, str) and len(value.splitlines()) > 1:
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any CR and LF characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # Only CR and LF are stripped here.  str.splitlines cannot be used
        # because it also splits on (and would therefore drop) characters
        # such as VT, FF, NEL, U+2028 and U+2029 that are legitimate header
        # content rather than line separators.
        unfolded = value.replace('\r', '').replace('\n', '')
        return self.header_factory(name, unfolded)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_header setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation of fold and fold_binary; returns a str.

        Header objects (values with a 'name' attribute) fold themselves.
        Source values are refolded through header_factory when refold_source
        requires it, or when refold_binary is true and the value contains
        surrogates; otherwise their lines are rejoined with the policy linesep.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        # The first line also carries "name: ", hence len(name) + 2.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0]) + len(name) + 2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
194
195
# Ready-made policy instances exported by this module.
default = EmailPolicy()
# Make the default policy use the class default header_factory: deleting the
# instance attribute set in EmailPolicy.__init__ lets attribute lookup fall
# back to the HeaderRegistry stored on the class.
del default.header_factory
# The remaining policies are clones of default with selected settings changed.
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)