blob: 32cad0d505f987a614824ae00f40ef90a36df374 [file] [log] [blame]
R David Murrayc27e5222012-05-25 15:01:48 -04001"""This will be the home for the policy that hooks in the new
2code that adds all the email6 features.
R David Murray3edd22a2011-04-18 13:59:37 -04003"""
4
R David Murray1be413e2012-05-31 18:00:45 -04005from email._policybase import Policy, Compat32, compat32, _extend_docstrings
R David Murray0b6f6c82012-05-25 18:42:14 -04006from email.utils import _has_surrogates
R David Murrayea976682012-05-27 15:03:38 -04007from email.headerregistry import HeaderRegistry as HeaderRegistry
R David Murray3edd22a2011-04-18 13:59:37 -04008
# Names exported by ``from email.policy import *``: the policy classes
# first, followed by the ready-made policy instances.
__all__ = [
    'Compat32', 'compat32', 'Policy', 'EmailPolicy',
    'default', 'strict', 'SMTP', 'HTTP',
]
R David Murray3edd22a2011-04-18 13:59:37 -040019
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all   -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)
    """

    refold_source = 'long'
    header_factory = HeaderRegistry()

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).  The base class
        # blocks normal attribute assignment, hence object.__setattr__.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        # NOTE: the check is based on splitlines(), so a value whose only
        # line break is a single trailing one still yields one line and is
        # accepted here; only values spanning several lines are rejected.
        if isinstance(value, str) and len(value.splitlines()) > 1:
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        return self.header_factory(name, ''.join(value.splitlines()))

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        """Shared implementation behind fold and fold_binary.

        Header objects (anything with a 'name' attribute) fold themselves.
        Source strings are either refolded through header_factory or simply
        rejoined with the policy's linesep.  refold_binary forces a refold
        when the value contains surrogateescaped bytes.
        """
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (None or 0) means "no length limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        if self.refold_source == 'all':
            refold = True
        elif self.refold_source == 'long':
            # The first line shares its physical line with "name: ", so the
            # name and separator count toward its length.  An empty value
            # yields no lines at all; guard the lines[0] access so that an
            # empty header is emitted as-is instead of raising IndexError.
            first_too_long = (bool(lines) and
                              len(lines[0]) + len(name) + 2 > maxlen)
            refold = first_too_long or any(len(x) > maxlen for x in lines[1:])
        else:
            refold = False
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
181
182
183default = EmailPolicy()
184# Make the default policy use the class default header_factory
185del default.header_factory
R David Murray3edd22a2011-04-18 13:59:37 -0400186strict = default.clone(raise_on_defect=True)
187SMTP = default.clone(linesep='\r\n')
188HTTP = default.clone(linesep='\r\n', max_line_length=None)