blob: ea90a8f11b6bbb32b3a8acf95bbd193d6fc75d0f [file] [log] [blame]
"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""
4
R David Murray0b6f6c82012-05-25 18:42:14 -04005from email._policybase import Policy, Compat32, compat32
6from email.utils import _has_surrogates
7from email._headerregistry import HeaderRegistry as _HeaderRegistry
R David Murray3edd22a2011-04-18 13:59:37 -04008
# Public names exported by a star-import of this module.  Policy, Compat32
# and compat32 are re-exported from email._policybase; the remaining names
# are defined in this module.
__all__ = [
    'Compat32', 'compat32', 'Policy', 'EmailPolicy',
    'default', 'strict', 'SMTP', 'HTTP',
]
R David Murray3edd22a2011-04-18 13:59:37 -040019
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all   -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)
    """

    refold_source = 'long'
    header_factory = _HeaderRegistry()

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is used directly so the assignment does not
            # go through the policy's own attribute-setting logic.
            object.__setattr__(self, 'header_factory', _HeaderRegistry())
        super().__init__(**kw)

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        # Unpacking raises ValueError if the first line contains no ':'.
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if splitting the input
        value with splitlines yields more than one line, i.e. if it contains
        an embedded line break.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        # NOTE(review): splitlines() does not produce an extra line for a
        # single trailing line break, and it splits on more characters than
        # just CR and LF, so this check is not exactly "contains CR or LF".
        # Behaviour deliberately left as is.
        if len(value.splitlines()) > 1:
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph (presumably by the header
        object created by header_factory; nothing in this method does it).

        """
        if hasattr(value, 'name'):
            return value
        return self.header_factory(name, ''.join(value.splitlines()))

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        return folded.encode('ascii', 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Shared implementation of fold() and fold_binary().  Returns the
        # folded header as a str that ends with the policy's linesep.
        #
        # name          -- the header field name
        # value         -- a header object (anything with a 'name' attribute)
        #                  or a source value (a plain string)
        # refold_binary -- if true, a source value containing surrogates is
        #                  refolded regardless of refold_source
        if hasattr(value, 'name'):
            return value.fold(policy=self)
        # A false max_line_length (e.g. None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        lines = value.splitlines()
        if self.refold_source == 'all':
            refold = True
        elif self.refold_source == 'long':
            # The first line shares its row with "name: ", hence the extra
            # len(name) + 2.  An empty value yields no lines at all, so the
            # first-line check must be guarded to avoid an IndexError.
            first_too_long = (bool(lines) and
                              len(lines[0]) + len(name) + 2 > maxlen)
            refold = first_too_long or any(len(x) > maxlen for x in lines[1:])
        else:
            refold = False
        if refold or (refold_binary and _has_surrogates(value)):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
172
173
# Ready-made policy instances.  EmailPolicy.__init__ gives every new instance
# its own header_factory; deleting that instance attribute makes 'default'
# fall back to the class-level header_factory instead.  This must happen
# before the clone() calls below.
default = EmailPolicy()
del default.header_factory
# Variations on 'default', each produced with clone().
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)