"""This will be the home for the policy that hooks in the new
code that adds all the email6 features.
"""
| 4 | |
R David Murray | 1be413e | 2012-05-31 18:00:45 -0400 | [diff] [blame] | 5 | from email._policybase import Policy, Compat32, compat32, _extend_docstrings |
R David Murray | 0b6f6c8 | 2012-05-25 18:42:14 -0400 | [diff] [blame] | 6 | from email.utils import _has_surrogates |
R David Murray | ea97668 | 2012-05-27 15:03:38 -0400 | [diff] [blame] | 7 | from email.headerregistry import HeaderRegistry as HeaderRegistry |
R David Murray | 3da240f | 2013-10-16 22:48:40 -0400 | [diff] [blame] | 8 | from email.contentmanager import raw_data_manager |
R David Murray | 3edd22a | 2011-04-18 13:59:37 -0400 | [diff] [blame] | 9 | |
# Public API of this module.  Every ready-made policy instance defined at the
# bottom of the file is listed here, including SMTPUTF8, so that
# ``from email.policy import *`` exports the complete documented set.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
R David Murray | 3edd22a | 2011-04-18 13:59:37 -0400 | [diff] [blame] | 20 | |
def _split_on_crlf(value):
    """Split the string *value* into lines on CR, LF and CRLF only.

    str.splitlines() also breaks on VT, FF, FS, GS, RS, NEL, U+2028 and
    U+2029.  None of those is a line separator in an RFC 5322 header, so
    using splitlines() on header values either drops those characters or
    turns them into real line breaks.  Apart from that, the result matches
    splitlines(): a terminating line break does not produce a trailing empty
    line, and the empty string yields an empty list.
    """
    lines = value.replace('\r\n', '\n').replace('\r', '\n').split('\n')
    # str.split always returns at least one element, so indexing is safe.
    if lines[-1] == '':
        lines.pop()
    return lines


@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms. Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field. The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings. If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form. The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all  -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header. A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured. This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content. When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments. The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is called directly, as in the original code,
            # rather than assigning through the policy's own attribute
            # machinery.
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent. This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy. It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters. (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged. If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged. Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value. In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        # NOTE: this check intentionally keeps using str.splitlines().  It
        # therefore also rejects the extra line boundaries splitlines()
        # recognizes and accepts a single terminating line break; callers may
        # rely on either, so the accepted set of values is left unchanged.
        if isinstance(value, str) and len(value.splitlines()) > 1:
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any CR and LF characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned. Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # Only CR/LF are unfolded; str.splitlines() would also strip other
        # characters (e.g. U+2028, NEL, FF) out of the header value.
        return self.header_factory(name, ''.join(_split_on_crlf(value)))

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting. A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort). If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method. Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines on CR, LF and CRLF only (not on the
        additional line boundaries recognized by str.splitlines). If the value
        is not to be refolded, the lines are rejoined using the linesep from
        the policy and returned. The exception is lines containing non-ascii
        binary data. In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes. Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type == '7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        # surrogateescape turns surrogate-escaped code points back into the
        # original raw bytes.
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Shared implementation behind fold() and fold_binary(); returns the
        # complete folded header (name, value and trailing linesep) as a str.
        if hasattr(value, 'name'):
            # Already a header object: it knows how to fold itself.
            return value.fold(policy=self)
        # A falsy max_line_length (None or 0) means "no limit".
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        # Split on CR/LF only.  With str.splitlines() a character such as VT
        # or U+2028 inside the value would be rewritten to a real line break
        # on the non-refold path below.
        lines = _split_on_crlf(value)
        # The first line is measured together with the "name: " prefix.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0]) + len(name) + 2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
| 206 | |
| 207 | |
# Ready-made policy instances.  Everything below is derived from ``default``
# through clone(), overriding only the settings named in each call.
default = EmailPolicy()
# EmailPolicy.__init__ installs a per-instance header registry; remove it so
# that the default policy uses the class default header_factory.
delattr(default, 'header_factory')
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
HTTP = default.clone(linesep='\r\n', max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)