#14731: refactor email policy framework.
This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API. (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file is some small changes in
test framework and the addition of tests for bugs fixed that apply to the 3.2
API.
There are some additional teaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase. That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 922617a..91976f1 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -10,13 +10,12 @@
import uu
import base64
import binascii
-import warnings
from io import BytesIO, StringIO
# Intrapackage imports
from email import utils
from email import errors
-from email import header
+from email._policybase import compat32
from email import charset as _charset
Charset = _charset.Charset
@@ -26,24 +25,6 @@
# existence of which force quoting of the parameter value.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
- '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
-
-
-# Helper functions
-def _sanitize_header(name, value):
- # If the header value contains surrogates, return a Header using
- # the unknown-8bit charset to encode the bytes as encoded words.
- if not isinstance(value, str):
- # Assume it is already a header object
- return value
- if _has_surrogates(value):
- return header.Header(value, charset=_charset.UNKNOWN8BIT,
- header_name=name)
- else:
- return value
def _splitparam(param):
# Split header parameters. BAW: this may be too simple. It isn't
@@ -136,7 +117,8 @@
you must use the explicit API to set or get all the headers. Not all of
the mapping methods are implemented.
"""
- def __init__(self):
+ def __init__(self, policy=compat32):
+ self.policy = policy
self._headers = []
self._unixfrom = None
self._payload = None
@@ -246,7 +228,7 @@
cte = str(self.get('content-transfer-encoding', '')).lower()
# payload may be bytes here.
if isinstance(payload, str):
- if _has_surrogates(payload):
+ if utils._has_surrogates(payload):
bpayload = payload.encode('ascii', 'surrogateescape')
if not decode:
try:
@@ -362,7 +344,7 @@
Note: this does not overwrite an existing header with the same field
name. Use __delitem__() first to delete any existing headers.
"""
- self._headers.append((name, val))
+ self._headers.append(self.policy.header_store_parse(name, val))
def __delitem__(self, name):
"""Delete all occurrences of a header, if present.
@@ -401,7 +383,8 @@
Any fields deleted and re-inserted are always appended to the header
list.
"""
- return [_sanitize_header(k, v) for k, v in self._headers]
+ return [self.policy.header_fetch_parse(k, v)
+ for k, v in self._headers]
def items(self):
"""Get all the message's header fields and values.
@@ -411,7 +394,8 @@
Any fields deleted and re-inserted are always appended to the header
list.
"""
- return [(k, _sanitize_header(k, v)) for k, v in self._headers]
+ return [(k, self.policy.header_fetch_parse(k, v))
+ for k, v in self._headers]
def get(self, name, failobj=None):
"""Get a header value.
@@ -422,10 +406,29 @@
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
- return _sanitize_header(k, v)
+ return self.policy.header_fetch_parse(k, v)
return failobj
#
+ # "Internal" methods (public API, but only intended for use by a parser
+ # or generator, not normal application code.
+ #
+
+ def set_raw(self, name, value):
+ """Store name and value in the model without modification.
+
+ This is an "internal" API, intended only for use by a parser.
+ """
+ self._headers.append((name, value))
+
+ def raw_items(self):
+ """Return the (name, value) header pairs without modification.
+
+ This is an "internal" API, intended only for use by a generator.
+ """
+ return iter(self._headers.copy())
+
+ #
# Additional useful stuff
#
@@ -442,7 +445,7 @@
name = name.lower()
for k, v in self._headers:
if k.lower() == name:
- values.append(_sanitize_header(k, v))
+ values.append(self.policy.header_fetch_parse(k, v))
if not values:
return failobj
return values
@@ -475,7 +478,7 @@
parts.append(_formatparam(k.replace('_', '-'), v))
if _value is not None:
parts.insert(0, _value)
- self._headers.append((_name, SEMISPACE.join(parts)))
+ self[_name] = SEMISPACE.join(parts)
def replace_header(self, _name, _value):
"""Replace a header.
@@ -487,7 +490,7 @@
_name = _name.lower()
for i, (k, v) in zip(range(len(self._headers)), self._headers):
if k.lower() == _name:
- self._headers[i] = (k, _value)
+ self._headers[i] = self.policy.header_store_parse(k, _value)
break
else:
raise KeyError(_name)
@@ -805,7 +808,8 @@
parts.append(k)
else:
parts.append('%s=%s' % (k, v))
- newheaders.append((h, SEMISPACE.join(parts)))
+ val = SEMISPACE.join(parts)
+ newheaders.append(self.policy.header_store_parse(h, val))
else:
newheaders.append((h, v))