#14731: refactor email policy framework.

This patch primarily does two things: (1) it adds some internal-interface
methods to Policy that allow for Policy to control the parsing and folding of
headers in such a way that we can construct a backward compatibility policy
that is 100% compatible with the 3.2 API, while allowing a new policy to
implement the email6 API.  (2) it adds that backward compatibility policy and
refactors the test suite so that the only differences between the 3.2
test_email.py file and the 3.3 test_email.py file are some small changes in
the test framework and the addition of tests for bugs fixed that apply to the
3.2 API.

There are some additional tweaks, such as moving just the code needed for the
compatibility policy into _policybase, so that the library code can import
only _policybase.  That way the new code that will be added for email6
will only get imported when a non-compatibility policy is imported.
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 922617a..91976f1 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py
@@ -10,13 +10,12 @@
 import uu
 import base64
 import binascii
-import warnings
 from io import BytesIO, StringIO
 
 # Intrapackage imports
 from email import utils
 from email import errors
-from email import header
+from email._policybase import compat32
 from email import charset as _charset
 Charset = _charset.Charset
 
@@ -26,24 +25,6 @@
 # existence of which force quoting of the parameter value.
 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
 
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
-    '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
-
-
-# Helper functions
-def _sanitize_header(name, value):
-    # If the header value contains surrogates, return a Header using
-    # the unknown-8bit charset to encode the bytes as encoded words.
-    if not isinstance(value, str):
-        # Assume it is already a header object
-        return value
-    if _has_surrogates(value):
-        return header.Header(value, charset=_charset.UNKNOWN8BIT,
-                             header_name=name)
-    else:
-        return value
 
 def _splitparam(param):
     # Split header parameters.  BAW: this may be too simple.  It isn't
@@ -136,7 +117,8 @@
     you must use the explicit API to set or get all the headers.  Not all of
     the mapping methods are implemented.
     """
-    def __init__(self):
+    def __init__(self, policy=compat32):
+        self.policy = policy
         self._headers = []
         self._unixfrom = None
         self._payload = None
@@ -246,7 +228,7 @@
         cte = str(self.get('content-transfer-encoding', '')).lower()
         # payload may be bytes here.
         if isinstance(payload, str):
-            if _has_surrogates(payload):
+            if utils._has_surrogates(payload):
                 bpayload = payload.encode('ascii', 'surrogateescape')
                 if not decode:
                     try:
@@ -362,7 +344,7 @@
         Note: this does not overwrite an existing header with the same field
         name.  Use __delitem__() first to delete any existing headers.
         """
-        self._headers.append((name, val))
+        self._headers.append(self.policy.header_store_parse(name, val))
 
     def __delitem__(self, name):
         """Delete all occurrences of a header, if present.
@@ -401,7 +383,8 @@
         Any fields deleted and re-inserted are always appended to the header
         list.
         """
-        return [_sanitize_header(k, v) for k, v in self._headers]
+        return [self.policy.header_fetch_parse(k, v)
+                for k, v in self._headers]
 
     def items(self):
         """Get all the message's header fields and values.
@@ -411,7 +394,8 @@
         Any fields deleted and re-inserted are always appended to the header
         list.
         """
-        return [(k, _sanitize_header(k, v)) for k, v in self._headers]
+        return [(k, self.policy.header_fetch_parse(k, v))
+                for k, v in self._headers]
 
     def get(self, name, failobj=None):
         """Get a header value.
@@ -422,10 +406,29 @@
         name = name.lower()
         for k, v in self._headers:
             if k.lower() == name:
-                return _sanitize_header(k, v)
+                return self.policy.header_fetch_parse(k, v)
         return failobj
 
     #
+    # "Internal" methods (public API, but only intended for use by a parser
+    # or generator, not normal application code).
+    #
+
+    def set_raw(self, name, value):
+        """Store name and value in the model without modification.
+
+        This is an "internal" API, intended only for use by a parser.
+        """
+        self._headers.append((name, value))
+
+    def raw_items(self):
+        """Return the (name, value) header pairs without modification.
+
+        This is an "internal" API, intended only for use by a generator.
+        """
+        return iter(self._headers.copy())
+
+    #
     # Additional useful stuff
     #
 
@@ -442,7 +445,7 @@
         name = name.lower()
         for k, v in self._headers:
             if k.lower() == name:
-                values.append(_sanitize_header(k, v))
+                values.append(self.policy.header_fetch_parse(k, v))
         if not values:
             return failobj
         return values
@@ -475,7 +478,7 @@
                 parts.append(_formatparam(k.replace('_', '-'), v))
         if _value is not None:
             parts.insert(0, _value)
-        self._headers.append((_name, SEMISPACE.join(parts)))
+        self[_name] = SEMISPACE.join(parts)
 
     def replace_header(self, _name, _value):
         """Replace a header.
@@ -487,7 +490,7 @@
         _name = _name.lower()
         for i, (k, v) in zip(range(len(self._headers)), self._headers):
             if k.lower() == _name:
-                self._headers[i] = (k, _value)
+                self._headers[i] = self.policy.header_store_parse(k, _value)
                 break
         else:
             raise KeyError(_name)
@@ -805,7 +808,8 @@
                         parts.append(k)
                     else:
                         parts.append('%s=%s' % (k, v))
-                newheaders.append((h, SEMISPACE.join(parts)))
+                val = SEMISPACE.join(parts)
+                newheaders.append(self.policy.header_store_parse(h, val))
 
             else:
                 newheaders.append((h, v))