#14731: refactor email policy framework. This patch primarily does two things: (1) it adds some internal-interface methods to Policy that allow for Policy to control the parsing and folding of headers in such a way that we can construct a backward compatibility policy that is 100% compatible with the 3.2 API, while allowing a new policy to implement the email6 API. (2) it adds that backward compatibility policy and refactors the test suite so that the only differences between the 3.2 test_email.py file and the 3.3 test_email.py file is some small changes in test framework and the addition of tests for bugs fixed that apply to the 3.2 API. There are some additional teaks, such as moving just the code needed for the compatibility policy into _policybase, so that the library code can import only _policybase. That way the new code that will be added for email6 will only get imported when a non-compatibility policy is imported.

commit: c27e52265b7ff4aa57dc357c289cce8c9dd0fec3 [log] [tgz]
author: R David Murray <rdmurray@bitdance.com> Fri May 25 15:01:48 2012 -0400
committer: R David Murray <rdmurray@bitdance.com> Fri May 25 15:01:48 2012 -0400
tree: b2a25260b0aa89d0a4db3c0d2f91c8cb5e68d51a
parent: 9242c1378f77214f5b9b90149861cb13ca986fb0 [diff] [blame]
diff --git a/Lib/email/message.py b/Lib/email/message.py
index 922617a..91976f1 100644
--- a/Lib/email/message.py
+++ b/Lib/email/message.py

@@ -10,13 +10,12 @@
 import uu
 import base64
 import binascii
-import warnings
 from io import BytesIO, StringIO
 
 # Intrapackage imports
 from email import utils
 from email import errors
-from email import header
+from email._policybase import compat32
 from email import charset as _charset
 Charset = _charset.Charset
 
@@ -26,24 +25,6 @@
 # existence of which force quoting of the parameter value.
 tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
 
-# How to figure out if we are processing strings that come from a byte
-# source with undecodable characters.
-_has_surrogates = re.compile(
-    '([^\ud800-\udbff]|\A)[\udc00-\udfff]([^\udc00-\udfff]|\Z)').search
-
-
-# Helper functions
-def _sanitize_header(name, value):
-    # If the header value contains surrogates, return a Header using
-    # the unknown-8bit charset to encode the bytes as encoded words.
-    if not isinstance(value, str):
-        # Assume it is already a header object
-        return value
-    if _has_surrogates(value):
-        return header.Header(value, charset=_charset.UNKNOWN8BIT,
-                             header_name=name)
-    else:
-        return value
 
 def _splitparam(param):
     # Split header parameters.  BAW: this may be too simple.  It isn't
@@ -136,7 +117,8 @@
     you must use the explicit API to set or get all the headers.  Not all of
     the mapping methods are implemented.
     """
-    def __init__(self):
+    def __init__(self, policy=compat32):
+        self.policy = policy
         self._headers = []
         self._unixfrom = None
         self._payload = None
@@ -246,7 +228,7 @@
         cte = str(self.get('content-transfer-encoding', '')).lower()
         # payload may be bytes here.
         if isinstance(payload, str):
-            if _has_surrogates(payload):
+            if utils._has_surrogates(payload):
                 bpayload = payload.encode('ascii', 'surrogateescape')
                 if not decode:
                     try:
@@ -362,7 +344,7 @@
         Note: this does not overwrite an existing header with the same field
         name.  Use __delitem__() first to delete any existing headers.
         """
-        self._headers.append((name, val))
+        self._headers.append(self.policy.header_store_parse(name, val))
 
     def __delitem__(self, name):
         """Delete all occurrences of a header, if present.
@@ -401,7 +383,8 @@
         Any fields deleted and re-inserted are always appended to the header
         list.
         """
-        return [_sanitize_header(k, v) for k, v in self._headers]
+        return [self.policy.header_fetch_parse(k, v)
+                for k, v in self._headers]
 
     def items(self):
         """Get all the message's header fields and values.
@@ -411,7 +394,8 @@
         Any fields deleted and re-inserted are always appended to the header
         list.
         """
-        return [(k, _sanitize_header(k, v)) for k, v in self._headers]
+        return [(k, self.policy.header_fetch_parse(k, v))
+                for k, v in self._headers]
 
     def get(self, name, failobj=None):
         """Get a header value.
@@ -422,10 +406,29 @@
         name = name.lower()
         for k, v in self._headers:
             if k.lower() == name:
-                return _sanitize_header(k, v)
+                return self.policy.header_fetch_parse(k, v)
         return failobj
 
     #
+    # "Internal" methods (public API, but only intended for use by a parser
+    # or generator, not normal application code.
+    #
+
+    def set_raw(self, name, value):
+        """Store name and value in the model without modification.
+
+        This is an "internal" API, intended only for use by a parser.
+        """
+        self._headers.append((name, value))
+
+    def raw_items(self):
+        """Return the (name, value) header pairs without modification.
+
+        This is an "internal" API, intended only for use by a generator.
+        """
+        return iter(self._headers.copy())
+
+    #
     # Additional useful stuff
     #
 
@@ -442,7 +445,7 @@
         name = name.lower()
         for k, v in self._headers:
             if k.lower() == name:
-                values.append(_sanitize_header(k, v))
+                values.append(self.policy.header_fetch_parse(k, v))
         if not values:
             return failobj
         return values
@@ -475,7 +478,7 @@
                 parts.append(_formatparam(k.replace('_', '-'), v))
         if _value is not None:
             parts.insert(0, _value)
-        self._headers.append((_name, SEMISPACE.join(parts)))
+        self[_name] = SEMISPACE.join(parts)
 
     def replace_header(self, _name, _value):
         """Replace a header.
@@ -487,7 +490,7 @@
         _name = _name.lower()
         for i, (k, v) in zip(range(len(self._headers)), self._headers):
             if k.lower() == _name:
-                self._headers[i] = (k, _value)
+                self._headers[i] = self.policy.header_store_parse(k, _value)
                 break
         else:
             raise KeyError(_name)
@@ -805,7 +808,8 @@
                         parts.append(k)
                     else:
                         parts.append('%s=%s' % (k, v))
-                newheaders.append((h, SEMISPACE.join(parts)))
+                val = SEMISPACE.join(parts)
+                newheaders.append(self.policy.header_store_parse(h, val))
 
             else:
                 newheaders.append((h, v))
commit	c27e52265b7ff4aa57dc357c289cce8c9dd0fec3	[log] [tgz]
author	R David Murray <rdmurray@bitdance.com>	Fri May 25 15:01:48 2012 -0400
committer	R David Murray <rdmurray@bitdance.com>	Fri May 25 15:01:48 2012 -0400
tree	b2a25260b0aa89d0a4db3c0d2f91c8cb5e68d51a
parent	9242c1378f77214f5b9b90149861cb13ca986fb0 [diff] [blame]