Merged revisions 72223 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r72223 | antoine.pitrou | 2009-05-02 23:13:23 +0200 (sam., 02 mai 2009) | 5 lines

  Issue #5084: unpickling now interns the attribute names of pickled objects,
  saving memory and avoiding growth in size of subsequent pickles. Proposal
  and original patch by Jake McGuire.
........
diff --git a/Lib/pickle.py b/Lib/pickle.py
index 409d4b2..b94b305 100644
--- a/Lib/pickle.py
+++ b/Lib/pickle.py
@@ -1195,7 +1195,15 @@
         if isinstance(state, tuple) and len(state) == 2:
             state, slotstate = state
         if state:
-            inst.__dict__.update(state)
+            d = inst.__dict__
+            intern = sys.intern
+            try:
+                for k, v in state.items():
+                    d[intern(k)] = v
+            # keys in state don't have to be strings
+            # don't blow up, but don't go out of our way
+            except TypeError:
+                d.update(state)
         if slotstate:
             for k, v in slotstate.items():
                 setattr(inst, k, v)
diff --git a/Lib/test/pickletester.py b/Lib/test/pickletester.py
index e3a929f..1585586 100644
--- a/Lib/test/pickletester.py
+++ b/Lib/test/pickletester.py
@@ -932,6 +932,20 @@
                              "Failed protocol %d: %r != %r"
                              % (proto, obj, loaded))
 
+    def test_attribute_name_interning(self):
+        # Test that attribute names of pickled objects are interned when
+        # unpickling.
+        for proto in protocols:
+            x = C()
+            x.foo = 42
+            x.bar = "hello"
+            s = self.dumps(x, proto)
+            y = self.loads(s)
+            x_keys = sorted(x.__dict__)
+            y_keys = sorted(y.__dict__)
+            for x_key, y_key in zip(x_keys, y_keys):
+                self.assertIs(x_key, y_key)
+
 # Test classes for reduce_ex
 
 class REX_one(object):
diff --git a/Misc/NEWS b/Misc/NEWS
index f4116ad..00270fd 100644
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -109,6 +109,10 @@
 Library
 -------
 
+- Issue #5084: unpickling now interns the attribute names of pickled objects,
+  saving memory and avoiding growth in size of subsequent pickles. Proposal
+  and original patch by Jake McGuire.
+
 - The json module now works exclusively with str and not bytes.
 
 - Issue #3959: The ipaddr module has been added to the standard library.
diff --git a/Modules/_pickle.c b/Modules/_pickle.c
index e9c4a76..754d132 100644
--- a/Modules/_pickle.c
+++ b/Modules/_pickle.c
@@ -4020,6 +4020,8 @@
     /* Set inst.__dict__ from the state dict (if any). */
     if (state != Py_None) {
         PyObject *dict;
+        PyObject *d_key, *d_value;
+        Py_ssize_t i;
 
         if (!PyDict_Check(state)) {
             PyErr_SetString(UnpicklingError, "state is not a dictionary");
@@ -4029,7 +4031,19 @@
         if (dict == NULL)
             goto error;
 
-        PyDict_Update(dict, state);
+        i = 0;
+        while (PyDict_Next(state, &i, &d_key, &d_value)) {
+            /* normally the keys for instance attributes are
+               interned.  we should try to do that here. */
+            Py_INCREF(d_key);
+            if (PyUnicode_CheckExact(d_key))
+                PyUnicode_InternInPlace(&d_key);
+            if (PyObject_SetItem(dict, d_key, d_value) < 0) {
+                Py_DECREF(d_key);
+                goto error;
+            }
+            Py_DECREF(d_key);
+        }
         Py_DECREF(dict);
     }