Issue #9707: Rewritten reference implementation of threading.local which
is friendlier towards reference cycles.  This change is not normally
visible since an optimized C implementation (_thread._local) is used
instead.
diff --git a/Lib/_threading_local.py b/Lib/_threading_local.py
index 3af7807..4ec4828 100644
--- a/Lib/_threading_local.py
+++ b/Lib/_threading_local.py
@@ -132,6 +132,9 @@
 >>> del mydata
 """
 
+from weakref import ref
+from contextlib import contextmanager
+
 __all__ = ["local"]
 
 # We need to use objects from the threading module, but the threading
@@ -139,112 +142,105 @@
 # isn't compiled in to the `thread` module.  This creates potential problems
 # with circular imports.  For that reason, we don't import `threading`
 # until the bottom of this file (a hack sufficient to worm around the
-# potential problems).  Note that almost all platforms do have support for
-# locals in the `thread` module, and there is no circular import problem
+# potential problems).  Note that all platforms on CPython do have support
+# for locals in the `thread` module, and there is no circular import problem
 # then, so problems introduced by fiddling the order of imports here won't
-# manifest on most boxes.
+# manifest.
 
-class _localbase(object):
-    __slots__ = '_local__key', '_local__args', '_local__lock'
+class _localimpl:
+    """Manage the per-thread attribute dicts backing one `local` object."""
+    __slots__ = 'key', 'dicts', 'localargs', 'locallock', '__weakref__'
+
+    def __init__(self):
+        # The key used in the Thread objects' attribute dicts.
+        # We keep it a string for speed but make it unlikely to clash with
+        # a "real" attribute.
+        self.key = '_threading_local._localimpl.' + str(id(self))
+        # { id(Thread) -> (ref(Thread), thread-local dict) }
+        self.dicts = {}
+
+    def get_dict(self):
+        """Return the dict for the current thread. Raises KeyError if none
+        defined."""
+        thread = current_thread()
+        return self.dicts[id(thread)][1]
+
+    def create_dict(self):
+        """Create, register and return a new dict for the current thread."""
+        localdict = {}
+        key = self.key
+        thread = current_thread()
+        idt = id(thread)
+        def local_deleted(_, key=key):
+            # When the localimpl is deleted, remove the thread attribute.
+            thread = wrthread()
+            if thread is not None:
+                del thread.__dict__[key]
+        def thread_deleted(_, idt=idt):
+            # When the thread is deleted, remove the local dict.
+            # Note that this is suboptimal if the thread object gets
+            # caught in a reference loop. We would like to be called
+            # as soon as the OS-level thread ends instead.
+            local = wrlocal()
+            if local is not None:
+                local.dicts.pop(idt)  # entry is just dropped; no need to bind it
+        wrlocal = ref(self, local_deleted)
+        wrthread = ref(thread, thread_deleted)
+        thread.__dict__[key] = wrlocal  # the thread only holds us weakly
+        self.dicts[idt] = wrthread, localdict  # and we hold it weakly too
+        return localdict
+
+
+@contextmanager
+def _patch(self):
+    impl = object.__getattribute__(self, '_local__impl')  # bypass local's hooks
+    try:
+        dct = impl.get_dict()
+    except KeyError:  # no dict registered for the current thread yet
+        dct = impl.create_dict()
+        args, kw = impl.localargs
+        self.__init__(*args, **kw)  # populate the fresh dict with the saved args
+    with impl.locallock:
+        object.__setattr__(self, '__dict__', dct)  # expose this thread's dict
+        yield  # the caller's attribute operation runs here, lock still held
+
+
+class local:
+    __slots__ = '_local__impl', '__dict__'
 
     def __new__(cls, *args, **kw):
-        self = object.__new__(cls)
-        key = '_local__key', 'thread.local.' + str(id(self))
-        object.__setattr__(self, '_local__key', key)
-        object.__setattr__(self, '_local__args', (args, kw))
-        object.__setattr__(self, '_local__lock', RLock())
-
         if (args or kw) and (cls.__init__ is object.__init__):
             raise TypeError("Initialization arguments are not supported")
-
+        self = object.__new__(cls)
+        impl = _localimpl()  # holds key, dicts, init args and lock
+        impl.localargs = (args, kw)  # re-passed to __init__ by _patch()
+        impl.locallock = RLock()  # taken by _patch() around attribute access
+        object.__setattr__(self, '_local__impl', impl)  # bypass _patch()
         # We need to create the thread dict in anticipation of
         # __init__ being called, to make sure we don't call it
         # again ourselves.
-        dict = object.__getattribute__(self, '__dict__')
-        current_thread().__dict__[key] = dict
-
+        impl.create_dict()  # dict for the constructing thread
         return self
 
-def _patch(self):
-    key = object.__getattribute__(self, '_local__key')
-    d = current_thread().__dict__.get(key)
-    if d is None:
-        d = {}
-        current_thread().__dict__[key] = d
-        object.__setattr__(self, '__dict__', d)
-
-        # we have a new instance dict, so call out __init__ if we have
-        # one
-        cls = type(self)
-        if cls.__init__ is not object.__init__:
-            args, kw = object.__getattribute__(self, '_local__args')
-            cls.__init__(self, *args, **kw)
-    else:
-        object.__setattr__(self, '__dict__', d)
-
-class local(_localbase):
-
     def __getattribute__(self, name):
-        lock = object.__getattribute__(self, '_local__lock')
-        lock.acquire()
-        try:
-            _patch(self)
+        with _patch(self):  # this thread's dict is bound for the lookup
             return object.__getattribute__(self, name)
-        finally:
-            lock.release()
 
     def __setattr__(self, name, value):
         if name == '__dict__':
             raise AttributeError(
                 "%r object attribute '__dict__' is read-only"
                 % self.__class__.__name__)
-        lock = object.__getattribute__(self, '_local__lock')
-        lock.acquire()
-        try:
-            _patch(self)
+        with _patch(self):  # this thread's dict is bound for the store
             return object.__setattr__(self, name, value)
-        finally:
-            lock.release()
 
     def __delattr__(self, name):
         if name == '__dict__':
             raise AttributeError(
                 "%r object attribute '__dict__' is read-only"
                 % self.__class__.__name__)
-        lock = object.__getattribute__(self, '_local__lock')
-        lock.acquire()
-        try:
-            _patch(self)
+        with _patch(self):  # this thread's dict is bound for the delete
             return object.__delattr__(self, name)
-        finally:
-            lock.release()
 
-    def __del__(self):
-        import threading
-
-        key = object.__getattribute__(self, '_local__key')
-
-        try:
-            # We use the non-locking API since we might already hold the lock
-            # (__del__ can be called at any point by the cyclic GC).
-            threads = threading._enumerate()
-        except:
-            # If enumerating the current threads fails, as it seems to do
-            # during shutdown, we'll skip cleanup under the assumption
-            # that there is nothing to clean up.
-            return
-
-        for thread in threads:
-            try:
-                __dict__ = thread.__dict__
-            except AttributeError:
-                # Thread is dying, rest in peace.
-                continue
-
-            if key in __dict__:
-                try:
-                    del __dict__[key]
-                except KeyError:
-                    pass # didn't have anything in this thread
 
 from threading import current_thread, RLock
diff --git a/Lib/test/test_threading_local.py b/Lib/test/test_threading_local.py
index acf37a0..c886a25 100644
--- a/Lib/test/test_threading_local.py
+++ b/Lib/test/test_threading_local.py
@@ -184,11 +184,6 @@
             """To test that subclasses behave properly."""
         self._test_dict_attribute(LocalSubclass)
 
-
-class ThreadLocalTest(unittest.TestCase, BaseLocalTest):
-    _local = _thread._local
-
-    # Fails for the pure Python implementation
     def test_cycle_collection(self):
         class X:
             pass
@@ -201,6 +196,10 @@
         gc.collect()
         self.assertIs(wr(), None)
 
+
+class ThreadLocalTest(unittest.TestCase, BaseLocalTest):
+    _local = _thread._local  # the C implementation
+
 class PyThreadingLocalTest(unittest.TestCase, BaseLocalTest):
     _local = _threading_local.local