Hold strong references to keep_alive patients

This fixes #856. Instead of the weakref trick, the internals structure
holds an unordered_map from PyObject* to a vector of references. To
avoid the cost of the unordered_map lookup for objects that don't have
any keep_alive patients, a flag is added to each instance to indicate
whether there is anything to do.
diff --git a/include/pybind11/class_support.h b/include/pybind11/class_support.h
index b6065bd..2bfc5a8 100644
--- a/include/pybind11/class_support.h
+++ b/include/pybind11/class_support.h
@@ -280,6 +280,29 @@
     return -1;
 }
 
+inline void add_patient(PyObject *nurse, PyObject *patient) {
+    auto &internals = get_internals();
+    auto instance = reinterpret_cast<detail::instance *>(nurse);
+    instance->has_patients = true;
+    Py_INCREF(patient);
+    internals.patients[nurse].push_back(patient);
+}
+
+inline void clear_patients(PyObject *self) {
+    auto instance = reinterpret_cast<detail::instance *>(self);
+    auto &internals = get_internals();
+    auto pos = internals.patients.find(self);
+    assert(pos != internals.patients.end());
+    // Clearing the patients can cause more Python code to run, which
+    // can invalidate the iterator. Extract the vector of patients
+    // from the unordered_map first.
+    auto patients = std::move(pos->second);
+    internals.patients.erase(pos);
+    instance->has_patients = false;
+    for (PyObject *&patient : patients)
+        Py_CLEAR(patient);
+}
+
 /// Clears all internal data from the instance and removes it from registered instances in
 /// preparation for deallocation.
 inline void clear_instance(PyObject *self) {
@@ -304,6 +327,9 @@
     PyObject **dict_ptr = _PyObject_GetDictPtr(self);
     if (dict_ptr)
         Py_CLEAR(*dict_ptr);
+
+    if (instance->has_patients)
+        clear_patients(self);
 }
 
 /// Instance destructor function for all pybind11 types. It calls `type_info.dealloc`
diff --git a/include/pybind11/common.h b/include/pybind11/common.h
index 9282051..626178e 100644
--- a/include/pybind11/common.h
+++ b/include/pybind11/common.h
@@ -405,6 +405,8 @@
     bool simple_layout : 1;
     /// For simple layout, tracks whether the holder has been constructed
     bool simple_holder_constructed : 1;
+    /// If true, get_internals().patients has an entry for this object
+    bool has_patients : 1;
 
     /// Initializes all of the above type/values/holders data
     void allocate_layout();
@@ -469,6 +471,7 @@
     std::unordered_multimap<const void *, instance*> registered_instances; // void * -> instance*
     std::unordered_set<std::pair<const PyObject *, const char *>, overload_hash> inactive_overload_cache;
     type_map<std::vector<bool (*)(PyObject *, void *&)>> direct_conversions;
+    std::unordered_map<const PyObject *, std::vector<PyObject *>> patients;
     std::forward_list<void (*) (std::exception_ptr)> registered_exception_translators;
     std::unordered_map<std::string, void *> shared_data; // Custom data to be shared across extensions
     PyTypeObject *static_property_type;
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index 40b32a8..973dea9 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -1328,20 +1328,30 @@
 
 
 inline void keep_alive_impl(handle nurse, handle patient) {
-    /* Clever approach based on weak references taken from Boost.Python */
     if (!nurse || !patient)
         pybind11_fail("Could not activate keep_alive!");
 
     if (patient.is_none() || nurse.is_none())
         return; /* Nothing to keep alive or nothing to be kept alive by */
 
-    cpp_function disable_lifesupport(
-        [patient](handle weakref) { patient.dec_ref(); weakref.dec_ref(); });
+    auto tinfo = all_type_info(Py_TYPE(nurse.ptr()));
+    if (!tinfo.empty()) {
+        /* It's a pybind-registered type, so we can store the patient in the
+         * internal list. */
+        add_patient(nurse.ptr(), patient.ptr());
+    }
+    else {
+        /* Fall back to clever approach based on weak references taken from
+         * Boost.Python. This is not used for pybind-registered types because
+         * the objects can be destroyed out-of-order in a GC pass. */
+        cpp_function disable_lifesupport(
+            [patient](handle weakref) { patient.dec_ref(); weakref.dec_ref(); });
 
-    weakref wr(nurse, disable_lifesupport);
+        weakref wr(nurse, disable_lifesupport);
 
-    patient.inc_ref(); /* reference patient and leak the weak reference */
-    (void) wr.release();
+        patient.inc_ref(); /* reference patient and leak the weak reference */
+        (void) wr.release();
+    }
 }
 
 PYBIND11_NOINLINE inline void keep_alive_impl(size_t Nurse, size_t Patient, function_call &call, handle ret) {
diff --git a/tests/test_call_policies.cpp b/tests/test_call_policies.cpp
index 33a3f15..1527592 100644
--- a/tests/test_call_policies.cpp
+++ b/tests/test_call_policies.cpp
@@ -24,6 +24,13 @@
     Child *returnNullChild() { return nullptr; }
 };
 
+#if !defined(PYPY_VERSION)
+class ParentGC : public Parent {
+public:
+    using Parent::Parent;
+};
+#endif
+
 test_initializer keep_alive([](py::module &m) {
     py::class_<Parent>(m, "Parent")
         .def(py::init<>())
@@ -34,6 +41,11 @@
         .def("returnNullChildKeepAliveChild", &Parent::returnNullChild, py::keep_alive<1, 0>())
         .def("returnNullChildKeepAliveParent", &Parent::returnNullChild, py::keep_alive<0, 1>());
 
+#if !defined(PYPY_VERSION)
+    py::class_<ParentGC, Parent>(m, "ParentGC", py::dynamic_attr())
+        .def(py::init<>());
+#endif
+
     py::class_<Child>(m, "Child")
         .def(py::init<>());
 });
diff --git a/tests/test_call_policies.py b/tests/test_call_policies.py
index 1cb11c2..de4ec96 100644
--- a/tests/test_call_policies.py
+++ b/tests/test_call_policies.py
@@ -2,21 +2,22 @@
 
 
 def test_keep_alive_argument(capture):
-    from pybind11_tests import Parent, Child
+    from pybind11_tests import Parent, Child, ConstructorStats
 
+    n_inst = ConstructorStats.detail_reg_inst()
     with capture:
         p = Parent()
     assert capture == "Allocating parent."
     with capture:
         p.addChild(Child())
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst + 1
     assert capture == """
         Allocating child.
         Releasing child.
     """
     with capture:
         del p
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst
     assert capture == "Releasing parent."
 
     with capture:
@@ -24,11 +25,11 @@
     assert capture == "Allocating parent."
     with capture:
         p.addChildKeepAlive(Child())
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst + 2
     assert capture == "Allocating child."
     with capture:
         del p
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst
     assert capture == """
         Releasing parent.
         Releasing child.
@@ -36,21 +37,22 @@
 
 
 def test_keep_alive_return_value(capture):
-    from pybind11_tests import Parent
+    from pybind11_tests import Parent, ConstructorStats
 
+    n_inst = ConstructorStats.detail_reg_inst()
     with capture:
         p = Parent()
     assert capture == "Allocating parent."
     with capture:
         p.returnChild()
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst + 1
     assert capture == """
         Allocating child.
         Releasing child.
     """
     with capture:
         del p
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst
     assert capture == "Releasing parent."
 
     with capture:
@@ -58,11 +60,74 @@
     assert capture == "Allocating parent."
     with capture:
         p.returnChildKeepAlive()
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst + 2
     assert capture == "Allocating child."
     with capture:
         del p
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst
+    assert capture == """
+        Releasing parent.
+        Releasing child.
+    """
+
+
+# https://bitbucket.org/pypy/pypy/issues/2447
+@pytest.unsupported_on_pypy
+def test_alive_gc(capture):
+    from pybind11_tests import ParentGC, Child, ConstructorStats
+
+    n_inst = ConstructorStats.detail_reg_inst()
+    p = ParentGC()
+    p.addChildKeepAlive(Child())
+    assert ConstructorStats.detail_reg_inst() == n_inst + 2
+    lst = [p]
+    lst.append(lst)   # creates a circular reference
+    with capture:
+        del p, lst
+        assert ConstructorStats.detail_reg_inst() == n_inst
+    assert capture == """
+        Releasing parent.
+        Releasing child.
+    """
+
+
+def test_alive_gc_derived(capture):
+    from pybind11_tests import Parent, Child, ConstructorStats
+
+    class Derived(Parent):
+        pass
+
+    n_inst = ConstructorStats.detail_reg_inst()
+    p = Derived()
+    p.addChildKeepAlive(Child())
+    assert ConstructorStats.detail_reg_inst() == n_inst + 2
+    lst = [p]
+    lst.append(lst)   # creates a circular reference
+    with capture:
+        del p, lst
+        assert ConstructorStats.detail_reg_inst() == n_inst
+    assert capture == """
+        Releasing parent.
+        Releasing child.
+    """
+
+
+def test_alive_gc_multi_derived(capture):
+    from pybind11_tests import Parent, Child, ConstructorStats
+
+    class Derived(Parent, Child):
+        pass
+
+    n_inst = ConstructorStats.detail_reg_inst()
+    p = Derived()
+    p.addChildKeepAlive(Child())
+    # +3 rather than +2 because Derived corresponds to two registered instances
+    assert ConstructorStats.detail_reg_inst() == n_inst + 3
+    lst = [p]
+    lst.append(lst)   # creates a circular reference
+    with capture:
+        del p, lst
+        assert ConstructorStats.detail_reg_inst() == n_inst
     assert capture == """
         Releasing parent.
         Releasing child.
@@ -70,18 +135,19 @@
 
 
 def test_return_none(capture):
-    from pybind11_tests import Parent
+    from pybind11_tests import Parent, ConstructorStats
 
+    n_inst = ConstructorStats.detail_reg_inst()
     with capture:
         p = Parent()
     assert capture == "Allocating parent."
     with capture:
         p.returnNullChildKeepAliveChild()
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst + 1
     assert capture == ""
     with capture:
         del p
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst
     assert capture == "Releasing parent."
 
     with capture:
@@ -89,11 +155,11 @@
     assert capture == "Allocating parent."
     with capture:
         p.returnNullChildKeepAliveParent()
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst + 1
     assert capture == ""
     with capture:
         del p
-        pytest.gc_collect()
+        assert ConstructorStats.detail_reg_inst() == n_inst
     assert capture == "Releasing parent."