Track base class pointers of instances

This commits adds base class pointers of offset base classes (i.e. due
to multiple inheritance) to `registered_instances` so that if such a
pointer is returned we properly recognize it as an existing instance.

Without this, returning a base class pointer will cast to the existing
instance if the pointer happens to coincide with the instance pointer,
but constructs a new instance (quite possibly with a segfault, if
ownership is applied) for unequal base class pointers due to multiple
inheritance.
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 812921c..5bc4605 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -24,6 +24,7 @@
 /// Additional type information which does not fit into the PyTypeObject
 struct type_info {
     PyTypeObject *type;
+    const std::type_info *cpptype;
     size_t type_size;
     void *(*operator_new)(size_t);
     void (*init_holder)(PyObject *, const void *);
@@ -35,9 +36,11 @@
     void *get_buffer_data = nullptr;
     /** A simple type never occurs as a (direct or indirect) parent
      * of a class that makes use of multiple inheritance */
-    bool simple_type = true;
+    bool simple_type : 1;
+    /* True if there is no multiple inheritance in this type's inheritance tree */
+    bool simple_ancestors : 1;
     /* for base vs derived holder_type checks */
-    bool default_holder = true;
+    bool default_holder : 1;
 };
 
 PYBIND11_NOINLINE inline internals &get_internals() {
@@ -197,7 +200,7 @@
 
 // Forward declarations
 inline void keep_alive_impl(handle nurse, handle patient);
-inline void register_instance(void *self);
+inline void register_instance(void *self, const type_info *tinfo);
 inline PyObject *make_new_instance(PyTypeObject *type, bool allocate_value = true);
 
 class type_caster_generic {
@@ -338,7 +341,7 @@
                 throw cast_error("unhandled return_value_policy: should not happen!");
         }
 
-        register_instance(wrapper);
+        register_instance(wrapper, tinfo);
         tinfo->init_holder(inst.ptr(), existing_holder);
 
         return inst.release();
diff --git a/include/pybind11/class_support.h b/include/pybind11/class_support.h
index 370a67c..b293037 100644
--- a/include/pybind11/class_support.h
+++ b/include/pybind11/class_support.h
@@ -185,18 +185,35 @@
     return type;
 }
 
-inline void register_instance(void *self) {
-    auto *inst = (instance_essentials<void> *) self;
-    get_internals().registered_instances.emplace(inst->value, self);
+/// For multiple inheritance types we need to recursively register/deregister base pointers for any
+/// base classes with pointers that are difference from the instance value pointer so that we can
+/// correctly recognize an offset base class pointer. This calls a function with any offset base ptrs.
+inline void traverse_offset_bases(void *valueptr, const detail::type_info *tinfo, void *self,
+        bool (*f)(void * /*parentptr*/, void * /*self*/)) {
+    for (handle h : reinterpret_borrow<tuple>(tinfo->type->tp_bases)) {
+        if (auto parent_tinfo = get_type_info((PyTypeObject *) h.ptr())) {
+            for (auto &c : parent_tinfo->implicit_casts) {
+                if (c.first == tinfo->cpptype) {
+                    auto *parentptr = c.second(valueptr);
+                    if (parentptr != valueptr)
+                        f(parentptr, self);
+                    traverse_offset_bases(parentptr, parent_tinfo, self, f);
+                    break;
+                }
+            }
+        }
+    }
 }
 
-inline bool deregister_instance(void *self) {
-    auto *inst = (instance_essentials<void> *) self;
-    auto type = Py_TYPE(inst);
+inline bool register_instance_impl(void *ptr, void *self) {
+    get_internals().registered_instances.emplace(ptr, self);
+    return true; // unused, but gives the same signature as the deregister func
+}
+inline bool deregister_instance_impl(void *ptr, void *self) {
     auto &registered_instances = get_internals().registered_instances;
-    auto range = registered_instances.equal_range(inst->value);
+    auto range = registered_instances.equal_range(ptr);
     for (auto it = range.first; it != range.second; ++it) {
-        if (type == Py_TYPE(it->second)) {
+        if (Py_TYPE(self) == Py_TYPE(it->second)) {
             registered_instances.erase(it);
             return true;
         }
@@ -204,6 +221,21 @@
     return false;
 }
 
+inline void register_instance(void *self, const type_info *tinfo) {
+    auto *inst = (instance_essentials<void> *) self;
+    register_instance_impl(inst->value, self);
+    if (!tinfo->simple_ancestors)
+        traverse_offset_bases(inst->value, tinfo, self, register_instance_impl);
+}
+
+inline bool deregister_instance(void *self, const detail::type_info *tinfo) {
+    auto *inst = (instance_essentials<void> *) self;
+    bool ret = deregister_instance_impl(inst->value, self);
+    if (!tinfo->simple_ancestors)
+        traverse_offset_bases(inst->value, tinfo, self, deregister_instance_impl);
+    return ret;
+}
+
 /// Creates a new instance which, by default, includes allocation (but not construction of) the
 /// wrapped C++ instance.  If allocating value, the instance is registered; otherwise
 /// register_instance will need to be called once the value has been assigned.
@@ -215,7 +247,7 @@
     instance->holder_constructed = false;
     if (allocate_value) {
         instance->value = tinfo->operator_new(tinfo->type_size);
-        register_instance(self);
+        register_instance(self, tinfo);
     } else {
         instance->value = nullptr;
     }
@@ -248,12 +280,15 @@
 inline void clear_instance(PyObject *self) {
     auto instance = (instance_essentials<void> *) self;
     bool has_value = instance->value;
+    type_info *tinfo = nullptr;
     if (has_value || instance->holder_constructed) {
         auto type = Py_TYPE(self);
-        get_type_info(type)->dealloc(self);
+        tinfo = get_type_info(type);
+        tinfo->dealloc(self);
     }
     if (has_value) {
-        if (!deregister_instance(self))
+        if (!tinfo) tinfo = get_type_info(Py_TYPE(self));
+        if (!deregister_instance(self, tinfo))
             pybind11_fail("pybind11_object_dealloc(): Tried to deallocate unregistered instance!");
 
         if (instance->weakrefs)
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index c1bc673..7f4c517 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -813,10 +813,13 @@
         /* Register supplemental type information in C++ dict */
         auto *tinfo = new detail::type_info();
         tinfo->type = (PyTypeObject *) m_ptr;
+        tinfo->cpptype = rec.type;
         tinfo->type_size = rec.type_size;
         tinfo->operator_new = rec.operator_new;
         tinfo->init_holder = rec.init_holder;
         tinfo->dealloc = rec.dealloc;
+        tinfo->simple_type = true;
+        tinfo->simple_ancestors = true;
 
         auto &internals = get_internals();
         auto tindex = std::type_index(*rec.type);
@@ -825,8 +828,14 @@
         internals.registered_types_cpp[tindex] = tinfo;
         internals.registered_types_py[m_ptr] = tinfo;
 
-        if (rec.bases.size() > 1 || rec.multiple_inheritance)
+        if (rec.bases.size() > 1 || rec.multiple_inheritance) {
             mark_parents_nonsimple(tinfo->type);
+            tinfo->simple_ancestors = false;
+        }
+        else if (rec.bases.size() == 1) {
+            auto parent_tinfo = get_type_info((PyTypeObject *) rec.bases[0].ptr());
+            tinfo->simple_ancestors = parent_tinfo->simple_ancestors;
+        }
     }
 
     /// Helper function which tags all parents of a type using mult. inheritance
diff --git a/tests/test_multiple_inheritance.cpp b/tests/test_multiple_inheritance.cpp
index 7424970..41576db 100644
--- a/tests/test_multiple_inheritance.cpp
+++ b/tests/test_multiple_inheritance.cpp
@@ -107,8 +107,16 @@
     py::class_<I801C, I801B1, I801B2, std::shared_ptr<I801C>>(m, "I801C").def(py::init<>());
     py::class_<I801D, I801C, std::shared_ptr<I801D>>(m, "I801D").def(py::init<>());
 
-    // When returned a base class pointer to a derived instance, we cannot assume that the
-    // pointer is `reinterpret_cast`able to the derived pointer because the base class
+    // Two separate issues here: first, we want to recognize a pointer to a base type as being a
+    // known instance even when the pointer value is unequal (i.e. due to a non-first
+    // multiple-inheritance base class):
+    m.def("i801b1_c", [](I801C *c) { return static_cast<I801B1 *>(c); });
+    m.def("i801b2_c", [](I801C *c) { return static_cast<I801B2 *>(c); });
+    m.def("i801b1_d", [](I801D *d) { return static_cast<I801B1 *>(d); });
+    m.def("i801b2_d", [](I801D *d) { return static_cast<I801B2 *>(d); });
+
+    // Second, when returned a base class pointer to a derived instance, we cannot assume that the
+    // pointer is `reinterpret_cast`able to the derived pointer because, like above, the base class
     // pointer could be offset.
     m.def("i801c_b1", []() -> I801B1 * { return new I801C(); });
     m.def("i801c_b2", []() -> I801B2 * { return new I801C(); });
diff --git a/tests/test_multiple_inheritance.py b/tests/test_multiple_inheritance.py
index 5da3220..2d40d25 100644
--- a/tests/test_multiple_inheritance.py
+++ b/tests/test_multiple_inheritance.py
@@ -112,6 +112,32 @@
         assert d.dynamic == 1
 
 
+def test_mi_unaligned_base():
+    """Returning an offset (non-first MI) base class pointer should recognize the instance"""
+    from pybind11_tests import I801C, I801D, i801b1_c, i801b2_c, i801b1_d, i801b2_d
+
+    n_inst = ConstructorStats.detail_reg_inst()
+
+    c = I801C()
+    d = I801D()
+    # + 4 below because we have the two instances, and each instance has offset base I801B2
+    assert ConstructorStats.detail_reg_inst() == n_inst + 4
+    b1c = i801b1_c(c)
+    assert b1c is c
+    b2c = i801b2_c(c)
+    assert b2c is c
+    b1d = i801b1_d(d)
+    assert b1d is d
+    b2d = i801b2_d(d)
+    assert b2d is d
+
+    assert ConstructorStats.detail_reg_inst() == n_inst + 4  # no extra instances
+    del c, b1c, b2c
+    assert ConstructorStats.detail_reg_inst() == n_inst + 2
+    del d, b1d, b2d
+    assert ConstructorStats.detail_reg_inst() == n_inst
+
+
 def test_mi_base_return():
     """Tests returning an offset (non-first MI) base class pointer to a derived instance"""
     from pybind11_tests import (I801B2, I801C, I801D, i801c_b1, i801c_b2, i801d_b1, i801d_b2,
@@ -129,7 +155,7 @@
     assert d1.a == 1
     assert d1.b == 2
 
-    assert ConstructorStats.detail_reg_inst() == n_inst + 2
+    assert ConstructorStats.detail_reg_inst() == n_inst + 4
 
     c2 = i801c_b2()
     assert type(c2) is I801C
@@ -141,10 +167,10 @@
     assert d2.a == 1
     assert d2.b == 2
 
-    assert ConstructorStats.detail_reg_inst() == n_inst + 4
+    assert ConstructorStats.detail_reg_inst() == n_inst + 8
 
     del c2
-    assert ConstructorStats.detail_reg_inst() == n_inst + 3
+    assert ConstructorStats.detail_reg_inst() == n_inst + 6
     del c1, d1, d2
     assert ConstructorStats.detail_reg_inst() == n_inst