Fix downcasting of base class pointers

When we are returned a base class pointer (either directly or via
shared_from_this()) we detect its runtime type (using `typeid`), then
end up essentially reinterpret_casting the pointer to the derived type.
This is invalid when the base class pointer was a non-first base, and we
end up with an invalid pointer.  We could dynamic_cast to the
most-derived type, but if *that* type isn't pybind11-registered, the
resulting pointer given to the base `cast` implementation isn't necessarily valid
to be reinterpret_cast'ed back to the backup type.

This commit removes the "backup" type argument from the many-argument
`cast(...)` and instead does the derived-or-pointer type decision and
type lookup in type_caster_base, where the dynamic_cast has to be to
correctly get the derived pointer, but also has to do the type lookup to
ensure that we don't pass the wrong (derived) pointer when the backup
type (i.e. the type caster intrinsic type) pointer is needed.

Since the lookup is needed before calling the base cast(), this also
changes the input type to a detail::type_info rather than doing a
(second) lookup in cast().
diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h
index d046bdd..bcd69a8 100644
--- a/include/pybind11/attr.h
+++ b/include/pybind11/attr.h
@@ -266,7 +266,7 @@
             dynamic_attr = true;
 
         if (caster)
-            base_info->implicit_casts.push_back(std::make_pair(type, caster));
+            base_info->implicit_casts.emplace_back(type, caster);
     }
 };
 
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 66ca4d4..812921c 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -270,34 +270,18 @@
     }
 
     PYBIND11_NOINLINE static handle cast(const void *_src, return_value_policy policy, handle parent,
-                                         const std::type_info *type_info,
-                                         const std::type_info *type_info_backup,
+                                         const detail::type_info *tinfo,
                                          void *(*copy_constructor)(const void *),
                                          void *(*move_constructor)(const void *),
                                          const void *existing_holder = nullptr) {
+        if (!tinfo) // no type info: error will be set already
+            return handle();
+
         void *src = const_cast<void *>(_src);
         if (src == nullptr)
-            return none().inc_ref();
+            return none().release();
 
-        auto &internals = get_internals();
-
-        auto it = internals.registered_types_cpp.find(std::type_index(*type_info));
-        if (it == internals.registered_types_cpp.end()) {
-            type_info = type_info_backup;
-            it = internals.registered_types_cpp.find(std::type_index(*type_info));
-        }
-
-        if (it == internals.registered_types_cpp.end()) {
-            std::string tname = type_info->name();
-            detail::clean_type_id(tname);
-            std::string msg = "Unregistered type : " + tname;
-            PyErr_SetString(PyExc_TypeError, msg.c_str());
-            return handle();
-        }
-
-        auto tinfo = (const detail::type_info *) it->second;
-
-        auto it_instances = internals.registered_instances.equal_range(src);
+        auto it_instances = get_internals().registered_instances.equal_range(src);
         for (auto it_i = it_instances.first; it_i != it_instances.second; ++it_i) {
             auto instance_type = detail::get_type_info(Py_TYPE(it_i->second));
             if (instance_type && instance_type == tinfo)
@@ -361,6 +345,25 @@
     }
 
 protected:
+
+    // Called to do type lookup and wrap the pointer and type in a pair when a dynamic_cast
+    // isn't needed or can't be used.  If the type is unknown, sets the error and returns a pair
+    // with .second = nullptr.  (p.first = nullptr is not an error: it becomes None).
+    PYBIND11_NOINLINE static std::pair<const void *, const type_info *> src_and_type(
+            const void *src, const std::type_info *cast_type, const std::type_info *rtti_type = nullptr) {
+        auto &internals = get_internals();
+        auto it = internals.registered_types_cpp.find(std::type_index(*cast_type));
+        if (it != internals.registered_types_cpp.end())
+            return {src, (const type_info *) it->second};
+
+        // Not found, set error:
+        std::string tname = (rtti_type ? rtti_type : cast_type)->name();
+        detail::clean_type_id(tname);
+        std::string msg = "Unregistered type : " + tname;
+        PyErr_SetString(PyExc_TypeError, msg.c_str());
+        return {nullptr, nullptr};
+    }
+
     const type_info *typeinfo = nullptr;
     void *value = nullptr;
     object temp;
@@ -403,16 +406,47 @@
         return cast(&src, return_value_policy::move, parent);
     }
 
+    // Returns a (pointer, type_info) pair taking care of necessary RTTI type lookup for a
+    // polymorphic type.  If the instance isn't derived, returns the non-RTTI base version.
+    template <typename T = itype, enable_if_t<std::is_polymorphic<T>::value, int> = 0>
+    static std::pair<const void *, const type_info *> src_and_type(const itype *src) {
+        const void *vsrc = src;
+        auto &internals = get_internals();
+        auto cast_type = &typeid(itype);
+        const std::type_info *instance_type = nullptr;
+        if (vsrc) {
+            instance_type = &typeid(*src);
+            if (instance_type != cast_type) {
+                // This is a base pointer to a derived type; if it is a pybind11-registered type, we
+                // can get the correct derived pointer (which may be != base pointer) by a
+                // dynamic_cast to most derived type:
+                auto it = internals.registered_types_cpp.find(std::type_index(*instance_type));
+                if (it != internals.registered_types_cpp.end())
+                    return {dynamic_cast<const void *>(src), (const type_info *) it->second};
+            }
+        }
+        // Otherwise we have either a nullptr, an `itype` pointer, or an unknown derived pointer, so
+        // don't do a cast
+        return type_caster_generic::src_and_type(vsrc, cast_type, instance_type);
+    }
+
+    // Non-polymorphic type, so no dynamic casting; just call the generic version directly
+    template <typename T = itype, enable_if_t<!std::is_polymorphic<T>::value, int> = 0>
+    static std::pair<const void *, const type_info *> src_and_type(const itype *src) {
+        return type_caster_generic::src_and_type(src, &typeid(itype));
+    }
+
     static handle cast(const itype *src, return_value_policy policy, handle parent) {
+        auto st = src_and_type(src);
         return type_caster_generic::cast(
-            src, policy, parent, src ? &typeid(*src) : nullptr, &typeid(type),
+            st.first, policy, parent, st.second,
             make_copy_constructor(src), make_move_constructor(src));
     }
 
     static handle cast_holder(const itype *src, const void *holder) {
+        auto st = src_and_type(src);
         return type_caster_generic::cast(
-            src, return_value_policy::take_ownership, {},
-            src ? &typeid(*src) : nullptr, &typeid(type),
+            st.first, return_value_policy::take_ownership, {}, st.second,
             nullptr, nullptr, holder);
     }
 
diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp
index 1d805d7..17a00b0 100644
--- a/tests/pybind11_tests.cpp
+++ b/tests/pybind11_tests.cpp
@@ -41,7 +41,15 @@
         .def_readwrite("move_assignments", &ConstructorStats::move_assignments)
         .def_readwrite("copy_constructions", &ConstructorStats::copy_constructions)
         .def_readwrite("move_constructions", &ConstructorStats::move_constructions)
-        .def_static("get", (ConstructorStats &(*)(py::object)) &ConstructorStats::get, py::return_value_policy::reference_internal);
+        .def_static("get", (ConstructorStats &(*)(py::object)) &ConstructorStats::get, py::return_value_policy::reference_internal)
+
+        // Not exactly ConstructorStats, but related: expose the internal pybind number of registered instances
+        // to allow instance cleanup checks (invokes a GC first)
+        .def_static("detail_reg_inst", []() {
+            ConstructorStats::gc();
+            return py::detail::get_internals().registered_instances.size();
+        })
+        ;
 }
 
 PYBIND11_PLUGIN(pybind11_tests) {
diff --git a/tests/test_multiple_inheritance.cpp b/tests/test_multiple_inheritance.cpp
index 3ebeb20..7424970 100644
--- a/tests/test_multiple_inheritance.cpp
+++ b/tests/test_multiple_inheritance.cpp
@@ -9,6 +9,7 @@
 */
 
 #include "pybind11_tests.h"
+#include "constructor_stats.h"
 
 struct Base1 {
     Base1(int i) : i(i) { }
@@ -91,6 +92,36 @@
     m.def("bar_base2a_sharedptr", [](std::shared_ptr<Base2a> b) { return b->bar(); });
 });
 
+// Issue #801: invalid casting to derived type with MI bases
+struct I801B1 { int a = 1; virtual ~I801B1() = default; };
+struct I801B2 { int b = 2; virtual ~I801B2() = default; };
+struct I801C : I801B1, I801B2 {};
+struct I801D : I801C {}; // Indirect MI
+// Unregistered classes:
+struct I801B3 { int c = 3; virtual ~I801B3() = default; };
+struct I801E : I801B3, I801D {};
+
+test_initializer multiple_inheritance_casting([](py::module &m) {
+    py::class_<I801B1, std::shared_ptr<I801B1>>(m, "I801B1").def(py::init<>()).def_readonly("a", &I801B1::a);
+    py::class_<I801B2, std::shared_ptr<I801B2>>(m, "I801B2").def(py::init<>()).def_readonly("b", &I801B2::b);
+    py::class_<I801C, I801B1, I801B2, std::shared_ptr<I801C>>(m, "I801C").def(py::init<>());
+    py::class_<I801D, I801C, std::shared_ptr<I801D>>(m, "I801D").def(py::init<>());
+
+    // When returned a base class pointer to a derived instance, we cannot assume that the
+    // pointer is `reinterpret_cast`able to the derived pointer because the base class
+    // pointer could be offset.
+    m.def("i801c_b1", []() -> I801B1 * { return new I801C(); });
+    m.def("i801c_b2", []() -> I801B2 * { return new I801C(); });
+    m.def("i801d_b1", []() -> I801B1 * { return new I801D(); });
+    m.def("i801d_b2", []() -> I801B2 * { return new I801D(); });
+
+    // Return a base class pointer to a pybind-registered type when the actual derived type
+    // isn't pybind-registered (and uses multiple-inheritance to offset the pybind base)
+    m.def("i801e_c", []() -> I801C * { return new I801E(); });
+    m.def("i801e_b2", []() -> I801B2 * { return new I801E(); });
+});
+
+
 struct Vanilla {
     std::string vanilla() { return "Vanilla"; };
 };
diff --git a/tests/test_multiple_inheritance.py b/tests/test_multiple_inheritance.py
index 7aaab7c..5da3220 100644
--- a/tests/test_multiple_inheritance.py
+++ b/tests/test_multiple_inheritance.py
@@ -1,4 +1,5 @@
 import pytest
+from pybind11_tests import ConstructorStats
 
 
 def test_multiple_inheritance_cpp():
@@ -109,3 +110,52 @@
     for d in (mi.VanillaDictMix1(), mi.VanillaDictMix2()):
         d.dynamic = 1
         assert d.dynamic == 1
+
+
+def test_mi_base_return():
+    """Tests returning an offset (non-first MI) base class pointer to a derived instance"""
+    from pybind11_tests import (I801B2, I801C, I801D, i801c_b1, i801c_b2, i801d_b1, i801d_b2,
+                                i801e_c, i801e_b2)
+
+    n_inst = ConstructorStats.detail_reg_inst()
+
+    c1 = i801c_b1()
+    assert type(c1) is I801C
+    assert c1.a == 1
+    assert c1.b == 2
+
+    d1 = i801d_b1()
+    assert type(d1) is I801D
+    assert d1.a == 1
+    assert d1.b == 2
+
+    assert ConstructorStats.detail_reg_inst() == n_inst + 2
+
+    c2 = i801c_b2()
+    assert type(c2) is I801C
+    assert c2.a == 1
+    assert c2.b == 2
+
+    d2 = i801d_b2()
+    assert type(d2) is I801D
+    assert d2.a == 1
+    assert d2.b == 2
+
+    assert ConstructorStats.detail_reg_inst() == n_inst + 4
+
+    del c2
+    assert ConstructorStats.detail_reg_inst() == n_inst + 3
+    del c1, d1, d2
+    assert ConstructorStats.detail_reg_inst() == n_inst
+
+    # Returning an unregistered derived type with a registered base; we won't
+    # pick up the derived type, obviously, but should still work (as an object
+    # of whatever type was returned).
+    e1 = i801e_c()
+    assert type(e1) is I801C
+    assert e1.a == 1
+    assert e1.b == 2
+
+    e2 = i801e_b2()
+    assert type(e2) is I801B2
+    assert e2.b == 2