Fix segfault when reloading interpreter with external modules (#1092)

* Fix segfault when reloading interpreter with external modules

When embedding the interpreter and loading external modules in that
embedded interpreter, the external module correctly shares its
internals_ptr with the one in the embedded interpreter.  When the
interpreter is shut down, however, only the `internals_ptr` local to
the embedded code is actually reset to nullptr: the external module
remains set.

The result is that loading an external pybind11 module, letting the
interpreter go through a finalize/initialize, then attempting to use
something in the external module fails because this external module is
still trying to use the old (destroyed) internals.  This causes
undefined behaviour (typically a segfault).

This commit fixes it by adding a level of indirection in the internals
path, converting the local internals variable to `internals **` instead
of `internals *`.  With this change, we can detect a stale internals
pointer and reload the internals pointer (either from a capsule or by
creating a new internals instance).

(No issue number: this was reported on gitter by @henryiii and @aoloe).
diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h
index 213cbae..e39f386 100644
--- a/include/pybind11/detail/internals.h
+++ b/include/pybind11/detail/internals.h
@@ -127,21 +127,21 @@
 
 /// Each module locally stores a pointer to the `internals` data. The data
 /// itself is shared among modules with the same `PYBIND11_INTERNALS_ID`.
-inline internals *&get_internals_ptr() {
-    static internals *internals_ptr = nullptr;
-    return internals_ptr;
+inline internals **&get_internals_pp() {
+    static internals **internals_pp = nullptr;
+    return internals_pp;
 }
 
 /// Return a reference to the current `internals` data
 PYBIND11_NOINLINE inline internals &get_internals() {
-    auto *&internals_ptr = get_internals_ptr();
-    if (internals_ptr)
-        return *internals_ptr;
+    auto **&internals_pp = get_internals_pp();
+    if (internals_pp && *internals_pp)
+        return **internals_pp;
 
     constexpr auto *id = PYBIND11_INTERNALS_ID;
     auto builtins = handle(PyEval_GetBuiltins());
     if (builtins.contains(id) && isinstance<capsule>(builtins[id])) {
-        internals_ptr = *static_cast<internals **>(capsule(builtins[id]));
+        internals_pp = static_cast<internals **>(capsule(builtins[id]));
 
         // We loaded builtins through python's builtins, which means that our `error_already_set`
         // and `builtin_exception` may be different local classes than the ones set up in the
@@ -149,7 +149,7 @@
         //
         // libstdc++ doesn't require this (types there are identified only by name)
 #if !defined(__GLIBCXX__)
-        internals_ptr->registered_exception_translators.push_front(
+        (*internals_pp)->registered_exception_translators.push_front(
             [](std::exception_ptr p) -> void {
                 try {
                     if (p) std::rethrow_exception(p);
@@ -160,6 +160,8 @@
         );
 #endif
     } else {
+        if (!internals_pp) internals_pp = new internals*();
+        auto *&internals_ptr = *internals_pp;
         internals_ptr = new internals();
 #if defined(WITH_THREAD)
         PyEval_InitThreads();
@@ -168,7 +170,7 @@
         PyThread_set_key_value(internals_ptr->tstate, tstate);
         internals_ptr->istate = tstate->interp;
 #endif
-        builtins[id] = capsule(&internals_ptr);
+        builtins[id] = capsule(internals_pp);
         internals_ptr->registered_exception_translators.push_front(
             [](std::exception_ptr p) -> void {
                 try {
@@ -192,7 +194,7 @@
         internals_ptr->default_metaclass = make_default_metaclass();
         internals_ptr->instance_base = make_object_base_type(internals_ptr->default_metaclass);
     }
-    return *internals_ptr;
+    return **internals_pp;
 }
 
 /// Works like `internals.registered_types_cpp`, but for module-local registered types:
diff --git a/include/pybind11/embed.h b/include/pybind11/embed.h
index 6664967..9abc61c 100644
--- a/include/pybind11/embed.h
+++ b/include/pybind11/embed.h
@@ -145,7 +145,7 @@
     // Get the internals pointer (without creating it if it doesn't exist).  It's possible for the
     // internals to be created during Py_Finalize() (e.g. if a py::capsule calls `get_internals()`
     // during destruction), so we get the pointer-pointer here and check it after Py_Finalize().
-    detail::internals **internals_ptr_ptr = &detail::get_internals_ptr();
+    detail::internals **internals_ptr_ptr = detail::get_internals_pp();
     // It could also be stashed in builtins, so look there too:
     if (builtins.contains(id) && isinstance<capsule>(builtins[id]))
         internals_ptr_ptr = capsule(builtins[id]);
diff --git a/tests/test_embed/CMakeLists.txt b/tests/test_embed/CMakeLists.txt
index 3e07812..8b4f1f8 100644
--- a/tests/test_embed/CMakeLists.txt
+++ b/tests/test_embed/CMakeLists.txt
@@ -33,4 +33,9 @@
 
 add_custom_target(cpptest COMMAND $<TARGET_FILE:test_embed>
                   WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+
+pybind11_add_module(external_module THIN_LTO external_module.cpp)
+set_target_properties(external_module PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+add_dependencies(cpptest external_module)
+
 add_dependencies(check cpptest)
diff --git a/tests/test_embed/external_module.cpp b/tests/test_embed/external_module.cpp
new file mode 100644
index 0000000..e9a6058
--- /dev/null
+++ b/tests/test_embed/external_module.cpp
@@ -0,0 +1,23 @@
+#include <pybind11/pybind11.h>
+
+namespace py = pybind11;
+
+/* Simple test module/test class to check that the referenced internals data of external pybind11
+ * modules aren't preserved over a finalize/initialize.
+ */
+
+PYBIND11_MODULE(external_module, m) {
+    class A {
+    public:
+        A(int value) : v{value} {};
+        int v;
+    };
+
+    py::class_<A>(m, "A")
+        .def(py::init<int>())
+        .def_readwrite("value", &A::v);
+
+    m.def("internals_at", []() {
+        return reinterpret_cast<uintptr_t>(&py::detail::get_internals());
+    });
+}
diff --git a/tests/test_embed/test_interpreter.cpp b/tests/test_embed/test_interpreter.cpp
index 641402d..222bd56 100644
--- a/tests/test_embed/test_interpreter.cpp
+++ b/tests/test_embed/test_interpreter.cpp
@@ -101,7 +101,8 @@
 };
 
 bool has_pybind11_internals_static() {
-    return py::detail::get_internals_ptr() != nullptr;
+    auto **&ipp = py::detail::get_internals_pp();
+    return ipp && *ipp;
 }
 
 TEST_CASE("Restart the interpreter") {
@@ -109,6 +110,11 @@
     REQUIRE(py::module::import("widget_module").attr("add")(1, 2).cast<int>() == 3);
     REQUIRE(has_pybind11_internals_builtin());
     REQUIRE(has_pybind11_internals_static());
+    REQUIRE(py::module::import("external_module").attr("A")(123).attr("value").cast<int>() == 123);
+
+    // local and foreign module internals should point to the same internals:
+    REQUIRE(reinterpret_cast<uintptr_t>(*py::detail::get_internals_pp()) ==
+            py::module::import("external_module").attr("internals_at")().cast<uintptr_t>());
 
     // Restart the interpreter.
     py::finalize_interpreter();
@@ -123,6 +129,8 @@
     pybind11::detail::get_internals();
     REQUIRE(has_pybind11_internals_builtin());
     REQUIRE(has_pybind11_internals_static());
+    REQUIRE(reinterpret_cast<uintptr_t>(*py::detail::get_internals_pp()) ==
+            py::module::import("external_module").attr("internals_at")().cast<uintptr_t>());
 
     // Make sure that an interpreter with no get_internals() created until finalize still gets the
     // internals destroyed