diff --git a/tests/conftest.py b/tests/conftest.py
index 5b08004..3fe5023 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -196,7 +196,7 @@
     except ImportError:
         scipy = None
     try:
-        from pybind11_tests import have_eigen
+        from pybind11_tests.eigen import have_eigen
     except ImportError:
         have_eigen = False
     pypy = platform.python_implementation() == "PyPy"
diff --git a/tests/pybind11_tests.cpp b/tests/pybind11_tests.cpp
index 81fe8bf..bc7d2c3 100644
--- a/tests/pybind11_tests.cpp
+++ b/tests/pybind11_tests.cpp
@@ -77,7 +77,8 @@
         .def(py::init<>())
         .def(py::init<int>())
         .def("get_value", &UserType::value, "Get value using a method")
-        .def_property_readonly("value", &UserType::value, "Get value using a property")
+        .def("set_value", &UserType::set, "Set value using a method")
+        .def_property("value", &UserType::value, &UserType::set, "Get/set value using a property")
         .def("__repr__", [](const UserType& u) { return "UserType({})"_s.format(u.value()); });
 
     py::class_<IncType, UserType>(m, "IncType")
diff --git a/tests/pybind11_tests.h b/tests/pybind11_tests.h
index 18672cd..90963a5 100644
--- a/tests/pybind11_tests.h
+++ b/tests/pybind11_tests.h
@@ -33,6 +33,7 @@
     UserType(int i) : i(i) { }
 
     int value() const { return i; }
+    void set(int new_value) { i = new_value; }
 
 private:
     int i = -1;
diff --git a/tests/test_buffers.cpp b/tests/test_buffers.cpp
index 9e92e5d..c7f081d 100644
--- a/tests/test_buffers.cpp
+++ b/tests/test_buffers.cpp
@@ -10,105 +10,73 @@
 #include "pybind11_tests.h"
 #include "constructor_stats.h"
 
-class Matrix {
-public:
-    Matrix(ssize_t rows, ssize_t cols) : m_rows(rows), m_cols(cols) {
-        print_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
-        m_data = new float[(size_t) (rows*cols)];
-        memset(m_data, 0, sizeof(float) * (size_t) (rows * cols));
-    }
-
-    Matrix(const Matrix &s) : m_rows(s.m_rows), m_cols(s.m_cols) {
-        print_copy_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
-        m_data = new float[(size_t) (m_rows * m_cols)];
-        memcpy(m_data, s.m_data, sizeof(float) * (size_t) (m_rows * m_cols));
-    }
-
-    Matrix(Matrix &&s) : m_rows(s.m_rows), m_cols(s.m_cols), m_data(s.m_data) {
-        print_move_created(this);
-        s.m_rows = 0;
-        s.m_cols = 0;
-        s.m_data = nullptr;
-    }
-
-    ~Matrix() {
-        print_destroyed(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
-        delete[] m_data;
-    }
-
-    Matrix &operator=(const Matrix &s) {
-        print_copy_assigned(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
-        delete[] m_data;
-        m_rows = s.m_rows;
-        m_cols = s.m_cols;
-        m_data = new float[(size_t) (m_rows * m_cols)];
-        memcpy(m_data, s.m_data, sizeof(float) * (size_t) (m_rows * m_cols));
-        return *this;
-    }
-
-    Matrix &operator=(Matrix &&s) {
-        print_move_assigned(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
-        if (&s != this) {
-            delete[] m_data;
-            m_rows = s.m_rows; m_cols = s.m_cols; m_data = s.m_data;
-            s.m_rows = 0; s.m_cols = 0; s.m_data = nullptr;
+TEST_SUBMODULE(buffers, m) {
+    // test_from_python / test_to_python:
+    class Matrix {
+    public:
+        Matrix(ssize_t rows, ssize_t cols) : m_rows(rows), m_cols(cols) {
+            print_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            m_data = new float[(size_t) (rows*cols)];
+            memset(m_data, 0, sizeof(float) * (size_t) (rows * cols));
         }
-        return *this;
-    }
 
-    float operator()(ssize_t i, ssize_t j) const {
-        return m_data[(size_t) (i*m_cols + j)];
-    }
+        Matrix(const Matrix &s) : m_rows(s.m_rows), m_cols(s.m_cols) {
+            print_copy_created(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            m_data = new float[(size_t) (m_rows * m_cols)];
+            memcpy(m_data, s.m_data, sizeof(float) * (size_t) (m_rows * m_cols));
+        }
 
-    float &operator()(ssize_t i, ssize_t j) {
-        return m_data[(size_t) (i*m_cols + j)];
-    }
+        Matrix(Matrix &&s) : m_rows(s.m_rows), m_cols(s.m_cols), m_data(s.m_data) {
+            print_move_created(this);
+            s.m_rows = 0;
+            s.m_cols = 0;
+            s.m_data = nullptr;
+        }
 
-    float *data() { return m_data; }
+        ~Matrix() {
+            print_destroyed(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            delete[] m_data;
+        }
 
-    ssize_t rows() const { return m_rows; }
-    ssize_t cols() const { return m_cols; }
-private:
-    ssize_t m_rows;
-    ssize_t m_cols;
-    float *m_data;
-};
+        Matrix &operator=(const Matrix &s) {
+            print_copy_assigned(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            // Copy first, free last: safe for self-assignment and if `new` throws.
+            float *copy = new float[(size_t) (s.m_rows * s.m_cols)];
+            memcpy(copy, s.m_data, sizeof(float) * (size_t) (s.m_rows * s.m_cols));
+            delete[] m_data;
+            m_rows = s.m_rows; m_cols = s.m_cols; m_data = copy;
+            return *this;
+        }
 
-class SquareMatrix : public Matrix {
-public:
-    SquareMatrix(ssize_t n) : Matrix(n, n) { }
-};
+        Matrix &operator=(Matrix &&s) {
+            print_move_assigned(this, std::to_string(m_rows) + "x" + std::to_string(m_cols) + " matrix");
+            if (&s != this) {
+                delete[] m_data;
+                m_rows = s.m_rows; m_cols = s.m_cols; m_data = s.m_data;
+                s.m_rows = 0; s.m_cols = 0; s.m_data = nullptr;
+            }
+            return *this;
+        }
 
-struct PTMFBuffer {
-    int32_t value = 0;
+        float operator()(ssize_t i, ssize_t j) const {
+            return m_data[(size_t) (i*m_cols + j)];
+        }
 
-    py::buffer_info get_buffer_info() {
-        return py::buffer_info(&value, sizeof(value),
-                               py::format_descriptor<int32_t>::format(), 1);
-    }
-};
+        float &operator()(ssize_t i, ssize_t j) {
+            return m_data[(size_t) (i*m_cols + j)];
+        }
 
-class ConstPTMFBuffer {
-    std::unique_ptr<int32_t> value;
+        float *data() { return m_data; }
 
-public:
-    int32_t get_value() const { return *value; }
-    void set_value(int32_t v) { *value = v; }
-
-    py::buffer_info get_buffer_info() const {
-        return py::buffer_info(value.get(), sizeof(*value),
-                               py::format_descriptor<int32_t>::format(), 1);
-    }
-
-    ConstPTMFBuffer() : value(new int32_t{0}) { };
-};
-
-struct DerivedPTMFBuffer : public PTMFBuffer { };
-
-test_initializer buffers([](py::module &m) {
-    py::class_<Matrix> mtx(m, "Matrix", py::buffer_protocol());
-
-    mtx.def(py::init<ssize_t, ssize_t>())
+        ssize_t rows() const { return m_rows; }
+        ssize_t cols() const { return m_cols; }
+    private:
+        ssize_t m_rows;
+        ssize_t m_cols;
+        float *m_data;
+    };
+    py::class_<Matrix>(m, "Matrix", py::buffer_protocol())
+        .def(py::init<ssize_t, ssize_t>())
         /// Construct from a buffer
         .def("__init__", [](Matrix &v, py::buffer b) {
             py::buffer_info info = b.request();
@@ -143,24 +111,57 @@
         })
         ;
 
+
+    // test_inherited_protocol
+    class SquareMatrix : public Matrix {
+    public:
+        SquareMatrix(ssize_t n) : Matrix(n, n) { }
+    };
     // Derived classes inherit the buffer protocol and the buffer access function
     py::class_<SquareMatrix, Matrix>(m, "SquareMatrix")
         .def(py::init<ssize_t>());
 
-    py::class_<PTMFBuffer>(m, "PTMFBuffer", py::buffer_protocol())
-        .def(py::init<>())
-        .def_readwrite("value", &PTMFBuffer::value)
-        .def_buffer(&PTMFBuffer::get_buffer_info);
 
-    py::class_<ConstPTMFBuffer>(m, "ConstPTMFBuffer", py::buffer_protocol())
-        .def(py::init<>())
-        .def_property("value", &ConstPTMFBuffer::get_value, &ConstPTMFBuffer::set_value)
-        .def_buffer(&ConstPTMFBuffer::get_buffer_info);
-
+    // test_pointer_to_member_fn
     // Tests that passing a pointer to member to the base class works in
     // the derived class.
-    py::class_<DerivedPTMFBuffer>(m, "DerivedPTMFBuffer", py::buffer_protocol())
+    struct Buffer {
+        int32_t value = 0;
+
+        py::buffer_info get_buffer_info() {
+            return py::buffer_info(&value, sizeof(value),
+                                   py::format_descriptor<int32_t>::format(), 1);
+        }
+    };
+    py::class_<Buffer>(m, "Buffer", py::buffer_protocol())
         .def(py::init<>())
-        .def_readwrite("value", (int32_t DerivedPTMFBuffer::*) &DerivedPTMFBuffer::value)
-        .def_buffer(&DerivedPTMFBuffer::get_buffer_info);
-});
+        .def_readwrite("value", &Buffer::value)
+        .def_buffer(&Buffer::get_buffer_info);
+
+
+    class ConstBuffer {
+        std::unique_ptr<int32_t> value;
+
+    public:
+        int32_t get_value() const { return *value; }
+        void set_value(int32_t v) { *value = v; }
+
+        py::buffer_info get_buffer_info() const {
+            return py::buffer_info(value.get(), sizeof(*value),
+                                   py::format_descriptor<int32_t>::format(), 1);
+        }
+
+        ConstBuffer() : value(new int32_t{0}) { };
+    };
+    py::class_<ConstBuffer>(m, "ConstBuffer", py::buffer_protocol())
+        .def(py::init<>())
+        .def_property("value", &ConstBuffer::get_value, &ConstBuffer::set_value)
+        .def_buffer(&ConstBuffer::get_buffer_info);
+
+    struct DerivedBuffer : public Buffer { };
+    py::class_<DerivedBuffer>(m, "DerivedBuffer", py::buffer_protocol())
+        .def(py::init<>())
+        .def_readwrite("value", (int32_t DerivedBuffer::*) &DerivedBuffer::value)
+        .def_buffer(&DerivedBuffer::get_buffer_info);
+
+}
diff --git a/tests/test_buffers.py b/tests/test_buffers.py
index a937411..c348be5 100644
--- a/tests/test_buffers.py
+++ b/tests/test_buffers.py
@@ -1,6 +1,7 @@
 import struct
 import pytest
-from pybind11_tests import Matrix, ConstructorStats, PTMFBuffer, ConstPTMFBuffer, DerivedPTMFBuffer
+from pybind11_tests import buffers as m
+from pybind11_tests import ConstructorStats
 
 pytestmark = pytest.requires_numpy
 
@@ -10,17 +11,17 @@
 
 def test_from_python():
     with pytest.raises(RuntimeError) as excinfo:
-        Matrix(np.array([1, 2, 3]))  # trying to assign a 1D array
+        m.Matrix(np.array([1, 2, 3]))  # trying to assign a 1D array
     assert str(excinfo.value) == "Incompatible buffer format!"
 
     m3 = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32)
-    m4 = Matrix(m3)
+    m4 = m.Matrix(m3)
 
     for i in range(m4.rows()):
         for j in range(m4.cols()):
             assert m3[i, j] == m4[i, j]
 
-    cstats = ConstructorStats.get(Matrix)
+    cstats = ConstructorStats.get(m.Matrix)
     assert cstats.alive() == 1
     del m3, m4
     assert cstats.alive() == 0
@@ -35,26 +36,26 @@
 # https://bitbucket.org/pypy/pypy/issues/2444
 @pytest.unsupported_on_pypy
 def test_to_python():
-    m = Matrix(5, 5)
-    assert memoryview(m).shape == (5, 5)
+    mat = m.Matrix(5, 5)
+    assert memoryview(mat).shape == (5, 5)
 
-    assert m[2, 3] == 0
-    m[2, 3] = 4
-    assert m[2, 3] == 4
+    assert mat[2, 3] == 0
+    mat[2, 3] = 4
+    assert mat[2, 3] == 4
 
-    m2 = np.array(m, copy=False)
-    assert m2.shape == (5, 5)
-    assert abs(m2).sum() == 4
-    assert m2[2, 3] == 4
-    m2[2, 3] = 5
-    assert m2[2, 3] == 5
+    mat2 = np.array(mat, copy=False)
+    assert mat2.shape == (5, 5)
+    assert abs(mat2).sum() == 4
+    assert mat2[2, 3] == 4
+    mat2[2, 3] = 5
+    assert mat2[2, 3] == 5
 
-    cstats = ConstructorStats.get(Matrix)
+    cstats = ConstructorStats.get(m.Matrix)
     assert cstats.alive() == 1
-    del m
+    del mat
     pytest.gc_collect()
     assert cstats.alive() == 1
-    del m2  # holds an m reference
+    del mat2  # holds a mat reference
     pytest.gc_collect()
     assert cstats.alive() == 0
     assert cstats.values() == ["5x5 matrix"]
@@ -67,16 +68,15 @@
 @pytest.unsupported_on_pypy
 def test_inherited_protocol():
     """SquareMatrix is derived from Matrix and inherits the buffer protocol"""
-    from pybind11_tests import SquareMatrix
 
-    matrix = SquareMatrix(5)
+    matrix = m.SquareMatrix(5)
     assert memoryview(matrix).shape == (5, 5)
     assert np.asarray(matrix).shape == (5, 5)
 
 
 @pytest.unsupported_on_pypy
-def test_ptmf():
-    for cls in [PTMFBuffer, ConstPTMFBuffer, DerivedPTMFBuffer]:
+def test_pointer_to_member_fn():
+    for cls in [m.Buffer, m.ConstBuffer, m.DerivedBuffer]:
         buf = cls()
         buf.value = 0x12345678
         value = struct.unpack('i', bytearray(buf))[0]
diff --git a/tests/test_call_policies.cpp b/tests/test_call_policies.cpp
index 1527592..65e54a0 100644
--- a/tests/test_call_policies.cpp
+++ b/tests/test_call_policies.cpp
@@ -9,47 +9,6 @@
 
 #include "pybind11_tests.h"
 
-class Child {
-public:
-    Child() { py::print("Allocating child."); }
-    ~Child() { py::print("Releasing child."); }
-};
-
-class Parent {
-public:
-    Parent() { py::print("Allocating parent."); }
-    ~Parent() { py::print("Releasing parent."); }
-    void addChild(Child *) { }
-    Child *returnChild() { return new Child(); }
-    Child *returnNullChild() { return nullptr; }
-};
-
-#if !defined(PYPY_VERSION)
-class ParentGC : public Parent {
-public:
-    using Parent::Parent;
-};
-#endif
-
-test_initializer keep_alive([](py::module &m) {
-    py::class_<Parent>(m, "Parent")
-        .def(py::init<>())
-        .def("addChild", &Parent::addChild)
-        .def("addChildKeepAlive", &Parent::addChild, py::keep_alive<1, 2>())
-        .def("returnChild", &Parent::returnChild)
-        .def("returnChildKeepAlive", &Parent::returnChild, py::keep_alive<1, 0>())
-        .def("returnNullChildKeepAliveChild", &Parent::returnNullChild, py::keep_alive<1, 0>())
-        .def("returnNullChildKeepAliveParent", &Parent::returnNullChild, py::keep_alive<0, 1>());
-
-#if !defined(PYPY_VERSION)
-    py::class_<ParentGC, Parent>(m, "ParentGC", py::dynamic_attr())
-        .def(py::init<>());
-#endif
-
-    py::class_<Child>(m, "Child")
-        .def(py::init<>());
-});
-
 struct CustomGuard {
     static bool enabled;
 
@@ -58,7 +17,6 @@
 
     static const char *report_status() { return enabled ? "guarded" : "unguarded"; }
 };
-
 bool CustomGuard::enabled = false;
 
 struct DependentGuard {
@@ -69,12 +27,48 @@
 
     static const char *report_status() { return enabled ? "guarded" : "unguarded"; }
 };
-
 bool DependentGuard::enabled = false;
 
-test_initializer call_guard([](py::module &pm) {
-    auto m = pm.def_submodule("call_policies");
+TEST_SUBMODULE(call_policies, m) {
+    // Parent/Child are used in:
+    // test_keep_alive_argument, test_keep_alive_return_value, test_alive_gc,
+    // test_alive_gc_derived, test_alive_gc_multi_derived, test_return_none
+    class Child {
+    public:
+        Child() { py::print("Allocating child."); }
+        ~Child() { py::print("Releasing child."); }
+    };
+    py::class_<Child>(m, "Child")
+        .def(py::init<>());
 
+    class Parent {
+    public:
+        Parent() { py::print("Allocating parent."); }
+        ~Parent() { py::print("Releasing parent."); }
+        void addChild(Child *) { }
+        Child *returnChild() { return new Child(); }
+        Child *returnNullChild() { return nullptr; }
+    };
+    py::class_<Parent>(m, "Parent")
+        .def(py::init<>())
+        .def("addChild", &Parent::addChild)
+        .def("addChildKeepAlive", &Parent::addChild, py::keep_alive<1, 2>())
+        .def("returnChild", &Parent::returnChild)
+        .def("returnChildKeepAlive", &Parent::returnChild, py::keep_alive<1, 0>())
+        .def("returnNullChildKeepAliveChild", &Parent::returnNullChild, py::keep_alive<1, 0>())
+        .def("returnNullChildKeepAliveParent", &Parent::returnNullChild, py::keep_alive<0, 1>());
+
+#if !defined(PYPY_VERSION)
+    // test_alive_gc
+    class ParentGC : public Parent {
+    public:
+        using Parent::Parent;
+    };
+    py::class_<ParentGC, Parent>(m, "ParentGC", py::dynamic_attr())
+        .def(py::init<>());
+#endif
+
+    // test_call_guard
     m.def("unguarded_call", &CustomGuard::report_status);
     m.def("guarded_call", &CustomGuard::report_status, py::call_guard<CustomGuard>());
 
@@ -100,4 +94,4 @@
     m.def("with_gil", report_gil_status);
     m.def("without_gil", report_gil_status, py::call_guard<py::gil_scoped_release>());
 #endif
-});
+}
diff --git a/tests/test_call_policies.py b/tests/test_call_policies.py
index 42d0ffd..6c60f81 100644
--- a/tests/test_call_policies.py
+++ b/tests/test_call_policies.py
@@ -1,15 +1,15 @@
 import pytest
+from pybind11_tests import call_policies as m
+from pybind11_tests import ConstructorStats
 
 
 def test_keep_alive_argument(capture):
-    from pybind11_tests import Parent, Child, ConstructorStats
-
     n_inst = ConstructorStats.detail_reg_inst()
     with capture:
-        p = Parent()
+        p = m.Parent()
     assert capture == "Allocating parent."
     with capture:
-        p.addChild(Child())
+        p.addChild(m.Child())
         assert ConstructorStats.detail_reg_inst() == n_inst + 1
     assert capture == """
         Allocating child.
@@ -21,10 +21,10 @@
     assert capture == "Releasing parent."
 
     with capture:
-        p = Parent()
+        p = m.Parent()
     assert capture == "Allocating parent."
     with capture:
-        p.addChildKeepAlive(Child())
+        p.addChildKeepAlive(m.Child())
         assert ConstructorStats.detail_reg_inst() == n_inst + 2
     assert capture == "Allocating child."
     with capture:
@@ -37,11 +37,9 @@
 
 
 def test_keep_alive_return_value(capture):
-    from pybind11_tests import Parent, ConstructorStats
-
     n_inst = ConstructorStats.detail_reg_inst()
     with capture:
-        p = Parent()
+        p = m.Parent()
     assert capture == "Allocating parent."
     with capture:
         p.returnChild()
@@ -56,7 +54,7 @@
     assert capture == "Releasing parent."
 
     with capture:
-        p = Parent()
+        p = m.Parent()
     assert capture == "Allocating parent."
     with capture:
         p.returnChildKeepAlive()
@@ -74,11 +72,9 @@
 # https://bitbucket.org/pypy/pypy/issues/2447
 @pytest.unsupported_on_pypy
 def test_alive_gc(capture):
-    from pybind11_tests import ParentGC, Child, ConstructorStats
-
     n_inst = ConstructorStats.detail_reg_inst()
-    p = ParentGC()
-    p.addChildKeepAlive(Child())
+    p = m.ParentGC()
+    p.addChildKeepAlive(m.Child())
     assert ConstructorStats.detail_reg_inst() == n_inst + 2
     lst = [p]
     lst.append(lst)   # creates a circular reference
@@ -92,14 +88,12 @@
 
 
 def test_alive_gc_derived(capture):
-    from pybind11_tests import Parent, Child, ConstructorStats
-
-    class Derived(Parent):
+    class Derived(m.Parent):
         pass
 
     n_inst = ConstructorStats.detail_reg_inst()
     p = Derived()
-    p.addChildKeepAlive(Child())
+    p.addChildKeepAlive(m.Child())
     assert ConstructorStats.detail_reg_inst() == n_inst + 2
     lst = [p]
     lst.append(lst)   # creates a circular reference
@@ -113,16 +107,14 @@
 
 
 def test_alive_gc_multi_derived(capture):
-    from pybind11_tests import Parent, Child, ConstructorStats
-
-    class Derived(Parent, Child):
+    class Derived(m.Parent, m.Child):
         def __init__(self):
-            Parent.__init__(self)
-            Child.__init__(self)
+            m.Parent.__init__(self)
+            m.Child.__init__(self)
 
     n_inst = ConstructorStats.detail_reg_inst()
     p = Derived()
-    p.addChildKeepAlive(Child())
+    p.addChildKeepAlive(m.Child())
     # +3 rather than +2 because Derived corresponds to two registered instances
     assert ConstructorStats.detail_reg_inst() == n_inst + 3
     lst = [p]
@@ -138,11 +130,9 @@
 
 
 def test_return_none(capture):
-    from pybind11_tests import Parent, ConstructorStats
-
     n_inst = ConstructorStats.detail_reg_inst()
     with capture:
-        p = Parent()
+        p = m.Parent()
     assert capture == "Allocating parent."
     with capture:
         p.returnNullChildKeepAliveChild()
@@ -154,7 +144,7 @@
     assert capture == "Releasing parent."
 
     with capture:
-        p = Parent()
+        p = m.Parent()
     assert capture == "Allocating parent."
     with capture:
         p.returnNullChildKeepAliveParent()
@@ -167,14 +157,12 @@
 
 
 def test_call_guard():
-    from pybind11_tests import call_policies
+    assert m.unguarded_call() == "unguarded"
+    assert m.guarded_call() == "guarded"
 
-    assert call_policies.unguarded_call() == "unguarded"
-    assert call_policies.guarded_call() == "guarded"
+    assert m.multiple_guards_correct_order() == "guarded & guarded"
+    assert m.multiple_guards_wrong_order() == "unguarded & guarded"
 
-    assert call_policies.multiple_guards_correct_order() == "guarded & guarded"
-    assert call_policies.multiple_guards_wrong_order() == "unguarded & guarded"
-
-    if hasattr(call_policies, "with_gil"):
-        assert call_policies.with_gil() == "GIL held"
-        assert call_policies.without_gil() == "GIL released"
+    if hasattr(m, "with_gil"):
+        assert m.with_gil() == "GIL held"
+        assert m.without_gil() == "GIL released"
diff --git a/tests/test_callbacks.cpp b/tests/test_callbacks.cpp
index f26f6c3..273eacc 100644
--- a/tests/test_callbacks.cpp
+++ b/tests/test_callbacks.cpp
@@ -12,94 +12,20 @@
 #include <pybind11/functional.h>
 
 
-py::object test_callback1(py::object func) {
-    return func();
-}
-
-py::tuple test_callback2(py::object func) {
-    return func("Hello", 'x', true, 5);
-}
-
-std::string test_callback3(const std::function<int(int)> &func) {
-    return "func(43) = " + std::to_string(func(43));
-}
-
-std::function<int(int)> test_callback4() {
-    return [](int i) { return i+1; };
-}
-
-py::cpp_function test_callback5() {
-    return py::cpp_function([](int i) { return i+1; },
-       py::arg("number"));
-}
-
 int dummy_function(int i) { return i + 1; }
-int dummy_function2(int i, int j) { return i + j; }
-std::function<int(int)> roundtrip(std::function<int(int)> f, bool expect_none = false) {
-    if (expect_none && f) {
-        throw std::runtime_error("Expected None to be converted to empty std::function");
-    }
-    return f;
-}
 
-std::string test_dummy_function(const std::function<int(int)> &f) {
-    using fn_type = int (*)(int);
-    auto result = f.target<fn_type>();
-    if (!result) {
-        auto r = f(1);
-        return "can't convert to function pointer: eval(1) = " + std::to_string(r);
-    } else if (*result == dummy_function) {
-        auto r = (*result)(1);
-        return "matches dummy_function: eval(1) = " + std::to_string(r);
-    } else {
-        return "argument does NOT match dummy_function. This should never happen!";
-    }
-}
+TEST_SUBMODULE(callbacks, m) {
+    // test_callbacks, test_function_signatures
+    m.def("test_callback1", [](py::object func) { return func(); });
+    m.def("test_callback2", [](py::object func) { return func("Hello", 'x', true, 5); });
+    m.def("test_callback3", [](const std::function<int(int)> &func) {
+        return "func(43) = " + std::to_string(func(43)); });
+    m.def("test_callback4", []() -> std::function<int(int)> { return [](int i) { return i+1; }; });
+    m.def("test_callback5", []() {
+        return py::cpp_function([](int i) { return i+1; }, py::arg("number"));
+    });
 
-struct Payload {
-    Payload() {
-        print_default_created(this);
-    }
-    ~Payload() {
-        print_destroyed(this);
-    }
-    Payload(const Payload &) {
-        print_copy_created(this);
-    }
-    Payload(Payload &&) {
-        print_move_created(this);
-    }
-};
-
-class AbstractBase {
-public:
-  virtual unsigned int func() = 0;
-};
-
-void func_accepting_func_accepting_base(std::function<double(AbstractBase&)>) { }
-
-struct MovableObject {
-  bool valid = true;
-
-  MovableObject() = default;
-  MovableObject(const MovableObject &) = default;
-  MovableObject &operator=(const MovableObject &) = default;
-  MovableObject(MovableObject &&o) : valid(o.valid) { o.valid = false; }
-  MovableObject &operator=(MovableObject &&o) {
-    valid = o.valid;
-    o.valid = false;
-    return *this;
-  }
-};
-
-test_initializer callbacks([](py::module &m) {
-    m.def("test_callback1", &test_callback1);
-    m.def("test_callback2", &test_callback2);
-    m.def("test_callback3", &test_callback3);
-    m.def("test_callback4", &test_callback4);
-    m.def("test_callback5", &test_callback5);
-
-    // Test keyword args and generalized unpacking
+    // test_keyword_args_and_generalized_unpacking
     m.def("test_tuple_unpacking", [](py::function f) {
         auto t1 = py::make_tuple(2, 3);
         auto t2 = py::make_tuple(5, 6);
@@ -148,6 +74,15 @@
         f(234, "expected_name"_a=UnregisteredType(), "kw"_a=567);
     });
 
+    // test_lambda_closure_cleanup
+    struct Payload {
+        Payload() { print_default_created(this); }
+        ~Payload() { print_destroyed(this); }
+        Payload(const Payload &) { print_copy_created(this); }
+        Payload(Payload &&) { print_move_created(this); }
+    };
+    // Export the payload constructor statistics for testing purposes:
+    m.def("payload_cstats", &ConstructorStats::get<Payload>);
     /* Test cleanup of lambda closure */
     m.def("test_cleanup", []() -> std::function<void(void)> {
         Payload p;
@@ -158,27 +93,57 @@
         };
     });
 
+    // test_cpp_function_roundtrip
     /* Test if passing a function pointer from C++ -> Python -> C++ yields the original pointer */
     m.def("dummy_function", &dummy_function);
-    m.def("dummy_function2", &dummy_function2);
-    m.def("roundtrip", &roundtrip, py::arg("f"), py::arg("expect_none")=false);
-    m.def("test_dummy_function", &test_dummy_function);
-    // Export the payload constructor statistics for testing purposes:
-    m.def("payload_cstats", &ConstructorStats::get<Payload>);
+    m.def("dummy_function2", [](int i, int j) { return i + j; });
+    m.def("roundtrip", [](std::function<int(int)> f, bool expect_none) {
+        if (expect_none && f)  // the Python-side default for expect_none comes from py::arg below
+            throw std::runtime_error("Expected None to be converted to empty std::function");
+        return f;
+    }, py::arg("f"), py::arg("expect_none")=false);
+    m.def("test_dummy_function", [](const std::function<int(int)> &f) -> std::string {
+        using fn_type = int (*)(int);
+        auto result = f.target<fn_type>();
+        if (!result) {
+            auto r = f(1);
+            return "can't convert to function pointer: eval(1) = " + std::to_string(r);
+        } else if (*result == dummy_function) {
+            auto r = (*result)(1);
+            return "matches dummy_function: eval(1) = " + std::to_string(r);
+        } else {
+            return "argument does NOT match dummy_function. This should never happen!";
+        }
+    });
 
-    m.def("func_accepting_func_accepting_base",
-          func_accepting_func_accepting_base);
+    class AbstractBase { public: virtual unsigned int func() = 0; };
+    m.def("func_accepting_func_accepting_base", [](std::function<double(AbstractBase&)>) { });
 
+    struct MovableObject {
+        bool valid = true;
+
+        MovableObject() = default;
+        MovableObject(const MovableObject &) = default;
+        MovableObject &operator=(const MovableObject &) = default;
+        MovableObject(MovableObject &&o) : valid(o.valid) { o.valid = false; }
+        MovableObject &operator=(MovableObject &&o) {
+            valid = o.valid;
+            o.valid = false;
+            return *this;
+        }
+    };
     py::class_<MovableObject>(m, "MovableObject");
 
+    // test_movable_object
     m.def("callback_with_movable", [](std::function<void(MovableObject &)> f) {
         auto x = MovableObject();
         f(x); // lvalue reference shouldn't move out object
         return x.valid; // must still return `true`
-      });
+    });
 
+    // test_bound_method_callback
     struct CppBoundMethodTest {};
     py::class_<CppBoundMethodTest>(m, "CppBoundMethodTest")
         .def(py::init<>())
         .def("triple", [](CppBoundMethodTest &, int val) { return 3 * val; });
-});
+}
diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index a5109d0..93c42c2 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -1,10 +1,9 @@
 import pytest
+from pybind11_tests import callbacks as m
 
 
 def test_callbacks():
     from functools import partial
-    from pybind11_tests import (test_callback1, test_callback2, test_callback3,
-                                test_callback4, test_callback5)
 
     def func1():
         return "func1"
@@ -15,73 +14,65 @@
     def func3(a):
         return "func3({})".format(a)
 
-    assert test_callback1(func1) == "func1"
-    assert test_callback2(func2) == ("func2", "Hello", "x", True, 5)
-    assert test_callback1(partial(func2, 1, 2, 3, 4)) == ("func2", 1, 2, 3, 4)
-    assert test_callback1(partial(func3, "partial")) == "func3(partial)"
-    assert test_callback3(lambda i: i + 1) == "func(43) = 44"
+    assert m.test_callback1(func1) == "func1"
+    assert m.test_callback2(func2) == ("func2", "Hello", "x", True, 5)
+    assert m.test_callback1(partial(func2, 1, 2, 3, 4)) == ("func2", 1, 2, 3, 4)
+    assert m.test_callback1(partial(func3, "partial")) == "func3(partial)"
+    assert m.test_callback3(lambda i: i + 1) == "func(43) = 44"
 
-    f = test_callback4()
+    f = m.test_callback4()
     assert f(43) == 44
-    f = test_callback5()
+    f = m.test_callback5()
     assert f(number=43) == 44
 
 
 def test_bound_method_callback():
-    from pybind11_tests import test_callback3, CppBoundMethodTest
-
     # Bound Python method:
     class MyClass:
         def double(self, val):
             return 2 * val
 
     z = MyClass()
-    assert test_callback3(z.double) == "func(43) = 86"
+    assert m.test_callback3(z.double) == "func(43) = 86"
 
-    z = CppBoundMethodTest()
-    assert test_callback3(z.triple) == "func(43) = 129"
+    z = m.CppBoundMethodTest()
+    assert m.test_callback3(z.triple) == "func(43) = 129"
 
 
 def test_keyword_args_and_generalized_unpacking():
-    from pybind11_tests import (test_tuple_unpacking, test_dict_unpacking, test_keyword_args,
-                                test_unpacking_and_keywords1, test_unpacking_and_keywords2,
-                                test_unpacking_error1, test_unpacking_error2,
-                                test_arg_conversion_error1, test_arg_conversion_error2)
 
     def f(*args, **kwargs):
         return args, kwargs
 
-    assert test_tuple_unpacking(f) == (("positional", 1, 2, 3, 4, 5, 6), {})
-    assert test_dict_unpacking(f) == (("positional", 1), {"key": "value", "a": 1, "b": 2})
-    assert test_keyword_args(f) == ((), {"x": 10, "y": 20})
-    assert test_unpacking_and_keywords1(f) == ((1, 2), {"c": 3, "d": 4})
-    assert test_unpacking_and_keywords2(f) == (
+    assert m.test_tuple_unpacking(f) == (("positional", 1, 2, 3, 4, 5, 6), {})
+    assert m.test_dict_unpacking(f) == (("positional", 1), {"key": "value", "a": 1, "b": 2})
+    assert m.test_keyword_args(f) == ((), {"x": 10, "y": 20})
+    assert m.test_unpacking_and_keywords1(f) == ((1, 2), {"c": 3, "d": 4})
+    assert m.test_unpacking_and_keywords2(f) == (
         ("positional", 1, 2, 3, 4, 5),
         {"key": "value", "a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
     )
 
     with pytest.raises(TypeError) as excinfo:
-        test_unpacking_error1(f)
+        m.test_unpacking_error1(f)
     assert "Got multiple values for keyword argument" in str(excinfo.value)
 
     with pytest.raises(TypeError) as excinfo:
-        test_unpacking_error2(f)
+        m.test_unpacking_error2(f)
     assert "Got multiple values for keyword argument" in str(excinfo.value)
 
     with pytest.raises(RuntimeError) as excinfo:
-        test_arg_conversion_error1(f)
+        m.test_arg_conversion_error1(f)
     assert "Unable to convert call argument" in str(excinfo.value)
 
     with pytest.raises(RuntimeError) as excinfo:
-        test_arg_conversion_error2(f)
+        m.test_arg_conversion_error2(f)
     assert "Unable to convert call argument" in str(excinfo.value)
 
 
 def test_lambda_closure_cleanup():
-    from pybind11_tests import test_cleanup, payload_cstats
-
-    test_cleanup()
-    cstats = payload_cstats()
+    m.test_cleanup()
+    cstats = m.payload_cstats()
     assert cstats.alive() == 0
     assert cstats.copy_constructions == 1
     assert cstats.move_constructions >= 1
@@ -89,31 +80,28 @@
 
 def test_cpp_function_roundtrip():
     """Test if passing a function pointer from C++ -> Python -> C++ yields the original pointer"""
-    from pybind11_tests import dummy_function, dummy_function2, test_dummy_function, roundtrip
 
-    assert test_dummy_function(dummy_function) == "matches dummy_function: eval(1) = 2"
-    assert test_dummy_function(roundtrip(dummy_function)) == "matches dummy_function: eval(1) = 2"
-    assert roundtrip(None, expect_none=True) is None
-    assert test_dummy_function(lambda x: x + 2) == "can't convert to function pointer: eval(1) = 3"
+    assert m.test_dummy_function(m.dummy_function) == "matches dummy_function: eval(1) = 2"
+    assert (m.test_dummy_function(m.roundtrip(m.dummy_function)) ==
+            "matches dummy_function: eval(1) = 2")
+    assert m.roundtrip(None, expect_none=True) is None
+    assert (m.test_dummy_function(lambda x: x + 2) ==
+            "can't convert to function pointer: eval(1) = 3")
 
     with pytest.raises(TypeError) as excinfo:
-        test_dummy_function(dummy_function2)
+        m.test_dummy_function(m.dummy_function2)
     assert "incompatible function arguments" in str(excinfo.value)
 
     with pytest.raises(TypeError) as excinfo:
-        test_dummy_function(lambda x, y: x + y)
+        m.test_dummy_function(lambda x, y: x + y)
     assert any(s in str(excinfo.value) for s in ("missing 1 required positional argument",
                                                  "takes exactly 2 arguments"))
 
 
 def test_function_signatures(doc):
-    from pybind11_tests import test_callback3, test_callback4
-
-    assert doc(test_callback3) == "test_callback3(arg0: Callable[[int], int]) -> str"
-    assert doc(test_callback4) == "test_callback4() -> Callable[[int], int]"
+    assert doc(m.test_callback3) == "test_callback3(arg0: Callable[[int], int]) -> str"
+    assert doc(m.test_callback4) == "test_callback4() -> Callable[[int], int]"
 
 
 def test_movable_object():
-    from pybind11_tests import callback_with_movable
-
-    assert callback_with_movable(lambda _: None) is True
+    assert m.callback_with_movable(lambda _: None) is True
diff --git a/tests/test_chrono.cpp b/tests/test_chrono.cpp
index fcc1b61..195a93b 100644
--- a/tests/test_chrono.cpp
+++ b/tests/test_chrono.cpp
@@ -8,58 +8,40 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
-
 #include "pybind11_tests.h"
-#include "constructor_stats.h"
 #include <pybind11/chrono.h>
 
-// Return the current time off the wall clock
-std::chrono::system_clock::time_point test_chrono1() {
-    return std::chrono::system_clock::now();
-}
+TEST_SUBMODULE(chrono, m) {
+    using system_time = std::chrono::system_clock::time_point;
+    using steady_time = std::chrono::steady_clock::time_point;
+    // test_chrono_system_clock
+    // Return the current time off the wall clock
+    m.def("test_chrono1", []() { return std::chrono::system_clock::now(); });
 
-// Round trip the passed in system clock time
-std::chrono::system_clock::time_point test_chrono2(std::chrono::system_clock::time_point t) {
-    return t;
-}
+    // test_chrono_system_clock_roundtrip
+    // Round trip the passed in system clock time
+    m.def("test_chrono2", [](system_time t) { return t; });
 
-// Round trip the passed in duration
-std::chrono::system_clock::duration test_chrono3(std::chrono::system_clock::duration d) {
-    return d;
-}
+    // test_chrono_duration_roundtrip
+    // Round trip the passed in duration
+    m.def("test_chrono3", [](std::chrono::system_clock::duration d) { return d; });
 
-// Difference between two passed in time_points
-std::chrono::system_clock::duration test_chrono4(std::chrono::system_clock::time_point a, std::chrono::system_clock::time_point b) {
-    return a - b;
-}
+    // test_chrono_duration_subtraction_equivalence
+    // Difference between two passed in time_points
+    m.def("test_chrono4", [](system_time a, system_time b) { return a - b; });
 
-// Return the current time off the steady_clock
-std::chrono::steady_clock::time_point test_chrono5() {
-    return std::chrono::steady_clock::now();
-}
+    // test_chrono_steady_clock
+    // Return the current time off the steady_clock
+    m.def("test_chrono5", []() { return std::chrono::steady_clock::now(); });
 
-// Round trip a steady clock timepoint
-std::chrono::steady_clock::time_point test_chrono6(std::chrono::steady_clock::time_point t) {
-    return t;
-}
+    // test_chrono_steady_clock_roundtrip
+    // Round trip a steady clock timepoint
+    m.def("test_chrono6", [](steady_time t) { return t; });
 
-// Roundtrip a duration in microseconds from a float argument
-std::chrono::microseconds test_chrono7(std::chrono::microseconds t) {
-    return t;
+    // test_floating_point_duration
+    // Roundtrip a duration in microseconds from a float argument
+    m.def("test_chrono7", [](std::chrono::microseconds t) { return t; });
+    // Float durations (issue #719)
+    m.def("test_chrono_float_diff", [](std::chrono::duration<float> a, std::chrono::duration<float> b) {
+        return a - b; });
 }
-
-// Float durations (issue #719)
-std::chrono::duration<double> test_chrono_float_diff(std::chrono::duration<float> a, std::chrono::duration<float> b) {
-    return a - b;
-}
-
-test_initializer chrono([] (py::module &m) {
-    m.def("test_chrono1", &test_chrono1);
-    m.def("test_chrono2", &test_chrono2);
-    m.def("test_chrono3", &test_chrono3);
-    m.def("test_chrono4", &test_chrono4);
-    m.def("test_chrono5", &test_chrono5);
-    m.def("test_chrono6", &test_chrono6);
-    m.def("test_chrono7", &test_chrono7);
-    m.def("test_chrono_float_diff", &test_chrono_float_diff);
-});
diff --git a/tests/test_chrono.py b/tests/test_chrono.py
index 55094ed..2b75bd1 100644
--- a/tests/test_chrono.py
+++ b/tests/test_chrono.py
@@ -1,11 +1,11 @@
+from pybind11_tests import chrono as m
+import datetime
 
 
 def test_chrono_system_clock():
-    from pybind11_tests import test_chrono1
-    import datetime
 
     # Get the time from both c++ and datetime
-    date1 = test_chrono1()
+    date1 = m.test_chrono1()
     date2 = datetime.datetime.today()
 
     # The returned value should be a datetime
@@ -25,13 +25,10 @@
 
 
 def test_chrono_system_clock_roundtrip():
-    from pybind11_tests import test_chrono2
-    import datetime
-
     date1 = datetime.datetime.today()
 
     # Roundtrip the time
-    date2 = test_chrono2(date1)
+    date2 = m.test_chrono2(date1)
 
     # The returned value should be a datetime
     assert isinstance(date2, datetime.datetime)
@@ -44,8 +41,6 @@
 
 
 def test_chrono_duration_roundtrip():
-    from pybind11_tests import test_chrono3
-    import datetime
 
     # Get the difference between two times (a timedelta)
     date1 = datetime.datetime.today()
@@ -55,7 +50,7 @@
     # Make sure this is a timedelta
     assert isinstance(diff, datetime.timedelta)
 
-    cpp_diff = test_chrono3(diff)
+    cpp_diff = m.test_chrono3(diff)
 
     assert cpp_diff.days == diff.days
     assert cpp_diff.seconds == diff.seconds
@@ -63,14 +58,12 @@
 
 
 def test_chrono_duration_subtraction_equivalence():
-    from pybind11_tests import test_chrono4
-    import datetime
 
     date1 = datetime.datetime.today()
     date2 = datetime.datetime.today()
 
     diff = date2 - date1
-    cpp_diff = test_chrono4(date2, date1)
+    cpp_diff = m.test_chrono4(date2, date1)
 
     assert cpp_diff.days == diff.days
     assert cpp_diff.seconds == diff.seconds
@@ -78,22 +71,13 @@
 
 
 def test_chrono_steady_clock():
-    from pybind11_tests import test_chrono5
-    import datetime
-
-    time1 = test_chrono5()
-    time2 = test_chrono5()
-
+    time1 = m.test_chrono5()
     assert isinstance(time1, datetime.timedelta)
-    assert isinstance(time2, datetime.timedelta)
 
 
 def test_chrono_steady_clock_roundtrip():
-    from pybind11_tests import test_chrono6
-    import datetime
-
     time1 = datetime.timedelta(days=10, seconds=10, microseconds=100)
-    time2 = test_chrono6(time1)
+    time2 = m.test_chrono6(time1)
 
     assert isinstance(time2, datetime.timedelta)
 
@@ -104,17 +88,14 @@
 
 
 def test_floating_point_duration():
-    from pybind11_tests import test_chrono7, test_chrono_float_diff
-    import datetime
-
-    # Test using 35.525123 seconds as an example floating point number in seconds
-    time = test_chrono7(35.525123)
+    # Test using a floating point number in seconds
+    time = m.test_chrono7(35.525123)
 
     assert isinstance(time, datetime.timedelta)
 
     assert time.seconds == 35
     assert 525122 <= time.microseconds <= 525123
 
-    diff = test_chrono_float_diff(43.789012, 1.123456)
+    diff = m.test_chrono_float_diff(43.789012, 1.123456)
     assert diff.seconds == 42
     assert 665556 <= diff.microseconds <= 665557
diff --git a/tests/test_class.py b/tests/test_class.py
index e62f982..7381c4a 100644
--- a/tests/test_class.py
+++ b/tests/test_class.py
@@ -35,7 +35,7 @@
 
         Get value using a method
     """
-    assert doc(UserType.value) == "Get value using a property"
+    assert doc(UserType.value) == "Get/set value using a property"
 
     assert doc(m.NoConstructor.new_instance) == """
         new_instance() -> m.class_.NoConstructor
diff --git a/tests/test_constants_and_functions.cpp b/tests/test_constants_and_functions.cpp
index 653bdf6..8c9ef7f 100644
--- a/tests/test_constants_and_functions.cpp
+++ b/tests/test_constants_and_functions.cpp
@@ -23,6 +23,8 @@
     return "test_function(" + std::to_string(i) + ")";
 }
 
+py::str test_function4()           { return "test_function()"; }
+py::str test_function4(char *)     { return "test_function(char *)"; }
 py::str test_function4(int, float) { return "test_function(int, float)"; }
 py::str test_function4(float, int) { return "test_function(float, int)"; }
 
@@ -61,17 +63,23 @@
 }
 
 
-test_initializer constants_and_functions([](py::module &m) {
+TEST_SUBMODULE(constants_and_functions, m) {
+    // test_constants
     m.attr("some_constant") = py::int_(14);
 
+    // test_function_overloading
     m.def("test_function", &test_function1);
     m.def("test_function", &test_function2);
     m.def("test_function", &test_function3);
 
 #if defined(PYBIND11_OVERLOAD_CAST)
+    m.def("test_function", py::overload_cast<>(&test_function4));
+    m.def("test_function", py::overload_cast<char *>(&test_function4));
     m.def("test_function", py::overload_cast<int, float>(&test_function4));
     m.def("test_function", py::overload_cast<float, int>(&test_function4));
 #else
+    m.def("test_function", static_cast<py::str (*)()>(&test_function4));
+    m.def("test_function", static_cast<py::str (*)(char *)>(&test_function4));
     m.def("test_function", static_cast<py::str (*)(int, float)>(&test_function4));
     m.def("test_function", static_cast<py::str (*)(float, int)>(&test_function4));
 #endif
@@ -81,12 +89,13 @@
         .value("ESecondEntry", ESecondEntry)
         .export_values();
 
+    // test_bytes
     m.def("return_bytes", &return_bytes);
     m.def("print_bytes", &print_bytes);
 
+    // test_exception_specifiers
     using namespace test_exc_sp;
-    py::module m2 = m.def_submodule("exc_sp");
-    py::class_<C>(m2, "C")
+    py::class_<C>(m, "C")
         .def(py::init<>())
         .def("m1", &C::m1)
         .def("m2", &C::m2)
@@ -97,8 +106,8 @@
         .def("m7", &C::m7)
         .def("m8", &C::m8)
         ;
-    m2.def("f1", f1);
-    m2.def("f2", f2);
-    m2.def("f3", f3);
-    m2.def("f4", f4);
-});
+    m.def("f1", f1);
+    m.def("f2", f2);
+    m.def("f3", f3);
+    m.def("f4", f4);
+}
diff --git a/tests/test_constants_and_functions.py b/tests/test_constants_and_functions.py
index 2a570d2..472682d 100644
--- a/tests/test_constants_and_functions.py
+++ b/tests/test_constants_and_functions.py
@@ -1,33 +1,29 @@
+from pybind11_tests import constants_and_functions as m
 
 
 def test_constants():
-    from pybind11_tests import some_constant
-
-    assert some_constant == 14
+    assert m.some_constant == 14
 
 
 def test_function_overloading():
-    from pybind11_tests import MyEnum, test_function
+    assert m.test_function() == "test_function()"
+    assert m.test_function(7) == "test_function(7)"
+    assert m.test_function(m.MyEnum.EFirstEntry) == "test_function(enum=1)"
+    assert m.test_function(m.MyEnum.ESecondEntry) == "test_function(enum=2)"
 
-    assert test_function() == "test_function()"
-    assert test_function(7) == "test_function(7)"
-    assert test_function(MyEnum.EFirstEntry) == "test_function(enum=1)"
-    assert test_function(MyEnum.ESecondEntry) == "test_function(enum=2)"
-
-    assert test_function(1, 1.0) == "test_function(int, float)"
-    assert test_function(2.0, 2) == "test_function(float, int)"
+    assert m.test_function() == "test_function()"
+    assert m.test_function("abcd") == "test_function(char *)"
+    assert m.test_function(1, 1.0) == "test_function(int, float)"
+    assert m.test_function(1, 1.0) == "test_function(int, float)"
+    assert m.test_function(2.0, 2) == "test_function(float, int)"
 
 
 def test_bytes():
-    from pybind11_tests import return_bytes, print_bytes
-
-    assert print_bytes(return_bytes()) == "bytes[1 0 2 0]"
+    assert m.print_bytes(m.return_bytes()) == "bytes[1 0 2 0]"
 
 
 def test_exception_specifiers():
-    from pybind11_tests.exc_sp import C, f1, f2, f3, f4
-
-    c = C()
+    c = m.C()
     assert c.m1(2) == 1
     assert c.m2(3) == 1
     assert c.m3(5) == 2
@@ -37,7 +33,7 @@
     assert c.m7(20) == 13
     assert c.m8(29) == 21
 
-    assert f1(33) == 34
-    assert f2(53) == 55
-    assert f3(86) == 89
-    assert f4(140) == 144
+    assert m.f1(33) == 34
+    assert m.f2(53) == 55
+    assert m.f3(86) == 89
+    assert m.f4(140) == 144
diff --git a/tests/test_copy_move.cpp b/tests/test_copy_move.cpp
index e80cdb8..94113e3 100644
--- a/tests/test_copy_move.cpp
+++ b/tests/test_copy_move.cpp
@@ -68,7 +68,8 @@
 
     int value;
 };
-namespace pybind11 { namespace detail {
+NAMESPACE_BEGIN(pybind11)
+NAMESPACE_BEGIN(detail)
 template <> struct type_caster<MoveOnlyInt> {
     PYBIND11_TYPE_CASTER(MoveOnlyInt, _("MoveOnlyInt"));
     bool load(handle src, bool) { value = MoveOnlyInt(src.cast<int>()); return true; }
@@ -96,32 +97,20 @@
     operator CopyOnlyInt&() { return value; }
     template <typename T> using cast_op_type = pybind11::detail::cast_op_type<T>;
 };
-}}
+NAMESPACE_END(detail)
+NAMESPACE_END(pybind11)
 
-struct PrivateOpNew {
-    int value = 1;
-
-private:
-    void *operator new(size_t bytes);
-};
-
-test_initializer copy_move_policies([](py::module &m) {
+TEST_SUBMODULE(copy_move_policies, m) {
+    // test_lacking_copy_ctor
     py::class_<lacking_copy_ctor>(m, "lacking_copy_ctor")
         .def_static("get_one", &lacking_copy_ctor::get_one,
                     py::return_value_policy::copy);
+    // test_lacking_move_ctor
     py::class_<lacking_move_ctor>(m, "lacking_move_ctor")
         .def_static("get_one", &lacking_move_ctor::get_one,
                     py::return_value_policy::move);
 
-    m.def("move_only", [](MoveOnlyInt m) {
-        return m.value;
-    });
-    m.def("move_or_copy", [](MoveOrCopyInt m) {
-        return m.value;
-    });
-    m.def("copy_only", [](CopyOnlyInt m) {
-        return m.value;
-    });
+    // test_move_and_copy_casts
     m.def("move_and_copy_casts", [](py::object o) {
         int r = 0;
         r += py::cast<MoveOrCopyInt>(o).value; /* moves */
@@ -134,6 +123,11 @@
 
         return r;
     });
+
+    // test_move_and_copy_loads
+    m.def("move_only", [](MoveOnlyInt m) { return m.value; });
+    m.def("move_or_copy", [](MoveOrCopyInt m) { return m.value; });
+    m.def("copy_only", [](CopyOnlyInt m) { return m.value; });
     m.def("move_pair", [](std::pair<MoveOnlyInt, MoveOrCopyInt> p) {
         return p.first.value + p.second.value;
     });
@@ -163,6 +157,7 @@
         return d;
     });
 #ifdef PYBIND11_HAS_OPTIONAL
+    // test_move_and_copy_load_optional
     m.attr("has_optional") = true;
     m.def("move_optional", [](std::optional<MoveOnlyInt> o) {
         return o->value;
@@ -181,6 +176,14 @@
 #endif
 
     // #70 compilation issue if operator new is not public
+    struct PrivateOpNew {
+        int value = 1;
+    private:
+#if defined(_MSC_VER)
+#  pragma warning(disable: 4822) // warning C4822: local class member function does not have a body
+#endif
+        void *operator new(size_t bytes);
+    };
     py::class_<PrivateOpNew>(m, "PrivateOpNew").def_readonly("value", &PrivateOpNew::value);
     m.def("private_op_new_value", []() { return PrivateOpNew(); });
     m.def("private_op_new_reference", []() -> const PrivateOpNew & {
@@ -188,6 +191,7 @@
         return x;
     }, py::return_value_policy::reference);
 
+    // test_move_fallback
     // #389: rvp::move should fall-through to copy on non-movable objects
     struct MoveIssue1 {
         int v;
@@ -195,15 +199,15 @@
         MoveIssue1(const MoveIssue1 &c) = default;
         MoveIssue1(MoveIssue1 &&) = delete;
     };
+    py::class_<MoveIssue1>(m, "MoveIssue1").def(py::init<int>()).def_readwrite("value", &MoveIssue1::v);
 
     struct MoveIssue2 {
         int v;
         MoveIssue2(int v) : v{v} {}
         MoveIssue2(MoveIssue2 &&) = default;
     };
-
-    py::class_<MoveIssue1>(m, "MoveIssue1").def(py::init<int>()).def_readwrite("value", &MoveIssue1::v);
     py::class_<MoveIssue2>(m, "MoveIssue2").def(py::init<int>()).def_readwrite("value", &MoveIssue2::v);
+
     m.def("get_moveissue1", [](int i) { return new MoveIssue1(i); }, py::return_value_policy::move);
     m.def("get_moveissue2", [](int i) { return MoveIssue2(i); }, py::return_value_policy::move);
-});
+}
diff --git a/tests/test_copy_move.py b/tests/test_copy_move.py
index b73c950..aff2d99 100644
--- a/tests/test_copy_move.py
+++ b/tests/test_copy_move.py
@@ -1,32 +1,29 @@
 import pytest
-from pybind11_tests import has_optional
+from pybind11_tests import copy_move_policies as m
 
 
 def test_lacking_copy_ctor():
-    from pybind11_tests import lacking_copy_ctor
     with pytest.raises(RuntimeError) as excinfo:
-        lacking_copy_ctor.get_one()
+        m.lacking_copy_ctor.get_one()
     assert "the object is non-copyable!" in str(excinfo.value)
 
 
 def test_lacking_move_ctor():
-    from pybind11_tests import lacking_move_ctor
     with pytest.raises(RuntimeError) as excinfo:
-        lacking_move_ctor.get_one()
+        m.lacking_move_ctor.get_one()
     assert "the object is neither movable nor copyable!" in str(excinfo.value)
 
 
 def test_move_and_copy_casts():
     """Cast some values in C++ via custom type casters and count the number of moves/copies."""
-    from pybind11_tests import move_and_copy_casts, move_and_copy_cstats
 
-    cstats = move_and_copy_cstats()
+    cstats = m.move_and_copy_cstats()
     c_m, c_mc, c_c = cstats["MoveOnlyInt"], cstats["MoveOrCopyInt"], cstats["CopyOnlyInt"]
 
     # The type move constructions/assignments below each get incremented: the move assignment comes
     # from the type_caster load; the move construction happens when extracting that via a cast or
     # loading into an argument.
-    assert move_and_copy_casts(3) == 18
+    assert m.move_and_copy_casts(3) == 18
     assert c_m.copy_assignments + c_m.copy_constructions == 0
     assert c_m.move_assignments == 2
     assert c_m.move_constructions >= 2
@@ -43,21 +40,19 @@
 def test_move_and_copy_loads():
     """Call some functions that load arguments via custom type casters and count the number of
     moves/copies."""
-    from pybind11_tests import (move_and_copy_cstats, move_only, move_or_copy, copy_only,
-                                move_pair, move_tuple, copy_tuple, move_copy_nested)
 
-    cstats = move_and_copy_cstats()
+    cstats = m.move_and_copy_cstats()
     c_m, c_mc, c_c = cstats["MoveOnlyInt"], cstats["MoveOrCopyInt"], cstats["CopyOnlyInt"]
 
-    assert move_only(10) == 10  # 1 move, c_m
-    assert move_or_copy(11) == 11  # 1 move, c_mc
-    assert copy_only(12) == 12  # 1 copy, c_c
-    assert move_pair((13, 14)) == 27  # 1 c_m move, 1 c_mc move
-    assert move_tuple((15, 16, 17)) == 48  # 2 c_m moves, 1 c_mc move
-    assert copy_tuple((18, 19)) == 37  # 2 c_c copies
+    assert m.move_only(10) == 10  # 1 move, c_m
+    assert m.move_or_copy(11) == 11  # 1 move, c_mc
+    assert m.copy_only(12) == 12  # 1 copy, c_c
+    assert m.move_pair((13, 14)) == 27  # 1 c_m move, 1 c_mc move
+    assert m.move_tuple((15, 16, 17)) == 48  # 2 c_m moves, 1 c_mc move
+    assert m.copy_tuple((18, 19)) == 37  # 2 c_c copies
     # Direct constructions: 2 c_m moves, 2 c_mc moves, 1 c_c copy
     # Extra moves/copies when moving pairs/tuples: 3 c_m, 3 c_mc, 2 c_c
-    assert move_copy_nested((1, ((2, 3, (4,)), 5))) == 15
+    assert m.move_copy_nested((1, ((2, 3, (4,)), 5))) == 15
 
     assert c_m.copy_assignments + c_m.copy_constructions == 0
     assert c_m.move_assignments == 6
@@ -70,24 +65,22 @@
     assert c_m.alive() + c_mc.alive() + c_c.alive() == 0
 
 
-@pytest.mark.skipif(not has_optional, reason='no <optional>')
+@pytest.mark.skipif(not m.has_optional, reason='no <optional>')
 def test_move_and_copy_load_optional():
     """Tests move/copy loads of std::optional arguments"""
-    from pybind11_tests import (move_and_copy_cstats, move_optional, move_or_copy_optional,
-                                copy_optional, move_optional_tuple)
 
-    cstats = move_and_copy_cstats()
+    cstats = m.move_and_copy_cstats()
     c_m, c_mc, c_c = cstats["MoveOnlyInt"], cstats["MoveOrCopyInt"], cstats["CopyOnlyInt"]
 
     # The extra move/copy constructions below come from the std::optional move (which has to move
     # its arguments):
-    assert move_optional(10) == 10  # c_m: 1 move assign, 2 move construct
-    assert move_or_copy_optional(11) == 11  # c_mc: 1 move assign, 2 move construct
-    assert copy_optional(12) == 12  # c_c: 1 copy assign, 2 copy construct
+    assert m.move_optional(10) == 10  # c_m: 1 move assign, 2 move construct
+    assert m.move_or_copy_optional(11) == 11  # c_mc: 1 move assign, 2 move construct
+    assert m.copy_optional(12) == 12  # c_c: 1 copy assign, 2 copy construct
     # 1 move assign + move construct moves each of c_m, c_mc, 1 c_c copy
     # +1 move/copy construct each from moving the tuple
     # +1 move/copy construct each from moving the optional (which moves the tuple again)
-    assert move_optional_tuple((3, 4, 5)) == 12
+    assert m.move_optional_tuple((3, 4, 5)) == 12
 
     assert c_m.copy_assignments + c_m.copy_constructions == 0
     assert c_m.move_assignments == 2
@@ -102,7 +95,6 @@
 
 def test_private_op_new():
     """An object with a private `operator new` cannot be returned by value"""
-    import pybind11_tests as m
 
     with pytest.raises(RuntimeError) as excinfo:
         m.private_op_new_value()
@@ -113,9 +105,8 @@
 
 def test_move_fallback():
     """#389: rvp::move should fall-through to copy on non-movable objects"""
-    from pybind11_tests import get_moveissue1, get_moveissue2
 
-    m2 = get_moveissue2(2)
+    m2 = m.get_moveissue2(2)
     assert m2.value == 2
-    m1 = get_moveissue1(1)
+    m1 = m.get_moveissue1(1)
     assert m1.value == 1
diff --git a/tests/test_docstring_options.cpp b/tests/test_docstring_options.cpp
index 9a9297c..8c8f79f 100644
--- a/tests/test_docstring_options.cpp
+++ b/tests/test_docstring_options.cpp
@@ -9,14 +9,8 @@
 
 #include "pybind11_tests.h"
 
-struct DocstringTestFoo {
-    int value;
-    void setValue(int v) { value = v; }
-    int getValue() const { return value; }
-};
-
-test_initializer docstring_generation([](py::module &m) {
-
+TEST_SUBMODULE(docstring_options, m) {
+    // test_docstring_options
     {
         py::options options;
         options.disable_function_signatures();
@@ -55,8 +49,13 @@
         py::options options;
         options.disable_user_defined_docstrings();
 
+        struct DocstringTestFoo {
+            int value;
+            void setValue(int v) { value = v; }
+            int getValue() const { return value; }
+        };
         py::class_<DocstringTestFoo>(m, "DocstringTestFoo", "This is a class docstring")
             .def_property("value_prop", &DocstringTestFoo::getValue, &DocstringTestFoo::setValue, "This is a property docstring")
         ;
     }
-});
+}
diff --git a/tests/test_docstring_options.py b/tests/test_docstring_options.py
index 5e40f68..0dbca60 100644
--- a/tests/test_docstring_options.py
+++ b/tests/test_docstring_options.py
@@ -1,42 +1,38 @@
+from pybind11_tests import docstring_options as m
 
 
 def test_docstring_options():
-    from pybind11_tests import (test_function1, test_function2, test_function3,
-                                test_function4, test_function5, test_function6,
-                                test_function7, DocstringTestFoo,
-                                test_overloaded1, test_overloaded2, test_overloaded3)
-
     # options.disable_function_signatures()
-    assert not test_function1.__doc__
+    assert not m.test_function1.__doc__
 
-    assert test_function2.__doc__ == "A custom docstring"
+    assert m.test_function2.__doc__ == "A custom docstring"
 
     # docstring specified on just the first overload definition:
-    assert test_overloaded1.__doc__ == "Overload docstring"
+    assert m.test_overloaded1.__doc__ == "Overload docstring"
 
     # docstring on both overloads:
-    assert test_overloaded2.__doc__ == "overload docstring 1\noverload docstring 2"
+    assert m.test_overloaded2.__doc__ == "overload docstring 1\noverload docstring 2"
 
     # docstring on only second overload:
-    assert test_overloaded3.__doc__ == "Overload docstr"
+    assert m.test_overloaded3.__doc__ == "Overload docstr"
 
     # options.enable_function_signatures()
-    assert test_function3.__doc__ .startswith("test_function3(a: int, b: int) -> None")
+    assert m.test_function3.__doc__ .startswith("test_function3(a: int, b: int) -> None")
 
-    assert test_function4.__doc__ .startswith("test_function4(a: int, b: int) -> None")
-    assert test_function4.__doc__ .endswith("A custom docstring\n")
+    assert m.test_function4.__doc__ .startswith("test_function4(a: int, b: int) -> None")
+    assert m.test_function4.__doc__ .endswith("A custom docstring\n")
 
     # options.disable_function_signatures()
     # options.disable_user_defined_docstrings()
-    assert not test_function5.__doc__
+    assert not m.test_function5.__doc__
 
     # nested options.enable_user_defined_docstrings()
-    assert test_function6.__doc__ == "A custom docstring"
+    assert m.test_function6.__doc__ == "A custom docstring"
 
     # RAII destructor
-    assert test_function7.__doc__ .startswith("test_function7(a: int, b: int) -> None")
-    assert test_function7.__doc__ .endswith("A custom docstring\n")
+    assert m.test_function7.__doc__ .startswith("test_function7(a: int, b: int) -> None")
+    assert m.test_function7.__doc__ .endswith("A custom docstring\n")
 
     # Suppression of user-defined docstrings for non-function objects
-    assert not DocstringTestFoo.__doc__
-    assert not DocstringTestFoo.value_prop.__doc__
+    assert not m.DocstringTestFoo.__doc__
+    assert not m.DocstringTestFoo.value_prop.__doc__
diff --git a/tests/test_eigen.cpp b/tests/test_eigen.cpp
index 413fed3..17b156c 100644
--- a/tests/test_eigen.cpp
+++ b/tests/test_eigen.cpp
@@ -70,20 +70,21 @@
     EIGEN_MAKE_ALIGNED_OPERATOR_NEW;
 };
 
-test_initializer eigen([](py::module &m) {
-    typedef Eigen::Matrix<float, 5, 6, Eigen::RowMajor> FixedMatrixR;
-    typedef Eigen::Matrix<float, 5, 6> FixedMatrixC;
-    typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> DenseMatrixR;
-    typedef Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic> DenseMatrixC;
-    typedef Eigen::Matrix<float, 4, Eigen::Dynamic> FourRowMatrixC;
-    typedef Eigen::Matrix<float, Eigen::Dynamic, 4> FourColMatrixC;
-    typedef Eigen::Matrix<float, 4, Eigen::Dynamic> FourRowMatrixR;
-    typedef Eigen::Matrix<float, Eigen::Dynamic, 4> FourColMatrixR;
-    typedef Eigen::SparseMatrix<float, Eigen::RowMajor> SparseMatrixR;
-    typedef Eigen::SparseMatrix<float> SparseMatrixC;
+TEST_SUBMODULE(eigen, m) {
+    using FixedMatrixR = Eigen::Matrix<float, 5, 6, Eigen::RowMajor>;
+    using FixedMatrixC = Eigen::Matrix<float, 5, 6>;
+    using DenseMatrixR = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
+    using DenseMatrixC = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>;
+    using FourRowMatrixC = Eigen::Matrix<float, 4, Eigen::Dynamic>;
+    using FourColMatrixC = Eigen::Matrix<float, Eigen::Dynamic, 4>;
+    using FourRowMatrixR = Eigen::Matrix<float, 4, Eigen::Dynamic>;
+    using FourColMatrixR = Eigen::Matrix<float, Eigen::Dynamic, 4>;
+    using SparseMatrixR = Eigen::SparseMatrix<float, Eigen::RowMajor>;
+    using SparseMatrixC = Eigen::SparseMatrix<float>;
 
     m.attr("have_eigen") = true;
 
+    // various tests
     m.def("double_col", [](const Eigen::VectorXf &x) -> Eigen::VectorXf { return 2.0f * x; });
     m.def("double_row", [](const Eigen::RowVectorXf &x) -> Eigen::RowVectorXf { return 2.0f * x; });
     m.def("double_complex", [](const Eigen::VectorXcf &x) -> Eigen::VectorXcf { return 2.0f * x; });
@@ -92,12 +93,14 @@
     m.def("double_mat_cm", [](Eigen::MatrixXf x) -> Eigen::MatrixXf { return 2.0f * x; });
     m.def("double_mat_rm", [](DenseMatrixR x) -> DenseMatrixR { return 2.0f * x; });
 
+    // test_eigen_ref_to_python
     // Different ways of passing via Eigen::Ref; the first and second are the Eigen-recommended
     m.def("cholesky1", [](Eigen::Ref<MatrixXdR> x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
     m.def("cholesky2", [](const Eigen::Ref<const MatrixXdR> &x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
     m.def("cholesky3", [](const Eigen::Ref<MatrixXdR> &x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
     m.def("cholesky4", [](Eigen::Ref<const MatrixXdR> x) -> Eigen::MatrixXd { return x.llt().matrixL(); });
 
+    // test_eigen_ref_mutators
     // Mutators: these add some value to the given element using Eigen, but Eigen should be mapping into
     // the numpy array data and so the result should show up there.  There are three versions: one that
     // works on a contiguous-row matrix (numpy's default), one for a contiguous-column matrix, and one
@@ -122,19 +125,6 @@
     // The same references, but non-mutable (numpy maps into eigen variables, but is !writeable)
     m.def("get_cm_const_ref", []() { return Eigen::Ref<const Eigen::MatrixXd>(get_cm()); });
     m.def("get_rm_const_ref", []() { return Eigen::Ref<const MatrixXdR>(get_rm()); });
-    // Just the corners (via a Map instead of a Ref):
-    m.def("get_cm_corners", []() {
-        auto &x = get_cm();
-        return py::EigenDMap<Eigen::Matrix2d>(
-                x.data(),
-                py::EigenDStride(x.outerStride() * (x.rows() - 1), x.innerStride() * (x.cols() - 1)));
-    });
-    m.def("get_cm_corners_const", []() {
-        const auto &x = get_cm();
-        return py::EigenDMap<const Eigen::Matrix2d>(
-                x.data(),
-                py::EigenDStride(x.outerStride() * (x.rows() - 1), x.innerStride() * (x.cols() - 1)));
-    });
 
     m.def("reset_refs", reset_refs); // Restores get_{cm,rm}_ref to original values
 
@@ -174,6 +164,7 @@
         return x.block(start_row, start_col, block_rows, block_cols);
     });
 
+    // test_eigen_return_references, test_eigen_keepalive
     // return value referencing/copying tests:
     class ReturnTester {
         Eigen::MatrixXd mat = create();
@@ -220,6 +211,7 @@
         .def("corners_const", &ReturnTester::cornersConst, rvp::reference_internal)
         ;
 
+    // test_special_matrix_objects
     // Returns a DiagonalMatrix with diagonal (1,2,3,...)
     m.def("incr_diag", [](int k) {
         Eigen::DiagonalMatrix<int, Eigen::Dynamic> m(k);
@@ -244,27 +236,33 @@
            0,  0,  0,  0,  0, 11,
            0,  0, 14,  0,  8, 11;
 
+    // test_fixed, and various other tests
     m.def("fixed_r", [mat]() -> FixedMatrixR { return FixedMatrixR(mat); });
     m.def("fixed_r_const", [mat]() -> const FixedMatrixR { return FixedMatrixR(mat); });
     m.def("fixed_c", [mat]() -> FixedMatrixC { return FixedMatrixC(mat); });
     m.def("fixed_copy_r", [](const FixedMatrixR &m) -> FixedMatrixR { return m; });
     m.def("fixed_copy_c", [](const FixedMatrixC &m) -> FixedMatrixC { return m; });
+    // test_mutator_descriptors
     m.def("fixed_mutator_r", [](Eigen::Ref<FixedMatrixR>) {});
     m.def("fixed_mutator_c", [](Eigen::Ref<FixedMatrixC>) {});
     m.def("fixed_mutator_a", [](py::EigenDRef<FixedMatrixC>) {});
+    // test_dense
     m.def("dense_r", [mat]() -> DenseMatrixR { return DenseMatrixR(mat); });
     m.def("dense_c", [mat]() -> DenseMatrixC { return DenseMatrixC(mat); });
     m.def("dense_copy_r", [](const DenseMatrixR &m) -> DenseMatrixR { return m; });
     m.def("dense_copy_c", [](const DenseMatrixC &m) -> DenseMatrixC { return m; });
+    // test_sparse, test_sparse_signature
     m.def("sparse_r", [mat]() -> SparseMatrixR { return Eigen::SparseView<Eigen::MatrixXf>(mat); });
     m.def("sparse_c", [mat]() -> SparseMatrixC { return Eigen::SparseView<Eigen::MatrixXf>(mat); });
     m.def("sparse_copy_r", [](const SparseMatrixR &m) -> SparseMatrixR { return m; });
     m.def("sparse_copy_c", [](const SparseMatrixC &m) -> SparseMatrixC { return m; });
+    // test_partially_fixed
     m.def("partial_copy_four_rm_r", [](const FourRowMatrixR &m) -> FourRowMatrixR { return m; });
     m.def("partial_copy_four_rm_c", [](const FourColMatrixR &m) -> FourColMatrixR { return m; });
     m.def("partial_copy_four_cm_r", [](const FourRowMatrixC &m) -> FourRowMatrixC { return m; });
     m.def("partial_copy_four_cm_c", [](const FourColMatrixC &m) -> FourColMatrixC { return m; });
 
+    // test_cpp_casting
     // Test that we can cast a numpy object to a Eigen::MatrixXd explicitly
     m.def("cpp_copy", [](py::handle m) { return m.cast<Eigen::MatrixXd>()(1, 0); });
     m.def("cpp_ref_c", [](py::handle m) { return m.cast<Eigen::Ref<Eigen::MatrixXd>>()(1, 0); });
@@ -272,6 +270,7 @@
     m.def("cpp_ref_any", [](py::handle m) { return m.cast<py::EigenDRef<Eigen::MatrixXd>>()(1, 0); });
 
 
+    // test_nocopy_wrapper
     // Test that we can prevent copying into an argument that would normally copy: First a version
     // that would allow copying (if types or strides don't match) for comparison:
     m.def("get_elem", &get_elem);
@@ -282,12 +281,14 @@
     m.def("get_elem_rm_nocopy", [](Eigen::Ref<const Eigen::Matrix<long, -1, -1, Eigen::RowMajor>> &m) -> long { return m(2, 1); },
             py::arg().noconvert());
 
+    // test_issue738
     // Issue #738: 1xN or Nx1 2D matrices were neither accepted nor properly copied with an
     // incompatible stride value on the length-1 dimension--but that should be allowed (without
     // requiring a copy!) because the stride value can be safely ignored on a size-1 dimension.
     m.def("iss738_f1", &adjust_matrix<const Eigen::Ref<const Eigen::MatrixXd> &>, py::arg().noconvert());
     m.def("iss738_f2", &adjust_matrix<const Eigen::Ref<const Eigen::Matrix<double, -1, -1, Eigen::RowMajor>> &>, py::arg().noconvert());
 
+    // test_named_arguments
     // Make sure named arguments are working properly:
     m.def("matrix_multiply", [](const py::EigenDRef<const Eigen::MatrixXd> A, const py::EigenDRef<const Eigen::MatrixXd> B)
             -> Eigen::MatrixXd {
@@ -295,6 +296,7 @@
         return A * B;
     }, py::arg("A"), py::arg("B"));
 
+    // test_custom_operator_new
     py::class_<CustomOperatorNew>(m, "CustomOperatorNew")
         .def(py::init<>())
         .def_readonly("a", &CustomOperatorNew::a)
@@ -312,4 +314,4 @@
         py::module::import("numpy").attr("ones")(10);
         return v[0](5);
     });
-});
+}
diff --git a/tests/test_eigen.py b/tests/test_eigen.py
index c9fe69f..4ac8cbf 100644
--- a/tests/test_eigen.py
+++ b/tests/test_eigen.py
@@ -1,8 +1,10 @@
 import pytest
+from pybind11_tests import ConstructorStats
 
 pytestmark = pytest.requires_eigen_and_numpy
 
 with pytest.suppress(ImportError):
+    from pybind11_tests import eigen as m
     import numpy as np
 
     ref = np.array([[ 0.,  3,  0,  0,  0, 11],
@@ -21,51 +23,44 @@
 
 
 def test_fixed():
-    from pybind11_tests import fixed_r, fixed_c, fixed_copy_r, fixed_copy_c
-
-    assert_equal_ref(fixed_c())
-    assert_equal_ref(fixed_r())
-    assert_equal_ref(fixed_copy_r(fixed_r()))
-    assert_equal_ref(fixed_copy_c(fixed_c()))
-    assert_equal_ref(fixed_copy_r(fixed_c()))
-    assert_equal_ref(fixed_copy_c(fixed_r()))
+    assert_equal_ref(m.fixed_c())
+    assert_equal_ref(m.fixed_r())
+    assert_equal_ref(m.fixed_copy_r(m.fixed_r()))
+    assert_equal_ref(m.fixed_copy_c(m.fixed_c()))
+    assert_equal_ref(m.fixed_copy_r(m.fixed_c()))
+    assert_equal_ref(m.fixed_copy_c(m.fixed_r()))
 
 
 def test_dense():
-    from pybind11_tests import dense_r, dense_c, dense_copy_r, dense_copy_c
-
-    assert_equal_ref(dense_r())
-    assert_equal_ref(dense_c())
-    assert_equal_ref(dense_copy_r(dense_r()))
-    assert_equal_ref(dense_copy_c(dense_c()))
-    assert_equal_ref(dense_copy_r(dense_c()))
-    assert_equal_ref(dense_copy_c(dense_r()))
+    assert_equal_ref(m.dense_r())
+    assert_equal_ref(m.dense_c())
+    assert_equal_ref(m.dense_copy_r(m.dense_r()))
+    assert_equal_ref(m.dense_copy_c(m.dense_c()))
+    assert_equal_ref(m.dense_copy_r(m.dense_c()))
+    assert_equal_ref(m.dense_copy_c(m.dense_r()))
 
 
 def test_partially_fixed():
-    from pybind11_tests import (partial_copy_four_rm_r, partial_copy_four_rm_c,
-                                partial_copy_four_cm_r, partial_copy_four_cm_c)
-
     ref2 = np.array([[0., 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11], [12, 13, 14, 15]])
-    np.testing.assert_array_equal(partial_copy_four_rm_r(ref2), ref2)
-    np.testing.assert_array_equal(partial_copy_four_rm_c(ref2), ref2)
-    np.testing.assert_array_equal(partial_copy_four_rm_r(ref2[:, 1]), ref2[:, [1]])
-    np.testing.assert_array_equal(partial_copy_four_rm_c(ref2[0, :]), ref2[[0], :])
-    np.testing.assert_array_equal(partial_copy_four_rm_r(ref2[:, (0, 2)]), ref2[:, (0, 2)])
+    np.testing.assert_array_equal(m.partial_copy_four_rm_r(ref2), ref2)
+    np.testing.assert_array_equal(m.partial_copy_four_rm_c(ref2), ref2)
+    np.testing.assert_array_equal(m.partial_copy_four_rm_r(ref2[:, 1]), ref2[:, [1]])
+    np.testing.assert_array_equal(m.partial_copy_four_rm_c(ref2[0, :]), ref2[[0], :])
+    np.testing.assert_array_equal(m.partial_copy_four_rm_r(ref2[:, (0, 2)]), ref2[:, (0, 2)])
     np.testing.assert_array_equal(
-        partial_copy_four_rm_c(ref2[(3, 1, 2), :]), ref2[(3, 1, 2), :])
+        m.partial_copy_four_rm_c(ref2[(3, 1, 2), :]), ref2[(3, 1, 2), :])
 
-    np.testing.assert_array_equal(partial_copy_four_cm_r(ref2), ref2)
-    np.testing.assert_array_equal(partial_copy_four_cm_c(ref2), ref2)
-    np.testing.assert_array_equal(partial_copy_four_cm_r(ref2[:, 1]), ref2[:, [1]])
-    np.testing.assert_array_equal(partial_copy_four_cm_c(ref2[0, :]), ref2[[0], :])
-    np.testing.assert_array_equal(partial_copy_four_cm_r(ref2[:, (0, 2)]), ref2[:, (0, 2)])
+    np.testing.assert_array_equal(m.partial_copy_four_cm_r(ref2), ref2)
+    np.testing.assert_array_equal(m.partial_copy_four_cm_c(ref2), ref2)
+    np.testing.assert_array_equal(m.partial_copy_four_cm_r(ref2[:, 1]), ref2[:, [1]])
+    np.testing.assert_array_equal(m.partial_copy_four_cm_c(ref2[0, :]), ref2[[0], :])
+    np.testing.assert_array_equal(m.partial_copy_four_cm_r(ref2[:, (0, 2)]), ref2[:, (0, 2)])
     np.testing.assert_array_equal(
-        partial_copy_four_cm_c(ref2[(3, 1, 2), :]), ref2[(3, 1, 2), :])
+        m.partial_copy_four_cm_c(ref2[(3, 1, 2), :]), ref2[(3, 1, 2), :])
 
     # TypeError should be raise for a shape mismatch
-    functions = [partial_copy_four_rm_r, partial_copy_four_rm_c,
-                 partial_copy_four_cm_r, partial_copy_four_cm_c]
+    functions = [m.partial_copy_four_rm_r, m.partial_copy_four_rm_c,
+                 m.partial_copy_four_cm_r, m.partial_copy_four_cm_c]
     matrix_with_wrong_shape = [[1, 2],
                                [3, 4]]
     for f in functions:
@@ -75,159 +70,143 @@
 
 
 def test_mutator_descriptors():
-    from pybind11_tests import fixed_mutator_r, fixed_mutator_c, fixed_mutator_a
     zr = np.arange(30, dtype='float32').reshape(5, 6)  # row-major
     zc = zr.reshape(6, 5).transpose()  # column-major
 
-    fixed_mutator_r(zr)
-    fixed_mutator_c(zc)
-    fixed_mutator_a(zr)
-    fixed_mutator_a(zc)
+    m.fixed_mutator_r(zr)
+    m.fixed_mutator_c(zc)
+    m.fixed_mutator_a(zr)
+    m.fixed_mutator_a(zc)
     with pytest.raises(TypeError) as excinfo:
-        fixed_mutator_r(zc)
+        m.fixed_mutator_r(zc)
     assert ('(arg0: numpy.ndarray[float32[5, 6], flags.writeable, flags.c_contiguous]) -> None'
             in str(excinfo.value))
     with pytest.raises(TypeError) as excinfo:
-        fixed_mutator_c(zr)
+        m.fixed_mutator_c(zr)
     assert ('(arg0: numpy.ndarray[float32[5, 6], flags.writeable, flags.f_contiguous]) -> None'
             in str(excinfo.value))
     with pytest.raises(TypeError) as excinfo:
-        fixed_mutator_a(np.array([[1, 2], [3, 4]], dtype='float32'))
+        m.fixed_mutator_a(np.array([[1, 2], [3, 4]], dtype='float32'))
     assert ('(arg0: numpy.ndarray[float32[5, 6], flags.writeable]) -> None'
             in str(excinfo.value))
     zr.flags.writeable = False
     with pytest.raises(TypeError):
-        fixed_mutator_r(zr)
+        m.fixed_mutator_r(zr)
     with pytest.raises(TypeError):
-        fixed_mutator_a(zr)
+        m.fixed_mutator_a(zr)
 
 
 def test_cpp_casting():
-    from pybind11_tests import (cpp_copy, cpp_ref_c, cpp_ref_r, cpp_ref_any,
-                                fixed_r, fixed_c, get_cm_ref, get_rm_ref, ReturnTester)
-    assert cpp_copy(fixed_r()) == 22.
-    assert cpp_copy(fixed_c()) == 22.
+    assert m.cpp_copy(m.fixed_r()) == 22.
+    assert m.cpp_copy(m.fixed_c()) == 22.
     z = np.array([[5., 6], [7, 8]])
-    assert cpp_copy(z) == 7.
-    assert cpp_copy(get_cm_ref()) == 21.
-    assert cpp_copy(get_rm_ref()) == 21.
-    assert cpp_ref_c(get_cm_ref()) == 21.
-    assert cpp_ref_r(get_rm_ref()) == 21.
+    assert m.cpp_copy(z) == 7.
+    assert m.cpp_copy(m.get_cm_ref()) == 21.
+    assert m.cpp_copy(m.get_rm_ref()) == 21.
+    assert m.cpp_ref_c(m.get_cm_ref()) == 21.
+    assert m.cpp_ref_r(m.get_rm_ref()) == 21.
     with pytest.raises(RuntimeError) as excinfo:
-        # Can't reference fixed_c: it contains floats, cpp_ref_any wants doubles
-        cpp_ref_any(fixed_c())
+        # m.fixed_c() returns floats, but m.cpp_ref_any can only reference doubles
+        m.cpp_ref_any(m.fixed_c())
     assert 'Unable to cast Python instance' in str(excinfo.value)
     with pytest.raises(RuntimeError) as excinfo:
-        # Can't reference fixed_r: it contains floats, cpp_ref_any wants doubles
-        cpp_ref_any(fixed_r())
+        # m.fixed_r() returns floats, but m.cpp_ref_any can only reference doubles
+        m.cpp_ref_any(m.fixed_r())
     assert 'Unable to cast Python instance' in str(excinfo.value)
-    assert cpp_ref_any(ReturnTester.create()) == 1.
+    assert m.cpp_ref_any(m.ReturnTester.create()) == 1.
 
-    assert cpp_ref_any(get_cm_ref()) == 21.
-    assert cpp_ref_any(get_cm_ref()) == 21.
+    assert m.cpp_ref_any(m.get_cm_ref()) == 21.
+    assert m.cpp_ref_any(m.get_cm_ref()) == 21.
 
 
 def test_pass_readonly_array():
-    from pybind11_tests import fixed_copy_r, fixed_r, fixed_r_const
     z = np.full((5, 6), 42.0)
     z.flags.writeable = False
-    np.testing.assert_array_equal(z, fixed_copy_r(z))
-    np.testing.assert_array_equal(fixed_r_const(), fixed_r())
-    assert not fixed_r_const().flags.writeable
-    np.testing.assert_array_equal(fixed_copy_r(fixed_r_const()), fixed_r_const())
+    np.testing.assert_array_equal(z, m.fixed_copy_r(z))
+    np.testing.assert_array_equal(m.fixed_r_const(), m.fixed_r())
+    assert not m.fixed_r_const().flags.writeable
+    np.testing.assert_array_equal(m.fixed_copy_r(m.fixed_r_const()), m.fixed_r_const())
 
 
 def test_nonunit_stride_from_python():
-    from pybind11_tests import (
-        double_row, double_col, double_complex, double_mat_cm, double_mat_rm,
-        double_threec, double_threer)
-
     counting_mat = np.arange(9.0, dtype=np.float32).reshape((3, 3))
     second_row = counting_mat[1, :]
     second_col = counting_mat[:, 1]
-    np.testing.assert_array_equal(double_row(second_row), 2.0 * second_row)
-    np.testing.assert_array_equal(double_col(second_row), 2.0 * second_row)
-    np.testing.assert_array_equal(double_complex(second_row), 2.0 * second_row)
-    np.testing.assert_array_equal(double_row(second_col), 2.0 * second_col)
-    np.testing.assert_array_equal(double_col(second_col), 2.0 * second_col)
-    np.testing.assert_array_equal(double_complex(second_col), 2.0 * second_col)
+    np.testing.assert_array_equal(m.double_row(second_row), 2.0 * second_row)
+    np.testing.assert_array_equal(m.double_col(second_row), 2.0 * second_row)
+    np.testing.assert_array_equal(m.double_complex(second_row), 2.0 * second_row)
+    np.testing.assert_array_equal(m.double_row(second_col), 2.0 * second_col)
+    np.testing.assert_array_equal(m.double_col(second_col), 2.0 * second_col)
+    np.testing.assert_array_equal(m.double_complex(second_col), 2.0 * second_col)
 
     counting_3d = np.arange(27.0, dtype=np.float32).reshape((3, 3, 3))
     slices = [counting_3d[0, :, :], counting_3d[:, 0, :], counting_3d[:, :, 0]]
     for slice_idx, ref_mat in enumerate(slices):
-        np.testing.assert_array_equal(double_mat_cm(ref_mat), 2.0 * ref_mat)
-        np.testing.assert_array_equal(double_mat_rm(ref_mat), 2.0 * ref_mat)
+        np.testing.assert_array_equal(m.double_mat_cm(ref_mat), 2.0 * ref_mat)
+        np.testing.assert_array_equal(m.double_mat_rm(ref_mat), 2.0 * ref_mat)
 
     # Mutator:
-    double_threer(second_row)
-    double_threec(second_col)
+    m.double_threer(second_row)
+    m.double_threec(second_col)
     np.testing.assert_array_equal(counting_mat, [[0., 2, 2], [6, 16, 10], [6, 14, 8]])
 
 
 def test_negative_stride_from_python(msg):
-    from pybind11_tests import (
-        double_row, double_col, double_complex, double_mat_cm, double_mat_rm,
-        double_threec, double_threer)
-
-    # Eigen doesn't support (as of yet) negative strides. When a function takes an Eigen
-    # matrix by copy or const reference, we can pass a numpy array that has negative strides.
-    # Otherwise, an exception will be thrown as Eigen will not be able to map the numpy array.
+    """Eigen doesn't support (as of yet) negative strides. When a function takes an Eigen matrix by
+    copy or const reference, we can pass a numpy array that has negative strides.  Otherwise, an
+    exception will be thrown as Eigen will not be able to map the numpy array."""
 
     counting_mat = np.arange(9.0, dtype=np.float32).reshape((3, 3))
     counting_mat = counting_mat[::-1, ::-1]
     second_row = counting_mat[1, :]
     second_col = counting_mat[:, 1]
-    np.testing.assert_array_equal(double_row(second_row), 2.0 * second_row)
-    np.testing.assert_array_equal(double_col(second_row), 2.0 * second_row)
-    np.testing.assert_array_equal(double_complex(second_row), 2.0 * second_row)
-    np.testing.assert_array_equal(double_row(second_col), 2.0 * second_col)
-    np.testing.assert_array_equal(double_col(second_col), 2.0 * second_col)
-    np.testing.assert_array_equal(double_complex(second_col), 2.0 * second_col)
+    np.testing.assert_array_equal(m.double_row(second_row), 2.0 * second_row)
+    np.testing.assert_array_equal(m.double_col(second_row), 2.0 * second_row)
+    np.testing.assert_array_equal(m.double_complex(second_row), 2.0 * second_row)
+    np.testing.assert_array_equal(m.double_row(second_col), 2.0 * second_col)
+    np.testing.assert_array_equal(m.double_col(second_col), 2.0 * second_col)
+    np.testing.assert_array_equal(m.double_complex(second_col), 2.0 * second_col)
 
     counting_3d = np.arange(27.0, dtype=np.float32).reshape((3, 3, 3))
     counting_3d = counting_3d[::-1, ::-1, ::-1]
     slices = [counting_3d[0, :, :], counting_3d[:, 0, :], counting_3d[:, :, 0]]
     for slice_idx, ref_mat in enumerate(slices):
-        np.testing.assert_array_equal(double_mat_cm(ref_mat), 2.0 * ref_mat)
-        np.testing.assert_array_equal(double_mat_rm(ref_mat), 2.0 * ref_mat)
+        np.testing.assert_array_equal(m.double_mat_cm(ref_mat), 2.0 * ref_mat)
+        np.testing.assert_array_equal(m.double_mat_rm(ref_mat), 2.0 * ref_mat)
 
     # Mutator:
     with pytest.raises(TypeError) as excinfo:
-        double_threer(second_row)
+        m.double_threer(second_row)
     assert msg(excinfo.value) == """
-    double_threer(): incompatible function arguments. The following argument types are supported:
-        1. (arg0: numpy.ndarray[float32[1, 3], flags.writeable]) -> None
+        double_threer(): incompatible function arguments. The following argument types are supported:
+            1. (arg0: numpy.ndarray[float32[1, 3], flags.writeable]) -> None
 
-    Invoked with: array([ 5.,  4.,  3.], dtype=float32)
-"""
+        Invoked with: array([ 5.,  4.,  3.], dtype=float32)
+    """  # noqa: E501 line too long
 
     with pytest.raises(TypeError) as excinfo:
-        double_threec(second_col)
+        m.double_threec(second_col)
     assert msg(excinfo.value) == """
-    double_threec(): incompatible function arguments. The following argument types are supported:
-        1. (arg0: numpy.ndarray[float32[3, 1], flags.writeable]) -> None
+        double_threec(): incompatible function arguments. The following argument types are supported:
+            1. (arg0: numpy.ndarray[float32[3, 1], flags.writeable]) -> None
 
-    Invoked with: array([ 7.,  4.,  1.], dtype=float32)
-"""
+        Invoked with: array([ 7.,  4.,  1.], dtype=float32)
+    """  # noqa: E501 line too long
 
 
 def test_nonunit_stride_to_python():
-    from pybind11_tests import diagonal, diagonal_1, diagonal_n, block
-
-    assert np.all(diagonal(ref) == ref.diagonal())
-    assert np.all(diagonal_1(ref) == ref.diagonal(1))
+    assert np.all(m.diagonal(ref) == ref.diagonal())
+    assert np.all(m.diagonal_1(ref) == ref.diagonal(1))
     for i in range(-5, 7):
-        assert np.all(diagonal_n(ref, i) == ref.diagonal(i)), "diagonal_n({})".format(i)
+        assert np.all(m.diagonal_n(ref, i) == ref.diagonal(i)), "m.diagonal_n({})".format(i)
 
-    assert np.all(block(ref, 2, 1, 3, 3) == ref[2:5, 1:4])
-    assert np.all(block(ref, 1, 4, 4, 2) == ref[1:, 4:])
-    assert np.all(block(ref, 1, 4, 3, 2) == ref[1:4, 4:])
+    assert np.all(m.block(ref, 2, 1, 3, 3) == ref[2:5, 1:4])
+    assert np.all(m.block(ref, 1, 4, 4, 2) == ref[1:, 4:])
+    assert np.all(m.block(ref, 1, 4, 3, 2) == ref[1:4, 4:])
 
 
 def test_eigen_ref_to_python():
-    from pybind11_tests import cholesky1, cholesky2, cholesky3, cholesky4
-
-    chols = [cholesky1, cholesky2, cholesky3, cholesky4]
+    chols = [m.cholesky1, m.cholesky2, m.cholesky3, m.cholesky4]
     for i, chol in enumerate(chols, start=1):
         mymat = chol(np.array([[1., 2, 4], [2, 13, 23], [4, 23, 77]]))
         assert np.all(mymat == np.array([[1, 0, 0], [2, 3, 0], [4, 5, 6]])), "cholesky{}".format(i)
@@ -246,9 +225,9 @@
 
 def test_eigen_return_references():
     """Tests various ways of returning references and non-referencing copies"""
-    from pybind11_tests import ReturnTester
+
     master = np.ones((10, 10))
-    a = ReturnTester()
+    a = m.ReturnTester()
     a_get1 = a.get()
     assert not a_get1.flags.owndata and a_get1.flags.writeable
     assign_both(a_get1, master, 3, 3, 5)
@@ -358,7 +337,6 @@
 
 
 def assert_keeps_alive(cl, method, *args):
-    from pybind11_tests import ConstructorStats
     cstats = ConstructorStats.get(cl)
     start_with = cstats.alive()
     a = cl()
@@ -374,10 +352,8 @@
 
 
 def test_eigen_keepalive():
-    from pybind11_tests import ReturnTester, ConstructorStats
-    a = ReturnTester()
-
-    cstats = ConstructorStats.get(ReturnTester)
+    a = m.ReturnTester()
+    cstats = ConstructorStats.get(m.ReturnTester)
     assert cstats.alive() == 1
     unsafe = [a.ref(), a.ref_const(), a.block(1, 2, 3, 4)]
     copies = [a.copy_get(), a.copy_view(), a.copy_ref(), a.copy_ref_const(),
@@ -387,43 +363,43 @@
     del unsafe
     del copies
 
-    for meth in [ReturnTester.get, ReturnTester.get_ptr, ReturnTester.view,
-                 ReturnTester.view_ptr, ReturnTester.ref_safe, ReturnTester.ref_const_safe,
-                 ReturnTester.corners, ReturnTester.corners_const]:
-        assert_keeps_alive(ReturnTester, meth)
+    for meth in [m.ReturnTester.get, m.ReturnTester.get_ptr, m.ReturnTester.view,
+                 m.ReturnTester.view_ptr, m.ReturnTester.ref_safe, m.ReturnTester.ref_const_safe,
+                 m.ReturnTester.corners, m.ReturnTester.corners_const]:
+        assert_keeps_alive(m.ReturnTester, meth)
 
-    for meth in [ReturnTester.block_safe, ReturnTester.block_const]:
-        assert_keeps_alive(ReturnTester, meth, 4, 3, 2, 1)
+    for meth in [m.ReturnTester.block_safe, m.ReturnTester.block_const]:
+        assert_keeps_alive(m.ReturnTester, meth, 4, 3, 2, 1)
 
 
 def test_eigen_ref_mutators():
-    """Tests whether Eigen can mutate numpy values"""
-    from pybind11_tests import add_rm, add_cm, add_any, add1, add2
+    """Tests Eigen's ability to mutate numpy values"""
+
     orig = np.array([[1., 2, 3], [4, 5, 6], [7, 8, 9]])
     zr = np.array(orig)
     zc = np.array(orig, order='F')
-    add_rm(zr, 1, 0, 100)
+    m.add_rm(zr, 1, 0, 100)
     assert np.all(zr == np.array([[1., 2, 3], [104, 5, 6], [7, 8, 9]]))
-    add_cm(zc, 1, 0, 200)
+    m.add_cm(zc, 1, 0, 200)
     assert np.all(zc == np.array([[1., 2, 3], [204, 5, 6], [7, 8, 9]]))
 
-    add_any(zr, 1, 0, 20)
+    m.add_any(zr, 1, 0, 20)
     assert np.all(zr == np.array([[1., 2, 3], [124, 5, 6], [7, 8, 9]]))
-    add_any(zc, 1, 0, 10)
+    m.add_any(zc, 1, 0, 10)
     assert np.all(zc == np.array([[1., 2, 3], [214, 5, 6], [7, 8, 9]]))
 
     # Can't reference a col-major array with a row-major Ref, and vice versa:
     with pytest.raises(TypeError):
-        add_rm(zc, 1, 0, 1)
+        m.add_rm(zc, 1, 0, 1)
     with pytest.raises(TypeError):
-        add_cm(zr, 1, 0, 1)
+        m.add_cm(zr, 1, 0, 1)
 
     # Overloads:
-    add1(zr, 1, 0, -100)
-    add2(zr, 1, 0, -20)
+    m.add1(zr, 1, 0, -100)
+    m.add2(zr, 1, 0, -20)
     assert np.all(zr == orig)
-    add1(zc, 1, 0, -200)
-    add2(zc, 1, 0, -10)
+    m.add1(zc, 1, 0, -200)
+    m.add2(zc, 1, 0, -10)
     assert np.all(zc == orig)
 
     # a non-contiguous slice (this won't work on either the row- or
@@ -435,15 +411,15 @@
     assert np.all(cornersc == np.array([[1., 3], [7, 9]]))
 
     with pytest.raises(TypeError):
-        add_rm(cornersr, 0, 1, 25)
+        m.add_rm(cornersr, 0, 1, 25)
     with pytest.raises(TypeError):
-        add_cm(cornersr, 0, 1, 25)
+        m.add_cm(cornersr, 0, 1, 25)
     with pytest.raises(TypeError):
-        add_rm(cornersc, 0, 1, 25)
+        m.add_rm(cornersc, 0, 1, 25)
     with pytest.raises(TypeError):
-        add_cm(cornersc, 0, 1, 25)
-    add_any(cornersr, 0, 1, 25)
-    add_any(cornersc, 0, 1, 44)
+        m.add_cm(cornersc, 0, 1, 25)
+    m.add_any(cornersr, 0, 1, 25)
+    m.add_any(cornersc, 0, 1, 44)
     assert np.all(zr == np.array([[1., 2, 28], [4, 5, 6], [7, 8, 9]]))
     assert np.all(zc == np.array([[1., 2, 47], [4, 5, 6], [7, 8, 9]]))
 
@@ -451,30 +427,29 @@
     zro = zr[0:4, 0:4]
     zro.flags.writeable = False
     with pytest.raises(TypeError):
-        add_rm(zro, 0, 0, 0)
+        m.add_rm(zro, 0, 0, 0)
     with pytest.raises(TypeError):
-        add_any(zro, 0, 0, 0)
+        m.add_any(zro, 0, 0, 0)
     with pytest.raises(TypeError):
-        add1(zro, 0, 0, 0)
+        m.add1(zro, 0, 0, 0)
     with pytest.raises(TypeError):
-        add2(zro, 0, 0, 0)
+        m.add2(zro, 0, 0, 0)
 
     # integer array shouldn't be passable to a double-matrix-accepting mutating func:
     zi = np.array([[1, 2], [3, 4]])
     with pytest.raises(TypeError):
-        add_rm(zi)
+        m.add_rm(zi)
 
 
 def test_numpy_ref_mutators():
     """Tests numpy mutating Eigen matrices (for returned Eigen::Ref<...>s)"""
-    from pybind11_tests import (
-        get_cm_ref, get_cm_const_ref, get_rm_ref, get_rm_const_ref, reset_refs)
-    reset_refs()  # In case another test already changed it
 
-    zc = get_cm_ref()
-    zcro = get_cm_const_ref()
-    zr = get_rm_ref()
-    zrro = get_rm_const_ref()
+    m.reset_refs()  # In case another test already changed it
+
+    zc = m.get_cm_ref()
+    zcro = m.get_cm_const_ref()
+    zr = m.get_rm_ref()
+    zrro = m.get_rm_const_ref()
 
     assert [zc[1, 2], zcro[1, 2], zr[1, 2], zrro[1, 2]] == [23] * 4
 
@@ -488,12 +463,12 @@
     # We should have just changed zc, of course, but also zcro and the original eigen matrix
     assert np.all(zc == expect)
     assert np.all(zcro == expect)
-    assert np.all(get_cm_ref() == expect)
+    assert np.all(m.get_cm_ref() == expect)
 
     zr[1, 2] = 99
     assert np.all(zr == expect)
     assert np.all(zrro == expect)
-    assert np.all(get_rm_ref() == expect)
+    assert np.all(m.get_rm_ref() == expect)
 
     # Make sure the readonly ones are numpy-readonly:
     with pytest.raises(ValueError):
@@ -503,7 +478,7 @@
 
     # We should be able to explicitly copy like this (and since we're copying,
     # the const should drop away)
-    y1 = np.array(get_cm_const_ref())
+    y1 = np.array(m.get_cm_const_ref())
 
     assert y1.flags.owndata and y1.flags.writeable
     # We should get copies of the eigen data, which was modified above:
@@ -515,19 +490,18 @@
 
 def test_both_ref_mutators():
     """Tests a complex chain of nested eigen/numpy references"""
-    from pybind11_tests import (
-        incr_matrix, get_cm_ref, incr_matrix_any, even_cols, even_rows, reset_refs)
-    reset_refs()  # In case another test already changed it
 
-    z = get_cm_ref()  # numpy -> eigen
+    m.reset_refs()  # In case another test already changed it
+
+    z = m.get_cm_ref()  # numpy -> eigen
     z[0, 2] -= 3
-    z2 = incr_matrix(z, 1)  # numpy -> eigen -> numpy -> eigen
+    z2 = m.incr_matrix(z, 1)  # numpy -> eigen -> numpy -> eigen
     z2[1, 1] += 6
-    z3 = incr_matrix(z, 2)  # (numpy -> eigen)^3
+    z3 = m.incr_matrix(z, 2)  # (numpy -> eigen)^3
     z3[2, 2] += -5
-    z4 = incr_matrix(z, 3)  # (numpy -> eigen)^4
+    z4 = m.incr_matrix(z, 3)  # (numpy -> eigen)^4
     z4[1, 1] -= 1
-    z5 = incr_matrix(z, 4)  # (numpy -> eigen)^5
+    z5 = m.incr_matrix(z, 4)  # (numpy -> eigen)^5
     z5[0, 0] = 0
     assert np.all(z == z2)
     assert np.all(z == z3)
@@ -537,11 +511,11 @@
     assert np.all(z == expect)
 
     y = np.array(range(100), dtype='float64').reshape(10, 10)
-    y2 = incr_matrix_any(y, 10)  # np -> eigen -> np
-    y3 = incr_matrix_any(y2[0::2, 0::2], -33)  # np -> eigen -> np slice -> np -> eigen -> np
-    y4 = even_rows(y3)  # numpy -> eigen slice -> (... y3)
-    y5 = even_cols(y4)  # numpy -> eigen slice -> (... y4)
-    y6 = incr_matrix_any(y5, 1000)  # numpy -> eigen -> (... y5)
+    y2 = m.incr_matrix_any(y, 10)  # np -> eigen -> np
+    y3 = m.incr_matrix_any(y2[0::2, 0::2], -33)  # np -> eigen -> np slice -> np -> eigen -> np
+    y4 = m.even_rows(y3)  # numpy -> eigen slice -> (... y3)
+    y5 = m.even_cols(y4)  # numpy -> eigen slice -> (... y4)
+    y6 = m.incr_matrix_any(y5, 1000)  # numpy -> eigen -> (... y5)
 
     # Apply same mutations using just numpy:
     yexpect = np.array(range(100), dtype='float64').reshape(10, 10)
@@ -557,7 +531,6 @@
 
 
 def test_nocopy_wrapper():
-    from pybind11_tests import get_elem, get_elem_nocopy, get_elem_rm_nocopy
     # get_elem requires a column-contiguous matrix reference, but should be
     # callable with other types of matrix (via copying):
     int_matrix_colmajor = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], order='F')
@@ -566,38 +539,38 @@
     dbl_matrix_rowmajor = np.array(int_matrix_rowmajor, dtype='double', order='C', copy=True)
 
     # All should be callable via get_elem:
-    assert get_elem(int_matrix_colmajor) == 8
-    assert get_elem(dbl_matrix_colmajor) == 8
-    assert get_elem(int_matrix_rowmajor) == 8
-    assert get_elem(dbl_matrix_rowmajor) == 8
+    assert m.get_elem(int_matrix_colmajor) == 8
+    assert m.get_elem(dbl_matrix_colmajor) == 8
+    assert m.get_elem(int_matrix_rowmajor) == 8
+    assert m.get_elem(dbl_matrix_rowmajor) == 8
 
-    # All but the second should fail with get_elem_nocopy:
+    # All but the second should fail with m.get_elem_nocopy:
     with pytest.raises(TypeError) as excinfo:
-        get_elem_nocopy(int_matrix_colmajor)
+        m.get_elem_nocopy(int_matrix_colmajor)
     assert ('get_elem_nocopy(): incompatible function arguments.' in str(excinfo.value) and
             ', flags.f_contiguous' in str(excinfo.value))
-    assert get_elem_nocopy(dbl_matrix_colmajor) == 8
+    assert m.get_elem_nocopy(dbl_matrix_colmajor) == 8
     with pytest.raises(TypeError) as excinfo:
-        get_elem_nocopy(int_matrix_rowmajor)
+        m.get_elem_nocopy(int_matrix_rowmajor)
     assert ('get_elem_nocopy(): incompatible function arguments.' in str(excinfo.value) and
             ', flags.f_contiguous' in str(excinfo.value))
     with pytest.raises(TypeError) as excinfo:
-        get_elem_nocopy(dbl_matrix_rowmajor)
+        m.get_elem_nocopy(dbl_matrix_rowmajor)
     assert ('get_elem_nocopy(): incompatible function arguments.' in str(excinfo.value) and
             ', flags.f_contiguous' in str(excinfo.value))
 
     # For the row-major test, we take a long matrix in row-major, so only the third is allowed:
     with pytest.raises(TypeError) as excinfo:
-        get_elem_rm_nocopy(int_matrix_colmajor)
+        m.get_elem_rm_nocopy(int_matrix_colmajor)
     assert ('get_elem_rm_nocopy(): incompatible function arguments.' in str(excinfo.value) and
             ', flags.c_contiguous' in str(excinfo.value))
     with pytest.raises(TypeError) as excinfo:
-        get_elem_rm_nocopy(dbl_matrix_colmajor)
+        m.get_elem_rm_nocopy(dbl_matrix_colmajor)
     assert ('get_elem_rm_nocopy(): incompatible function arguments.' in str(excinfo.value) and
             ', flags.c_contiguous' in str(excinfo.value))
-    assert get_elem_rm_nocopy(int_matrix_rowmajor) == 8
+    assert m.get_elem_rm_nocopy(int_matrix_rowmajor) == 8
     with pytest.raises(TypeError) as excinfo:
-        get_elem_rm_nocopy(dbl_matrix_rowmajor)
+        m.get_elem_rm_nocopy(dbl_matrix_rowmajor)
     assert ('get_elem_rm_nocopy(): incompatible function arguments.' in str(excinfo.value) and
             ', flags.c_contiguous' in str(excinfo.value))
 
@@ -608,19 +581,16 @@
     The `Ref` caster sometimes creates a copy which needs to stay alive. This needs to
     happen both for directs casts (just the array) or indirectly (e.g. list of arrays).
     """
-    from pybind11_tests import get_elem_direct, get_elem_indirect
 
     a = np.full(shape=10, fill_value=8, dtype=np.int8)
-    assert get_elem_direct(a) == 8
+    assert m.get_elem_direct(a) == 8
 
     list_of_a = [a]
-    assert get_elem_indirect(list_of_a) == 8
+    assert m.get_elem_indirect(list_of_a) == 8
 
 
 def test_special_matrix_objects():
-    from pybind11_tests import incr_diag, symmetric_upper, symmetric_lower
-
-    assert np.all(incr_diag(7) == np.diag([1., 2, 3, 4, 5, 6, 7]))
+    assert np.all(m.incr_diag(7) == np.diag([1., 2, 3, 4, 5, 6, 7]))
 
     asymm = np.array([[ 1.,  2,  3,  4],
                       [ 5,  6,  7,  8],
@@ -633,89 +603,79 @@
             symm_lower[i, j] = symm_lower[j, i]
             symm_upper[j, i] = symm_upper[i, j]
 
-    assert np.all(symmetric_lower(asymm) == symm_lower)
-    assert np.all(symmetric_upper(asymm) == symm_upper)
+    assert np.all(m.symmetric_lower(asymm) == symm_lower)
+    assert np.all(m.symmetric_upper(asymm) == symm_upper)
 
 
 def test_dense_signature(doc):
-    from pybind11_tests import double_col, double_row, double_complex, double_mat_rm
-
-    assert doc(double_col) == """
+    assert doc(m.double_col) == """
         double_col(arg0: numpy.ndarray[float32[m, 1]]) -> numpy.ndarray[float32[m, 1]]
     """
-    assert doc(double_row) == """
+    assert doc(m.double_row) == """
         double_row(arg0: numpy.ndarray[float32[1, n]]) -> numpy.ndarray[float32[1, n]]
     """
-    assert doc(double_complex) == """
+    assert doc(m.double_complex) == """
         double_complex(arg0: numpy.ndarray[complex64[m, 1]]) -> numpy.ndarray[complex64[m, 1]]
     """
-    assert doc(double_mat_rm) == """
+    assert doc(m.double_mat_rm) == """
         double_mat_rm(arg0: numpy.ndarray[float32[m, n]]) -> numpy.ndarray[float32[m, n]]
     """
 
 
 def test_named_arguments():
-    from pybind11_tests import matrix_multiply
-
     a = np.array([[1.0, 2], [3, 4], [5, 6]])
     b = np.ones((2, 1))
 
-    assert np.all(matrix_multiply(a, b) == np.array([[3.], [7], [11]]))
-    assert np.all(matrix_multiply(A=a, B=b) == np.array([[3.], [7], [11]]))
-    assert np.all(matrix_multiply(B=b, A=a) == np.array([[3.], [7], [11]]))
+    assert np.all(m.matrix_multiply(a, b) == np.array([[3.], [7], [11]]))
+    assert np.all(m.matrix_multiply(A=a, B=b) == np.array([[3.], [7], [11]]))
+    assert np.all(m.matrix_multiply(B=b, A=a) == np.array([[3.], [7], [11]]))
 
     with pytest.raises(ValueError) as excinfo:
-        matrix_multiply(b, a)
+        m.matrix_multiply(b, a)
     assert str(excinfo.value) == 'Nonconformable matrices!'
 
     with pytest.raises(ValueError) as excinfo:
-        matrix_multiply(A=b, B=a)
+        m.matrix_multiply(A=b, B=a)
     assert str(excinfo.value) == 'Nonconformable matrices!'
 
     with pytest.raises(ValueError) as excinfo:
-        matrix_multiply(B=a, A=b)
+        m.matrix_multiply(B=a, A=b)
     assert str(excinfo.value) == 'Nonconformable matrices!'
 
 
 @pytest.requires_eigen_and_scipy
 def test_sparse():
-    from pybind11_tests import sparse_r, sparse_c, sparse_copy_r, sparse_copy_c
-
-    assert_sparse_equal_ref(sparse_r())
-    assert_sparse_equal_ref(sparse_c())
-    assert_sparse_equal_ref(sparse_copy_r(sparse_r()))
-    assert_sparse_equal_ref(sparse_copy_c(sparse_c()))
-    assert_sparse_equal_ref(sparse_copy_r(sparse_c()))
-    assert_sparse_equal_ref(sparse_copy_c(sparse_r()))
+    assert_sparse_equal_ref(m.sparse_r())
+    assert_sparse_equal_ref(m.sparse_c())
+    assert_sparse_equal_ref(m.sparse_copy_r(m.sparse_r()))
+    assert_sparse_equal_ref(m.sparse_copy_c(m.sparse_c()))
+    assert_sparse_equal_ref(m.sparse_copy_r(m.sparse_c()))
+    assert_sparse_equal_ref(m.sparse_copy_c(m.sparse_r()))
 
 
 @pytest.requires_eigen_and_scipy
 def test_sparse_signature(doc):
-    from pybind11_tests import sparse_copy_r, sparse_copy_c
-
-    assert doc(sparse_copy_r) == """
+    assert doc(m.sparse_copy_r) == """
         sparse_copy_r(arg0: scipy.sparse.csr_matrix[float32]) -> scipy.sparse.csr_matrix[float32]
     """  # noqa: E501 line too long
-    assert doc(sparse_copy_c) == """
+    assert doc(m.sparse_copy_c) == """
         sparse_copy_c(arg0: scipy.sparse.csc_matrix[float32]) -> scipy.sparse.csc_matrix[float32]
     """  # noqa: E501 line too long
 
 
 def test_issue738():
-    from pybind11_tests import iss738_f1, iss738_f2
+    """Ignore strides on a length-1 dimension (even if they would be incompatible length > 1)"""
+    assert np.all(m.iss738_f1(np.array([[1., 2, 3]])) == np.array([[1., 102, 203]]))
+    assert np.all(m.iss738_f1(np.array([[1.], [2], [3]])) == np.array([[1.], [12], [23]]))
 
-    assert np.all(iss738_f1(np.array([[1., 2, 3]])) == np.array([[1., 102, 203]]))
-    assert np.all(iss738_f1(np.array([[1.], [2], [3]])) == np.array([[1.], [12], [23]]))
-
-    assert np.all(iss738_f2(np.array([[1., 2, 3]])) == np.array([[1., 102, 203]]))
-    assert np.all(iss738_f2(np.array([[1.], [2], [3]])) == np.array([[1.], [12], [23]]))
+    assert np.all(m.iss738_f2(np.array([[1., 2, 3]])) == np.array([[1., 102, 203]]))
+    assert np.all(m.iss738_f2(np.array([[1.], [2], [3]])) == np.array([[1.], [12], [23]]))
 
 
 def test_custom_operator_new():
     """Using Eigen types as member variables requires a class-specific
     operator new with proper alignment"""
-    from pybind11_tests import CustomOperatorNew
 
-    o = CustomOperatorNew()
+    o = m.CustomOperatorNew()
     np.testing.assert_allclose(o.a, 0.0)
     np.testing.assert_allclose(o.b.diagonal(), 1.0)
diff --git a/tests/test_enum.cpp b/tests/test_enum.cpp
index 67341f4..49f31ba 100644
--- a/tests/test_enum.cpp
+++ b/tests/test_enum.cpp
@@ -9,56 +9,54 @@
 
 #include "pybind11_tests.h"
 
-enum UnscopedEnum {
-    EOne = 1,
-    ETwo
-};
-
-enum class ScopedEnum {
-    Two = 2,
-    Three
-};
-
-enum Flags {
-    Read = 4,
-    Write = 2,
-    Execute = 1
-};
-
-class ClassWithUnscopedEnum {
-public:
-    enum EMode {
-        EFirstMode = 1,
-        ESecondMode
+TEST_SUBMODULE(enums, m) {
+    // test_unscoped_enum
+    enum UnscopedEnum {
+        EOne = 1,
+        ETwo
     };
-
-    static EMode test_function(EMode mode) {
-        return mode;
-    }
-};
-
-std::string test_scoped_enum(ScopedEnum z) {
-    return "ScopedEnum::" + std::string(z == ScopedEnum::Two ? "Two" : "Three");
-}
-
-test_initializer enums([](py::module &m) {
-    m.def("test_scoped_enum", &test_scoped_enum);
-
     py::enum_<UnscopedEnum>(m, "UnscopedEnum", py::arithmetic())
         .value("EOne", EOne)
         .value("ETwo", ETwo)
         .export_values();
 
+    // test_scoped_enum
+    enum class ScopedEnum {
+        Two = 2,
+        Three
+    };
     py::enum_<ScopedEnum>(m, "ScopedEnum", py::arithmetic())
         .value("Two", ScopedEnum::Two)
         .value("Three", ScopedEnum::Three);
 
+    m.def("test_scoped_enum", [](ScopedEnum z) {
+        return "ScopedEnum::" + std::string(z == ScopedEnum::Two ? "Two" : "Three");
+    });
+
+    // test_binary_operators
+    enum Flags {
+        Read = 4,
+        Write = 2,
+        Execute = 1
+    };
     py::enum_<Flags>(m, "Flags", py::arithmetic())
         .value("Read", Flags::Read)
         .value("Write", Flags::Write)
         .value("Execute", Flags::Execute)
         .export_values();
 
+    // test_implicit_conversion
+    class ClassWithUnscopedEnum {
+    public:
+        enum EMode {
+            EFirstMode = 1,
+            ESecondMode
+        };
+
+        static EMode test_function(EMode mode) {
+            return mode;
+        }
+    };
     py::class_<ClassWithUnscopedEnum> exenum_class(m, "ClassWithUnscopedEnum");
     exenum_class.def_static("test_function", &ClassWithUnscopedEnum::test_function);
     py::enum_<ClassWithUnscopedEnum::EMode>(exenum_class, "EMode")
@@ -66,7 +64,8 @@
         .value("ESecondMode", ClassWithUnscopedEnum::ESecondMode)
         .export_values();
 
+    // test_enum_to_int
     m.def("test_enum_to_int", [](int) { });
     m.def("test_enum_to_uint", [](uint32_t) { });
     m.def("test_enum_to_long_long", [](long long) { });
-});
+}
diff --git a/tests/test_enum.py b/tests/test_enum.py
index 6cc4887..d8eff52 100644
--- a/tests/test_enum.py
+++ b/tests/test_enum.py
@@ -1,51 +1,50 @@
 import pytest
+from pybind11_tests import enums as m
 
 
 def test_unscoped_enum():
-    from pybind11_tests import UnscopedEnum, EOne
-
-    assert str(UnscopedEnum.EOne) == "UnscopedEnum.EOne"
-    assert str(UnscopedEnum.ETwo) == "UnscopedEnum.ETwo"
-    assert str(EOne) == "UnscopedEnum.EOne"
+    assert str(m.UnscopedEnum.EOne) == "UnscopedEnum.EOne"
+    assert str(m.UnscopedEnum.ETwo) == "UnscopedEnum.ETwo"
+    assert str(m.EOne) == "UnscopedEnum.EOne"
     # __members__ property
-    assert UnscopedEnum.__members__ == {"EOne": UnscopedEnum.EOne, "ETwo": UnscopedEnum.ETwo}
+    assert m.UnscopedEnum.__members__ == \
+        {"EOne": m.UnscopedEnum.EOne, "ETwo": m.UnscopedEnum.ETwo}
     # __members__ readonly
     with pytest.raises(AttributeError):
-        UnscopedEnum.__members__ = {}
+        m.UnscopedEnum.__members__ = {}
     # __members__ returns a copy
-    foo = UnscopedEnum.__members__
+    foo = m.UnscopedEnum.__members__
     foo["bar"] = "baz"
-    assert UnscopedEnum.__members__ == {"EOne": UnscopedEnum.EOne, "ETwo": UnscopedEnum.ETwo}
+    assert m.UnscopedEnum.__members__ == \
+        {"EOne": m.UnscopedEnum.EOne, "ETwo": m.UnscopedEnum.ETwo}
 
     # no TypeError exception for unscoped enum ==/!= int comparisons
-    y = UnscopedEnum.ETwo
+    y = m.UnscopedEnum.ETwo
     assert y == 2
     assert y != 3
 
-    assert int(UnscopedEnum.ETwo) == 2
-    assert str(UnscopedEnum(2)) == "UnscopedEnum.ETwo"
+    assert int(m.UnscopedEnum.ETwo) == 2
+    assert str(m.UnscopedEnum(2)) == "UnscopedEnum.ETwo"
 
     # order
-    assert UnscopedEnum.EOne < UnscopedEnum.ETwo
-    assert UnscopedEnum.EOne < 2
-    assert UnscopedEnum.ETwo > UnscopedEnum.EOne
-    assert UnscopedEnum.ETwo > 1
-    assert UnscopedEnum.ETwo <= 2
-    assert UnscopedEnum.ETwo >= 2
-    assert UnscopedEnum.EOne <= UnscopedEnum.ETwo
-    assert UnscopedEnum.EOne <= 2
-    assert UnscopedEnum.ETwo >= UnscopedEnum.EOne
-    assert UnscopedEnum.ETwo >= 1
-    assert not (UnscopedEnum.ETwo < UnscopedEnum.EOne)
-    assert not (2 < UnscopedEnum.EOne)
+    assert m.UnscopedEnum.EOne < m.UnscopedEnum.ETwo
+    assert m.UnscopedEnum.EOne < 2
+    assert m.UnscopedEnum.ETwo > m.UnscopedEnum.EOne
+    assert m.UnscopedEnum.ETwo > 1
+    assert m.UnscopedEnum.ETwo <= 2
+    assert m.UnscopedEnum.ETwo >= 2
+    assert m.UnscopedEnum.EOne <= m.UnscopedEnum.ETwo
+    assert m.UnscopedEnum.EOne <= 2
+    assert m.UnscopedEnum.ETwo >= m.UnscopedEnum.EOne
+    assert m.UnscopedEnum.ETwo >= 1
+    assert not (m.UnscopedEnum.ETwo < m.UnscopedEnum.EOne)
+    assert not (2 < m.UnscopedEnum.EOne)
 
 
 def test_scoped_enum():
-    from pybind11_tests import ScopedEnum, test_scoped_enum
-
-    assert test_scoped_enum(ScopedEnum.Three) == "ScopedEnum::Three"
-    z = ScopedEnum.Two
-    assert test_scoped_enum(z) == "ScopedEnum::Two"
+    assert m.test_scoped_enum(m.ScopedEnum.Three) == "ScopedEnum::Three"
+    z = m.ScopedEnum.Two
+    assert m.test_scoped_enum(z) == "ScopedEnum::Two"
 
     # expected TypeError exceptions for scoped enum ==/!= int comparisons
     with pytest.raises(TypeError):
@@ -54,23 +53,21 @@
         assert z != 3
 
     # order
-    assert ScopedEnum.Two < ScopedEnum.Three
-    assert ScopedEnum.Three > ScopedEnum.Two
-    assert ScopedEnum.Two <= ScopedEnum.Three
-    assert ScopedEnum.Two <= ScopedEnum.Two
-    assert ScopedEnum.Two >= ScopedEnum.Two
-    assert ScopedEnum.Three >= ScopedEnum.Two
+    assert m.ScopedEnum.Two < m.ScopedEnum.Three
+    assert m.ScopedEnum.Three > m.ScopedEnum.Two
+    assert m.ScopedEnum.Two <= m.ScopedEnum.Three
+    assert m.ScopedEnum.Two <= m.ScopedEnum.Two
+    assert m.ScopedEnum.Two >= m.ScopedEnum.Two
+    assert m.ScopedEnum.Three >= m.ScopedEnum.Two
 
 
 def test_implicit_conversion():
-    from pybind11_tests import ClassWithUnscopedEnum
+    assert str(m.ClassWithUnscopedEnum.EMode.EFirstMode) == "EMode.EFirstMode"
+    assert str(m.ClassWithUnscopedEnum.EFirstMode) == "EMode.EFirstMode"
 
-    assert str(ClassWithUnscopedEnum.EMode.EFirstMode) == "EMode.EFirstMode"
-    assert str(ClassWithUnscopedEnum.EFirstMode) == "EMode.EFirstMode"
-
-    f = ClassWithUnscopedEnum.test_function
-    first = ClassWithUnscopedEnum.EFirstMode
-    second = ClassWithUnscopedEnum.ESecondMode
+    f = m.ClassWithUnscopedEnum.test_function
+    first = m.ClassWithUnscopedEnum.EFirstMode
+    second = m.ClassWithUnscopedEnum.ESecondMode
 
     assert f(first) == 1
 
@@ -95,21 +92,19 @@
 
 
 def test_binary_operators():
-    from pybind11_tests import Flags
+    assert int(m.Flags.Read) == 4
+    assert int(m.Flags.Write) == 2
+    assert int(m.Flags.Execute) == 1
+    assert int(m.Flags.Read | m.Flags.Write | m.Flags.Execute) == 7
+    assert int(m.Flags.Read | m.Flags.Write) == 6
+    assert int(m.Flags.Read | m.Flags.Execute) == 5
+    assert int(m.Flags.Write | m.Flags.Execute) == 3
+    assert int(m.Flags.Write | 1) == 3
 
-    assert int(Flags.Read) == 4
-    assert int(Flags.Write) == 2
-    assert int(Flags.Execute) == 1
-    assert int(Flags.Read | Flags.Write | Flags.Execute) == 7
-    assert int(Flags.Read | Flags.Write) == 6
-    assert int(Flags.Read | Flags.Execute) == 5
-    assert int(Flags.Write | Flags.Execute) == 3
-    assert int(Flags.Write | 1) == 3
-
-    state = Flags.Read | Flags.Write
-    assert (state & Flags.Read) != 0
-    assert (state & Flags.Write) != 0
-    assert (state & Flags.Execute) == 0
+    state = m.Flags.Read | m.Flags.Write
+    assert (state & m.Flags.Read) != 0
+    assert (state & m.Flags.Write) != 0
+    assert (state & m.Flags.Execute) == 0
     assert (state & 1) == 0
 
     state2 = ~state
@@ -118,12 +113,9 @@
 
 
 def test_enum_to_int():
-    from pybind11_tests import Flags, ClassWithUnscopedEnum
-    from pybind11_tests import test_enum_to_int, test_enum_to_uint, test_enum_to_long_long
-
-    test_enum_to_int(Flags.Read)
-    test_enum_to_int(ClassWithUnscopedEnum.EMode.EFirstMode)
-    test_enum_to_uint(Flags.Read)
-    test_enum_to_uint(ClassWithUnscopedEnum.EMode.EFirstMode)
-    test_enum_to_long_long(Flags.Read)
-    test_enum_to_long_long(ClassWithUnscopedEnum.EMode.EFirstMode)
+    m.test_enum_to_int(m.Flags.Read)
+    m.test_enum_to_int(m.ClassWithUnscopedEnum.EMode.EFirstMode)
+    m.test_enum_to_uint(m.Flags.Read)
+    m.test_enum_to_uint(m.ClassWithUnscopedEnum.EMode.EFirstMode)
+    m.test_enum_to_long_long(m.Flags.Read)
+    m.test_enum_to_long_long(m.ClassWithUnscopedEnum.EMode.EFirstMode)
diff --git a/tests/test_eval.cpp b/tests/test_eval.cpp
index 610d0e2..e094821 100644
--- a/tests/test_eval.cpp
+++ b/tests/test_eval.cpp
@@ -11,7 +11,9 @@
 #include <pybind11/eval.h>
 #include "pybind11_tests.h"
 
-test_initializer eval([](py::module &m) {
+TEST_SUBMODULE(eval_, m) {
+    // test_evals
+
     auto global = py::dict(py::module::import("__main__").attr("__dict__"));
 
     m.def("test_eval_statements", [global]() {
@@ -86,4 +88,4 @@
         }
         return false;
     });
-});
+}
diff --git a/tests/test_eval.py b/tests/test_eval.py
index 8715dba..bda4ef6 100644
--- a/tests/test_eval.py
+++ b/tests/test_eval.py
@@ -1,19 +1,17 @@
 import os
+from pybind11_tests import eval_ as m
 
 
 def test_evals(capture):
-    from pybind11_tests import (test_eval_statements, test_eval, test_eval_single_statement,
-                                test_eval_file, test_eval_failure, test_eval_file_failure)
-
     with capture:
-        assert test_eval_statements()
+        assert m.test_eval_statements()
     assert capture == "Hello World!"
 
-    assert test_eval()
-    assert test_eval_single_statement()
+    assert m.test_eval()
+    assert m.test_eval_single_statement()
 
     filename = os.path.join(os.path.dirname(__file__), "test_eval_call.py")
-    assert test_eval_file(filename)
+    assert m.test_eval_file(filename)
 
-    assert test_eval_failure()
-    assert test_eval_file_failure()
+    assert m.test_eval_failure()
+    assert m.test_eval_file_failure()
diff --git a/tests/test_kwargs_and_defaults.cpp b/tests/test_kwargs_and_defaults.cpp
index 3180123..165f801 100644
--- a/tests/test_kwargs_and_defaults.cpp
+++ b/tests/test_kwargs_and_defaults.cpp
@@ -10,84 +10,62 @@
 #include "pybind11_tests.h"
 #include <pybind11/stl.h>
 
-std::string kw_func(int x, int y) { return "x=" + std::to_string(x) + ", y=" + std::to_string(y); }
+TEST_SUBMODULE(kwargs_and_defaults, m) {
+    auto kw_func = [](int x, int y) { return "x=" + std::to_string(x) + ", y=" + std::to_string(y); };
 
-std::string kw_func4(const std::vector<int> &entries) {
-    std::string ret = "{";
-    for (int i : entries)
-        ret += std::to_string(i) + " ";
-    ret.back() = '}';
-    return ret;
-}
-
-py::tuple args_function(py::args args) {
-    return args;
-}
-
-py::tuple args_kwargs_function(py::args args, py::kwargs kwargs) {
-    return py::make_tuple(args, kwargs);
-}
-
-py::tuple mixed_plus_args(int i, double j, py::args args) {
-    return py::make_tuple(i, j, args);
-}
-
-py::tuple mixed_plus_kwargs(int i, double j, py::kwargs kwargs) {
-    return py::make_tuple(i, j, kwargs);
-}
-
-py::tuple mixed_plus_args_kwargs(int i, double j, py::args args, py::kwargs kwargs) {
-    return py::make_tuple(i, j, args, kwargs);
-}
-
-// pybind11 won't allow these to be bound: args and kwargs, if present, must be at the end.
-void bad_args1(py::args, int) {}
-void bad_args2(py::kwargs, int) {}
-void bad_args3(py::kwargs, py::args) {}
-void bad_args4(py::args, int, py::kwargs) {}
-void bad_args5(py::args, py::kwargs, int) {}
-void bad_args6(py::args, py::args) {}
-void bad_args7(py::kwargs, py::kwargs) {}
-
-struct KWClass {
-    void foo(int, float) {}
-};
-
-test_initializer arg_keywords_and_defaults([](py::module &m) {
-    m.def("kw_func0", &kw_func);
-    m.def("kw_func1", &kw_func, py::arg("x"), py::arg("y"));
-    m.def("kw_func2", &kw_func, py::arg("x") = 100, py::arg("y") = 200);
+    // test_named_arguments
+    m.def("kw_func0", kw_func);
+    m.def("kw_func1", kw_func, py::arg("x"), py::arg("y"));
+    m.def("kw_func2", kw_func, py::arg("x") = 100, py::arg("y") = 200);
     m.def("kw_func3", [](const char *) { }, py::arg("data") = std::string("Hello world!"));
 
     /* A fancier default argument */
-    std::vector<int> list;
-    list.push_back(13);
-    list.push_back(17);
-    m.def("kw_func4", &kw_func4, py::arg("myList") = list);
+    std::vector<int> list{{13, 17}};
+    m.def("kw_func4", [](const std::vector<int> &entries) {
+        std::string ret = "{";
+        for (int i : entries)
+            ret += std::to_string(i) + " ";
+        ret.back() = '}';
+        return ret;
+    }, py::arg("myList") = list);
 
-    m.def("args_function", &args_function);
-    m.def("args_kwargs_function", &args_kwargs_function);
+    m.def("kw_func_udl", kw_func, "x"_a, "y"_a=300);
+    m.def("kw_func_udl_z", kw_func, "x"_a, "y"_a=0);
 
-    m.def("kw_func_udl", &kw_func, "x"_a, "y"_a=300);
-    m.def("kw_func_udl_z", &kw_func, "x"_a, "y"_a=0);
+    // test_args_and_kwargs
+    m.def("args_function", [](py::args args) -> py::tuple { return args; });
+    m.def("args_kwargs_function", [](py::args args, py::kwargs kwargs) {
+        return py::make_tuple(args, kwargs);
+    });
 
+    // test_mixed_args_and_kwargs
+    m.def("mixed_plus_args", [](int i, double j, py::args args) {
+        return py::make_tuple(i, j, args);
+    });
+    m.def("mixed_plus_kwargs", [](int i, double j, py::kwargs kwargs) {
+        return py::make_tuple(i, j, kwargs);
+    });
+    auto mixed_plus_both = [](int i, double j, py::args args, py::kwargs kwargs) {
+        return py::make_tuple(i, j, args, kwargs);
+    };
+    m.def("mixed_plus_args_kwargs", mixed_plus_both);
+
+    m.def("mixed_plus_args_kwargs_defaults", mixed_plus_both,
+            py::arg("i") = 1, py::arg("j") = 3.14159);
+
+    // pybind11 won't allow these to be bound: args and kwargs, if present, must be at the end.
+    // Uncomment these to test that the static_assert is indeed working:
+//    m.def("bad_args1", [](py::args, int) {});
+//    m.def("bad_args2", [](py::kwargs, int) {});
+//    m.def("bad_args3", [](py::kwargs, py::args) {});
+//    m.def("bad_args4", [](py::args, int, py::kwargs) {});
+//    m.def("bad_args5", [](py::args, py::kwargs, int) {});
+//    m.def("bad_args6", [](py::args, py::args) {});
+//    m.def("bad_args7", [](py::kwargs, py::kwargs) {});
+
+    // test_function_signatures (along with most of the above)
+    struct KWClass { void foo(int, float) {} };
     py::class_<KWClass>(m, "KWClass")
         .def("foo0", &KWClass::foo)
         .def("foo1", &KWClass::foo, "x"_a, "y"_a);
-
-    m.def("mixed_plus_args", &mixed_plus_args);
-    m.def("mixed_plus_kwargs", &mixed_plus_kwargs);
-    m.def("mixed_plus_args_kwargs", &mixed_plus_args_kwargs);
-
-    m.def("mixed_plus_args_kwargs_defaults", &mixed_plus_args_kwargs,
-            py::arg("i") = 1, py::arg("j") = 3.14159);
-
-    // Uncomment these to test that the static_assert is indeed working:
-//    m.def("bad_args1", &bad_args1);
-//    m.def("bad_args2", &bad_args2);
-//    m.def("bad_args3", &bad_args3);
-//    m.def("bad_args4", &bad_args4);
-//    m.def("bad_args5", &bad_args5);
-//    m.def("bad_args6", &bad_args6);
-//    m.def("bad_args7", &bad_args7);
-});
+}
diff --git a/tests/test_kwargs_and_defaults.py b/tests/test_kwargs_and_defaults.py
index 90f8489..733fe85 100644
--- a/tests/test_kwargs_and_defaults.py
+++ b/tests/test_kwargs_and_defaults.py
@@ -1,65 +1,64 @@
 import pytest
-from pybind11_tests import (kw_func0, kw_func1, kw_func2, kw_func3, kw_func4, args_function,
-                            args_kwargs_function, kw_func_udl, kw_func_udl_z, KWClass)
+from pybind11_tests import kwargs_and_defaults as m
 
 
 def test_function_signatures(doc):
-    assert doc(kw_func0) == "kw_func0(arg0: int, arg1: int) -> str"
-    assert doc(kw_func1) == "kw_func1(x: int, y: int) -> str"
-    assert doc(kw_func2) == "kw_func2(x: int=100, y: int=200) -> str"
-    assert doc(kw_func3) == "kw_func3(data: str='Hello world!') -> None"
-    assert doc(kw_func4) == "kw_func4(myList: List[int]=[13, 17]) -> str"
-    assert doc(kw_func_udl) == "kw_func_udl(x: int, y: int=300) -> str"
-    assert doc(kw_func_udl_z) == "kw_func_udl_z(x: int, y: int=0) -> str"
-    assert doc(args_function) == "args_function(*args) -> tuple"
-    assert doc(args_kwargs_function) == "args_kwargs_function(*args, **kwargs) -> tuple"
-    assert doc(KWClass.foo0) == "foo0(self: m.KWClass, arg0: int, arg1: float) -> None"
-    assert doc(KWClass.foo1) == "foo1(self: m.KWClass, x: int, y: float) -> None"
+    assert doc(m.kw_func0) == "kw_func0(arg0: int, arg1: int) -> str"
+    assert doc(m.kw_func1) == "kw_func1(x: int, y: int) -> str"
+    assert doc(m.kw_func2) == "kw_func2(x: int=100, y: int=200) -> str"
+    assert doc(m.kw_func3) == "kw_func3(data: str='Hello world!') -> None"
+    assert doc(m.kw_func4) == "kw_func4(myList: List[int]=[13, 17]) -> str"
+    assert doc(m.kw_func_udl) == "kw_func_udl(x: int, y: int=300) -> str"
+    assert doc(m.kw_func_udl_z) == "kw_func_udl_z(x: int, y: int=0) -> str"
+    assert doc(m.args_function) == "args_function(*args) -> tuple"
+    assert doc(m.args_kwargs_function) == "args_kwargs_function(*args, **kwargs) -> tuple"
+    assert doc(m.KWClass.foo0) == \
+        "foo0(self: m.kwargs_and_defaults.KWClass, arg0: int, arg1: float) -> None"
+    assert doc(m.KWClass.foo1) == \
+        "foo1(self: m.kwargs_and_defaults.KWClass, x: int, y: float) -> None"
 
 
 def test_named_arguments(msg):
-    assert kw_func0(5, 10) == "x=5, y=10"
+    assert m.kw_func0(5, 10) == "x=5, y=10"
 
-    assert kw_func1(5, 10) == "x=5, y=10"
-    assert kw_func1(5, y=10) == "x=5, y=10"
-    assert kw_func1(y=10, x=5) == "x=5, y=10"
+    assert m.kw_func1(5, 10) == "x=5, y=10"
+    assert m.kw_func1(5, y=10) == "x=5, y=10"
+    assert m.kw_func1(y=10, x=5) == "x=5, y=10"
 
-    assert kw_func2() == "x=100, y=200"
-    assert kw_func2(5) == "x=5, y=200"
-    assert kw_func2(x=5) == "x=5, y=200"
-    assert kw_func2(y=10) == "x=100, y=10"
-    assert kw_func2(5, 10) == "x=5, y=10"
-    assert kw_func2(x=5, y=10) == "x=5, y=10"
+    assert m.kw_func2() == "x=100, y=200"
+    assert m.kw_func2(5) == "x=5, y=200"
+    assert m.kw_func2(x=5) == "x=5, y=200"
+    assert m.kw_func2(y=10) == "x=100, y=10"
+    assert m.kw_func2(5, 10) == "x=5, y=10"
+    assert m.kw_func2(x=5, y=10) == "x=5, y=10"
 
     with pytest.raises(TypeError) as excinfo:
         # noinspection PyArgumentList
-        kw_func2(x=5, y=10, z=12)
+        m.kw_func2(x=5, y=10, z=12)
     assert excinfo.match(
         r'(?s)^kw_func2\(\): incompatible.*Invoked with: kwargs: ((x=5|y=10|z=12)(, |$))' + '{3}$')
 
-    assert kw_func4() == "{13 17}"
-    assert kw_func4(myList=[1, 2, 3]) == "{1 2 3}"
+    assert m.kw_func4() == "{13 17}"
+    assert m.kw_func4(myList=[1, 2, 3]) == "{1 2 3}"
 
-    assert kw_func_udl(x=5, y=10) == "x=5, y=10"
-    assert kw_func_udl_z(x=5) == "x=5, y=0"
+    assert m.kw_func_udl(x=5, y=10) == "x=5, y=10"
+    assert m.kw_func_udl_z(x=5) == "x=5, y=0"
 
 
 def test_arg_and_kwargs():
     args = 'arg1_value', 'arg2_value', 3
-    assert args_function(*args) == args
+    assert m.args_function(*args) == args
 
     args = 'a1', 'a2'
     kwargs = dict(arg3='a3', arg4=4)
-    assert args_kwargs_function(*args, **kwargs) == (args, kwargs)
+    assert m.args_kwargs_function(*args, **kwargs) == (args, kwargs)
 
 
 def test_mixed_args_and_kwargs(msg):
-    from pybind11_tests import (mixed_plus_args, mixed_plus_kwargs, mixed_plus_args_kwargs,
-                                mixed_plus_args_kwargs_defaults)
-    mpa = mixed_plus_args
-    mpk = mixed_plus_kwargs
-    mpak = mixed_plus_args_kwargs
-    mpakd = mixed_plus_args_kwargs_defaults
+    mpa = m.mixed_plus_args
+    mpk = m.mixed_plus_kwargs
+    mpak = m.mixed_plus_args_kwargs
+    mpakd = m.mixed_plus_args_kwargs_defaults
 
     assert mpa(1, 2.5, 4, 99.5, None) == (1, 2.5, (4, 99.5, None))
     assert mpa(1, 2.5) == (1, 2.5, ())
diff --git a/tests/test_methods_and_attributes.cpp b/tests/test_methods_and_attributes.cpp
index 8e0fc2d..cd15869 100644
--- a/tests/test_methods_and_attributes.cpp
+++ b/tests/test_methods_and_attributes.cpp
@@ -26,40 +26,43 @@
     void operator=(const ExampleMandA &e) { print_copy_assigned(this); value = e.value; }
     void operator=(ExampleMandA &&e) { print_move_assigned(this); value = e.value; }
 
-    void add1(ExampleMandA other) { value += other.value; }           // passing by value
-    void add2(ExampleMandA &other) { value += other.value; }          // passing by reference
-    void add3(const ExampleMandA &other) { value += other.value; }    // passing by const reference
-    void add4(ExampleMandA *other) { value += other->value; }         // passing by pointer
-    void add5(const ExampleMandA *other) { value += other->value; }   // passing by const pointer
+    void add1(ExampleMandA other) { value += other.value; }         // passing by value
+    void add2(ExampleMandA &other) { value += other.value; }        // passing by reference
+    void add3(const ExampleMandA &other) { value += other.value; }  // passing by const reference
+    void add4(ExampleMandA *other) { value += other->value; }       // passing by pointer
+    void add5(const ExampleMandA *other) { value += other->value; } // passing by const pointer
 
-    void add6(int other) { value += other; }                      // passing by value
-    void add7(int &other) { value += other; }                     // passing by reference
-    void add8(const int &other) { value += other; }               // passing by const reference
-    void add9(int *other) { value += *other; }                    // passing by pointer
-    void add10(const int *other) { value += *other; }             // passing by const pointer
+    void add6(int other) { value += other; }                        // passing by value
+    void add7(int &other) { value += other; }                       // passing by reference
+    void add8(const int &other) { value += other; }                 // passing by const reference
+    void add9(int *other) { value += *other; }                      // passing by pointer
+    void add10(const int *other) { value += *other; }               // passing by const pointer
 
-    ExampleMandA self1() { return *this; }                            // return by value
-    ExampleMandA &self2() { return *this; }                           // return by reference
-    const ExampleMandA &self3() { return *this; }                     // return by const reference
-    ExampleMandA *self4() { return this; }                            // return by pointer
-    const ExampleMandA *self5() { return this; }                      // return by const pointer
+    ExampleMandA self1() { return *this; }                          // return by value
+    ExampleMandA &self2() { return *this; }                         // return by reference
+    const ExampleMandA &self3() { return *this; }                   // return by const reference
+    ExampleMandA *self4() { return this; }                          // return by pointer
+    const ExampleMandA *self5() { return this; }                    // return by const pointer
 
-    int internal1() { return value; }                             // return by value
-    int &internal2() { return value; }                            // return by reference
-    const int &internal3() { return value; }                      // return by const reference
-    int *internal4() { return &value; }                           // return by pointer
-    const int *internal5() { return &value; }                     // return by const pointer
+    int internal1() { return value; }                               // return by value
+    int &internal2() { return value; }                              // return by reference
+    const int &internal3() { return value; }                        // return by const reference
+    int *internal4() { return &value; }                             // return by pointer
+    const int *internal5() { return &value; }                       // return by const pointer
 
+    py::str overloaded()             { return "()"; }
+    py::str overloaded(int)          { return "(int)"; }
     py::str overloaded(int, float)   { return "(int, float)"; }
     py::str overloaded(float, int)   { return "(float, int)"; }
     py::str overloaded(int, int)     { return "(int, int)"; }
     py::str overloaded(float, float) { return "(float, float)"; }
+    py::str overloaded(int)          const { return "(int) const"; }
     py::str overloaded(int, float)   const { return "(int, float) const"; }
     py::str overloaded(float, int)   const { return "(float, int) const"; }
     py::str overloaded(int, int)     const { return "(int, int) const"; }
     py::str overloaded(float, float) const { return "(float, float) const"; }
 
-    static py::str overloaded() { return "static"; }
+    static py::str overloaded(float) { return "static float"; }
 
     int value = 0;
 };
@@ -74,41 +77,28 @@
     static int static_get() { return static_value; }
     static void static_set(int v) { static_value = v; }
 };
-
 int TestProperties::static_value = 1;
 
 struct TestPropertiesOverride : TestProperties {
     int value = 99;
     static int static_value;
 };
-
 int TestPropertiesOverride::static_value = 99;
 
-struct SimpleValue { int value = 1; };
-
 struct TestPropRVP {
-    SimpleValue v1;
-    SimpleValue v2;
-    static SimpleValue sv1;
-    static SimpleValue sv2;
+    UserType v1{1};
+    UserType v2{1};
+    static UserType sv1;
+    static UserType sv2;
 
-    const SimpleValue &get1() const { return v1; }
-    const SimpleValue &get2() const { return v2; }
-    SimpleValue get_rvalue() const { return v2; }
-    void set1(int v) { v1.value = v; }
-    void set2(int v) { v2.value = v; }
+    const UserType &get1() const { return v1; }
+    const UserType &get2() const { return v2; }
+    UserType get_rvalue() const { return v2; }
+    void set1(int v) { v1.set(v); }
+    void set2(int v) { v2.set(v); }
 };
-
-SimpleValue TestPropRVP::sv1{};
-SimpleValue TestPropRVP::sv2{};
-
-class DynamicClass {
-public:
-    DynamicClass() { print_default_created(this); }
-    ~DynamicClass() { print_destroyed(this); }
-};
-
-class CppDerivedDynamicClass : public DynamicClass { };
+UserType TestPropRVP::sv1(1);
+UserType TestPropRVP::sv2(1);
 
 // py::arg/py::arg_v testing: these arguments just record their argument when invoked
 class ArgInspector1 { public: std::string arg = "(default arg inspector 1)"; };
@@ -180,9 +170,6 @@
 };
 }}
 
-// Issue/PR #648: bad arg default debugging output
-class NotRegistered {};
-
 // Test None-allowed py::arg argument policy
 class NoneTester { public: int answer = 42; };
 int none1(const NoneTester &obj) { return obj.answer; }
@@ -215,7 +202,8 @@
     double sum() const { return rw_value + ro_value; }
 };
 
-test_initializer methods_and_attributes([](py::module &m) {
+TEST_SUBMODULE(methods_and_attributes, m) {
+    // test_methods_and_attributes
     py::class_<ExampleMandA> emna(m, "ExampleMandA");
     emna.def(py::init<>())
         .def(py::init<int>())
@@ -241,43 +229,52 @@
         .def("internal4", &ExampleMandA::internal4)
         .def("internal5", &ExampleMandA::internal5)
 #if defined(PYBIND11_OVERLOAD_CAST)
+        .def("overloaded", py::overload_cast<>(&ExampleMandA::overloaded))
+        .def("overloaded", py::overload_cast<int>(&ExampleMandA::overloaded))
         .def("overloaded", py::overload_cast<int,   float>(&ExampleMandA::overloaded))
         .def("overloaded", py::overload_cast<float,   int>(&ExampleMandA::overloaded))
         .def("overloaded", py::overload_cast<int,     int>(&ExampleMandA::overloaded))
         .def("overloaded", py::overload_cast<float, float>(&ExampleMandA::overloaded))
         .def("overloaded_float", py::overload_cast<float, float>(&ExampleMandA::overloaded))
+        .def("overloaded_const", py::overload_cast<int         >(&ExampleMandA::overloaded, py::const_))
         .def("overloaded_const", py::overload_cast<int,   float>(&ExampleMandA::overloaded, py::const_))
         .def("overloaded_const", py::overload_cast<float,   int>(&ExampleMandA::overloaded, py::const_))
         .def("overloaded_const", py::overload_cast<int,     int>(&ExampleMandA::overloaded, py::const_))
         .def("overloaded_const", py::overload_cast<float, float>(&ExampleMandA::overloaded, py::const_))
 #else
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)()>(&ExampleMandA::overloaded))
+        .def("overloaded", static_cast<py::str (ExampleMandA::*)(int)>(&ExampleMandA::overloaded))
         .def("overloaded", static_cast<py::str (ExampleMandA::*)(int,   float)>(&ExampleMandA::overloaded))
         .def("overloaded", static_cast<py::str (ExampleMandA::*)(float,   int)>(&ExampleMandA::overloaded))
         .def("overloaded", static_cast<py::str (ExampleMandA::*)(int,     int)>(&ExampleMandA::overloaded))
         .def("overloaded", static_cast<py::str (ExampleMandA::*)(float, float)>(&ExampleMandA::overloaded))
         .def("overloaded_float", static_cast<py::str (ExampleMandA::*)(float, float)>(&ExampleMandA::overloaded))
+        .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int         ) const>(&ExampleMandA::overloaded))
         .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int,   float) const>(&ExampleMandA::overloaded))
         .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float,   int) const>(&ExampleMandA::overloaded))
         .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(int,     int) const>(&ExampleMandA::overloaded))
         .def("overloaded_const", static_cast<py::str (ExampleMandA::*)(float, float) const>(&ExampleMandA::overloaded))
 #endif
+        // test_no_mixed_overloads
         // Raise error if trying to mix static/non-static overloads on the same name:
         .def_static("add_mixed_overloads1", []() {
-            auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(py::module::import("pybind11_tests").attr("ExampleMandA"));
+            auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(py::module::import("pybind11_tests.methods_and_attributes").attr("ExampleMandA"));
             emna.def       ("overload_mixed1", static_cast<py::str (ExampleMandA::*)(int, int)>(&ExampleMandA::overloaded))
-                .def_static("overload_mixed1", static_cast<py::str (              *)(        )>(&ExampleMandA::overloaded));
+                .def_static("overload_mixed1", static_cast<py::str (              *)(float   )>(&ExampleMandA::overloaded));
         })
         .def_static("add_mixed_overloads2", []() {
-            auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(py::module::import("pybind11_tests").attr("ExampleMandA"));
-            emna.def_static("overload_mixed2", static_cast<py::str (              *)(        )>(&ExampleMandA::overloaded))
+            auto emna = py::reinterpret_borrow<py::class_<ExampleMandA>>(py::module::import("pybind11_tests.methods_and_attributes").attr("ExampleMandA"));
+            emna.def_static("overload_mixed2", static_cast<py::str (              *)(float   )>(&ExampleMandA::overloaded))
                 .def       ("overload_mixed2", static_cast<py::str (ExampleMandA::*)(int, int)>(&ExampleMandA::overloaded));
         })
         .def("__str__", &ExampleMandA::toString)
         .def_readwrite("value", &ExampleMandA::value);
 
+    // test_copy_method
     // Issue #443: can't call copied methods in Python 3
     emna.attr("add2b") = emna.attr("add2");
 
+    // test_properties, test_static_properties, test_static_cls
     py::class_<TestProperties>(m, "TestProperties")
         .def(py::init<>())
         .def_readonly("def_readonly", &TestProperties::value)
@@ -300,15 +297,13 @@
         .def_readonly("def_readonly", &TestPropertiesOverride::value)
         .def_readonly_static("def_readonly_static", &TestPropertiesOverride::static_value);
 
-    py::class_<SimpleValue>(m, "SimpleValue")
-        .def_readwrite("value", &SimpleValue::value);
-
-    auto static_get1 = [](py::object) -> const SimpleValue & { return TestPropRVP::sv1; };
-    auto static_get2 = [](py::object) -> const SimpleValue & { return TestPropRVP::sv2; };
-    auto static_set1 = [](py::object, int v) { TestPropRVP::sv1.value = v; };
-    auto static_set2 = [](py::object, int v) { TestPropRVP::sv2.value = v; };
+    auto static_get1 = [](py::object) -> const UserType & { return TestPropRVP::sv1; };
+    auto static_get2 = [](py::object) -> const UserType & { return TestPropRVP::sv2; };
+    auto static_set1 = [](py::object, int v) { TestPropRVP::sv1.set(v); };
+    auto static_set2 = [](py::object, int v) { TestPropRVP::sv2.set(v); };
     auto rvp_copy = py::return_value_policy::copy;
 
+    // test_property_return_value_policies
     py::class_<TestPropRVP>(m, "TestPropRVP")
         .def(py::init<>())
         .def_property_readonly("ro_ref", &TestPropRVP::get1)
@@ -323,21 +318,32 @@
         .def_property_static("static_rw_ref", static_get1, static_set1)
         .def_property_static("static_rw_copy", static_get2, static_set2, rvp_copy)
         .def_property_static("static_rw_func", py::cpp_function(static_get2, rvp_copy), static_set2)
+        // test_property_rvalue_policy
         .def_property_readonly("rvalue", &TestPropRVP::get_rvalue)
-        .def_property_readonly_static("static_rvalue", [](py::object) { return SimpleValue(); });
+        .def_property_readonly_static("static_rvalue", [](py::object) { return UserType(1); });
 
+    // test_metaclass_override
     struct MetaclassOverride { };
     py::class_<MetaclassOverride>(m, "MetaclassOverride", py::metaclass((PyObject *) &PyType_Type))
         .def_property_readonly_static("readonly", [](py::object) { return 1; });
 
 #if !defined(PYPY_VERSION)
+    // test_dynamic_attributes
+    class DynamicClass {
+    public:
+        DynamicClass() { print_default_created(this); }
+        ~DynamicClass() { print_destroyed(this); }
+    };
     py::class_<DynamicClass>(m, "DynamicClass", py::dynamic_attr())
         .def(py::init());
 
+    class CppDerivedDynamicClass : public DynamicClass { };
     py::class_<CppDerivedDynamicClass, DynamicClass>(m, "CppDerivedDynamicClass")
         .def(py::init());
 #endif
 
+    // test_noconvert_args
+    //
     // Test converting.  The ArgAlwaysConverts is just there to make the first no-conversion pass
     // fail so that our call always ends up happening via the second dispatch (the one that allows
     // some conversion).
@@ -363,6 +369,7 @@
     m.def("ints_preferred", [](int i) { return i / 2; }, py::arg("i"));
     m.def("ints_only", [](int i) { return i / 2; }, py::arg("i").noconvert());
 
+    // test_bad_arg_default
     // Issue/PR #648: bad arg default debugging output
 #if !defined(NDEBUG)
     m.attr("debug_enabled") = true;
@@ -371,13 +378,14 @@
 #endif
     m.def("bad_arg_def_named", []{
         auto m = py::module::import("pybind11_tests");
-        m.def("should_fail", [](int, NotRegistered) {}, py::arg(), py::arg("a") = NotRegistered());
+        m.def("should_fail", [](int, UnregisteredType) {}, py::arg(), py::arg("a") = UnregisteredType());
     });
     m.def("bad_arg_def_unnamed", []{
         auto m = py::module::import("pybind11_tests");
-        m.def("should_fail", [](int, NotRegistered) {}, py::arg(), py::arg() = NotRegistered());
+        m.def("should_fail", [](int, UnregisteredType) {}, py::arg(), py::arg() = UnregisteredType());
     });
 
+    // test_accepts_none
     py::class_<NoneTester, std::shared_ptr<NoneTester>>(m, "NoneTester")
         .def(py::init<>());
     m.def("no_none1", &none1, py::arg().none(false));
@@ -391,6 +399,7 @@
     m.def("ok_none4", &none4, py::arg().none(true));
     m.def("ok_none5", &none5);
 
+    // test_str_issue
     // Issue #283: __str__ called on uninitialized instance when constructor arguments invalid
     py::class_<StrIssue>(m, "StrIssue")
         .def(py::init<int>())
@@ -399,6 +408,8 @@
             return "StrIssue[" + std::to_string(si.val) + "]"; }
         );
 
+    // test_unregistered_base_implementations
+    //
     // Issues #854/910: incompatible function args when member function/pointer is in unregistered
     // base class The methods and member pointers below actually resolve to members/pointers in
     // UnregisteredBase; before this test/fix they would be registered via lambda with a first
@@ -410,8 +421,8 @@
         .def_readwrite("rw_value", &RegisteredDerived::rw_value)
         .def_readonly("ro_value", &RegisteredDerived::ro_value)
         // These should trigger a static_assert if uncommented
-        //.def_readwrite("fails", &SimpleValue::value) // should trigger a static_assert if uncommented
-        //.def_readonly("fails", &SimpleValue::value) // should trigger a static_assert if uncommented
+        //.def_readwrite("fails", &UserType::value) // should trigger a static_assert if uncommented
+        //.def_readonly("fails", &UserType::value) // should trigger a static_assert if uncommented
         .def_property("rw_value_prop", &RegisteredDerived::get_int, &RegisteredDerived::set_int)
         .def_property_readonly("ro_value_prop", &RegisteredDerived::get_double)
         // This one is in the registered class:
@@ -432,4 +443,4 @@
     m.def("custom_caster_destroy_const", []() -> const DestructionTester * { return new DestructionTester(); },
             py::return_value_policy::take_ownership); // Likewise (const doesn't inhibit destruction)
     m.def("destruction_tester_cstats", &ConstructorStats::get<DestructionTester>, py::return_value_policy::reference);
-});
+}
diff --git a/tests/test_methods_and_attributes.py b/tests/test_methods_and_attributes.py
index 0557380..9fd9cb7 100644
--- a/tests/test_methods_and_attributes.py
+++ b/tests/test_methods_and_attributes.py
@@ -1,10 +1,11 @@
 import pytest
-from pybind11_tests import ExampleMandA, ConstructorStats
+from pybind11_tests import methods_and_attributes as m
+from pybind11_tests import ConstructorStats
 
 
 def test_methods_and_attributes():
-    instance1 = ExampleMandA()
-    instance2 = ExampleMandA(32)
+    instance1 = m.ExampleMandA()
+    instance2 = m.ExampleMandA(32)
 
     instance1.add1(instance2)
     instance1.add2(instance2)
@@ -31,10 +32,13 @@
     assert instance1.internal4() == 320
     assert instance1.internal5() == 320
 
+    assert instance1.overloaded() == "()"
+    assert instance1.overloaded(0) == "(int)"
     assert instance1.overloaded(1, 1.0) == "(int, float)"
     assert instance1.overloaded(2.0, 2) == "(float, int)"
     assert instance1.overloaded(3,   3) == "(int, int)"
     assert instance1.overloaded(4., 4.) == "(float, float)"
+    assert instance1.overloaded_const(-3) == "(int) const"
     assert instance1.overloaded_const(5, 5.0) == "(int, float) const"
     assert instance1.overloaded_const(6.0, 6) == "(float, int) const"
     assert instance1.overloaded_const(7,   7) == "(int, int) const"
@@ -48,7 +52,7 @@
     instance1.value = 100
     assert str(instance1) == "ExampleMandA[value=100]"
 
-    cstats = ConstructorStats.get(ExampleMandA)
+    cstats = ConstructorStats.get(m.ExampleMandA)
     assert cstats.alive() == 2
     del instance1, instance2
     assert cstats.alive() == 0
@@ -60,10 +64,25 @@
     assert cstats.move_assignments == 0
 
 
-def test_properties():
-    from pybind11_tests import TestProperties
+def test_copy_method():
+    """Issue #443: calling copied methods fails in Python 3"""
 
-    instance = TestProperties()
+    m.ExampleMandA.add2c = m.ExampleMandA.add2
+    m.ExampleMandA.add2d = m.ExampleMandA.add2b
+    a = m.ExampleMandA(123)
+    assert a.value == 123
+    a.add2(m.ExampleMandA(-100))
+    assert a.value == 23
+    a.add2b(m.ExampleMandA(20))
+    assert a.value == 43
+    a.add2c(m.ExampleMandA(6))
+    assert a.value == 49
+    a.add2d(m.ExampleMandA(-7))
+    assert a.value == 42
+
+
+def test_properties():
+    instance = m.TestProperties()
 
     assert instance.def_readonly == 1
     with pytest.raises(AttributeError):
@@ -80,122 +99,97 @@
     assert instance.def_property == 3
 
 
-def test_copy_method():
-    """Issue #443: calling copied methods fails in Python 3"""
-    from pybind11_tests import ExampleMandA
-
-    ExampleMandA.add2c = ExampleMandA.add2
-    ExampleMandA.add2d = ExampleMandA.add2b
-    a = ExampleMandA(123)
-    assert a.value == 123
-    a.add2(ExampleMandA(-100))
-    assert a.value == 23
-    a.add2b(ExampleMandA(20))
-    assert a.value == 43
-    a.add2c(ExampleMandA(6))
-    assert a.value == 49
-    a.add2d(ExampleMandA(-7))
-    assert a.value == 42
-
-
 def test_static_properties():
-    from pybind11_tests import TestProperties as Type
-
-    assert Type.def_readonly_static == 1
+    assert m.TestProperties.def_readonly_static == 1
     with pytest.raises(AttributeError) as excinfo:
-        Type.def_readonly_static = 2
+        m.TestProperties.def_readonly_static = 2
     assert "can't set attribute" in str(excinfo)
 
-    Type.def_readwrite_static = 2
-    assert Type.def_readwrite_static == 2
+    m.TestProperties.def_readwrite_static = 2
+    assert m.TestProperties.def_readwrite_static == 2
 
-    assert Type.def_property_readonly_static == 2
+    assert m.TestProperties.def_property_readonly_static == 2
     with pytest.raises(AttributeError) as excinfo:
-        Type.def_property_readonly_static = 3
+        m.TestProperties.def_property_readonly_static = 3
     assert "can't set attribute" in str(excinfo)
 
-    Type.def_property_static = 3
-    assert Type.def_property_static == 3
+    m.TestProperties.def_property_static = 3
+    assert m.TestProperties.def_property_static == 3
 
     # Static property read and write via instance
-    instance = Type()
+    instance = m.TestProperties()
 
-    Type.def_readwrite_static = 0
-    assert Type.def_readwrite_static == 0
+    m.TestProperties.def_readwrite_static = 0
+    assert m.TestProperties.def_readwrite_static == 0
     assert instance.def_readwrite_static == 0
 
     instance.def_readwrite_static = 2
-    assert Type.def_readwrite_static == 2
+    assert m.TestProperties.def_readwrite_static == 2
     assert instance.def_readwrite_static == 2
 
     # It should be possible to override properties in derived classes
-    from pybind11_tests import TestPropertiesOverride as TypeOverride
-
-    assert TypeOverride().def_readonly == 99
-    assert TypeOverride.def_readonly_static == 99
+    assert m.TestPropertiesOverride().def_readonly == 99
+    assert m.TestPropertiesOverride.def_readonly_static == 99
 
 
 def test_static_cls():
     """Static property getter and setters expect the type object as the their only argument"""
-    from pybind11_tests import TestProperties as Type
 
-    instance = Type()
-    assert Type.static_cls is Type
-    assert instance.static_cls is Type
+    instance = m.TestProperties()
+    assert m.TestProperties.static_cls is m.TestProperties
+    assert instance.static_cls is m.TestProperties
 
     def check_self(self):
-        assert self is Type
+        assert self is m.TestProperties
 
-    Type.static_cls = check_self
+    m.TestProperties.static_cls = check_self
     instance.static_cls = check_self
 
 
 def test_metaclass_override():
     """Overriding pybind11's default metaclass changes the behavior of `static_property`"""
-    from pybind11_tests import MetaclassOverride
 
-    assert type(ExampleMandA).__name__ == "pybind11_type"
-    assert type(MetaclassOverride).__name__ == "type"
+    assert type(m.ExampleMandA).__name__ == "pybind11_type"
+    assert type(m.MetaclassOverride).__name__ == "type"
 
-    assert MetaclassOverride.readonly == 1
-    assert type(MetaclassOverride.__dict__["readonly"]).__name__ == "pybind11_static_property"
+    assert m.MetaclassOverride.readonly == 1
+    assert type(m.MetaclassOverride.__dict__["readonly"]).__name__ == "pybind11_static_property"
 
     # Regular `type` replaces the property instead of calling `__set__()`
-    MetaclassOverride.readonly = 2
-    assert MetaclassOverride.readonly == 2
-    assert isinstance(MetaclassOverride.__dict__["readonly"], int)
+    m.MetaclassOverride.readonly = 2
+    assert m.MetaclassOverride.readonly == 2
+    assert isinstance(m.MetaclassOverride.__dict__["readonly"], int)
 
 
 def test_no_mixed_overloads():
     from pybind11_tests import debug_enabled
 
     with pytest.raises(RuntimeError) as excinfo:
-        ExampleMandA.add_mixed_overloads1()
+        m.ExampleMandA.add_mixed_overloads1()
     assert (str(excinfo.value) ==
             "overloading a method with both static and instance methods is not supported; " +
             ("compile in debug mode for more details" if not debug_enabled else
              "error while attempting to bind static method ExampleMandA.overload_mixed1"
-             "() -> str")
+             "(arg0: float) -> str")
             )
 
     with pytest.raises(RuntimeError) as excinfo:
-        ExampleMandA.add_mixed_overloads2()
+        m.ExampleMandA.add_mixed_overloads2()
     assert (str(excinfo.value) ==
             "overloading a method with both static and instance methods is not supported; " +
             ("compile in debug mode for more details" if not debug_enabled else
              "error while attempting to bind instance method ExampleMandA.overload_mixed2"
-             "(self: pybind11_tests.ExampleMandA, arg0: int, arg1: int) -> str")
+             "(self: pybind11_tests.methods_and_attributes.ExampleMandA, arg0: int, arg1: int)"
+             " -> str")
             )
 
 
 @pytest.mark.parametrize("access", ["ro", "rw", "static_ro", "static_rw"])
 def test_property_return_value_policies(access):
-    from pybind11_tests import TestPropRVP
-
     if not access.startswith("static"):
-        obj = TestPropRVP()
+        obj = m.TestPropRVP()
     else:
-        obj = TestPropRVP
+        obj = m.TestPropRVP
 
     ref = getattr(obj, access + "_ref")
     assert ref.value == 1
@@ -216,30 +210,20 @@
 
 def test_property_rvalue_policy():
     """When returning an rvalue, the return value policy is automatically changed from
-    `reference(_internal)` to `move`. The following would not work otherwise.
-    """
-    from pybind11_tests import TestPropRVP
+    `reference(_internal)` to `move`. The following would not work otherwise."""
 
-    instance = TestPropRVP()
+    instance = m.TestPropRVP()
     o = instance.rvalue
     assert o.value == 1
 
-
-def test_property_rvalue_policy_static():
-    """When returning an rvalue, the return value policy is automatically changed from
-    `reference(_internal)` to `move`. The following would not work otherwise.
-    """
-    from pybind11_tests import TestPropRVP
-    o = TestPropRVP.static_rvalue
-    assert o.value == 1
+    os = m.TestPropRVP.static_rvalue
+    assert os.value == 1
 
 
 # https://bitbucket.org/pypy/pypy/issues/2447
 @pytest.unsupported_on_pypy
 def test_dynamic_attributes():
-    from pybind11_tests import DynamicClass, CppDerivedDynamicClass
-
-    instance = DynamicClass()
+    instance = m.DynamicClass()
     assert not hasattr(instance, "foo")
     assert "foo" not in dir(instance)
 
@@ -259,16 +243,16 @@
         instance.__dict__ = []
     assert str(excinfo.value) == "__dict__ must be set to a dictionary, not a 'list'"
 
-    cstats = ConstructorStats.get(DynamicClass)
+    cstats = ConstructorStats.get(m.DynamicClass)
     assert cstats.alive() == 1
     del instance
     assert cstats.alive() == 0
 
     # Derived classes should work as well
-    class PythonDerivedDynamicClass(DynamicClass):
+    class PythonDerivedDynamicClass(m.DynamicClass):
         pass
 
-    for cls in CppDerivedDynamicClass, PythonDerivedDynamicClass:
+    for cls in m.CppDerivedDynamicClass, PythonDerivedDynamicClass:
         derived = cls()
         derived.foobar = 100
         assert derived.foobar == 100
@@ -281,20 +265,18 @@
 # https://bitbucket.org/pypy/pypy/issues/2447
 @pytest.unsupported_on_pypy
 def test_cyclic_gc():
-    from pybind11_tests import DynamicClass
-
     # One object references itself
-    instance = DynamicClass()
+    instance = m.DynamicClass()
     instance.circular_reference = instance
 
-    cstats = ConstructorStats.get(DynamicClass)
+    cstats = ConstructorStats.get(m.DynamicClass)
     assert cstats.alive() == 1
     del instance
     assert cstats.alive() == 0
 
     # Two object reference each other
-    i1 = DynamicClass()
-    i2 = DynamicClass()
+    i1 = m.DynamicClass()
+    i2 = m.DynamicClass()
     i1.cycle = i2
     i2.cycle = i1
 
@@ -304,8 +286,6 @@
 
 
 def test_noconvert_args(msg):
-    import pybind11_tests as m
-
     a = m.ArgInspector()
     assert msg(a.f("hi")) == """
         loading ArgInspector1 argument WITH conversion allowed.  Argument value = hi
@@ -369,23 +349,23 @@
 
 
 def test_bad_arg_default(msg):
-    from pybind11_tests import debug_enabled, bad_arg_def_named, bad_arg_def_unnamed
+    from pybind11_tests import debug_enabled
 
     with pytest.raises(RuntimeError) as excinfo:
-        bad_arg_def_named()
+        m.bad_arg_def_named()
     assert msg(excinfo.value) == (
-        "arg(): could not convert default argument 'a: NotRegistered' in function 'should_fail' "
-        "into a Python object (type not registered yet?)"
+        "arg(): could not convert default argument 'a: UnregisteredType' in function "
+        "'should_fail' into a Python object (type not registered yet?)"
         if debug_enabled else
         "arg(): could not convert default argument into a Python object (type not registered "
         "yet?). Compile in debug mode for more information."
     )
 
     with pytest.raises(RuntimeError) as excinfo:
-        bad_arg_def_unnamed()
+        m.bad_arg_def_unnamed()
     assert msg(excinfo.value) == (
-        "arg(): could not convert default argument 'NotRegistered' in function 'should_fail' "
-        "into a Python object (type not registered yet?)"
+        "arg(): could not convert default argument 'UnregisteredType' in function "
+        "'should_fail' into a Python object (type not registered yet?)"
         if debug_enabled else
         "arg(): could not convert default argument into a Python object (type not registered "
         "yet?). Compile in debug mode for more information."
@@ -393,76 +373,69 @@
 
 
 def test_accepts_none(msg):
-    from pybind11_tests import (NoneTester,
-                                no_none1, no_none2, no_none3, no_none4, no_none5,
-                                ok_none1, ok_none2, ok_none3, ok_none4, ok_none5)
-
-    a = NoneTester()
-    assert no_none1(a) == 42
-    assert no_none2(a) == 42
-    assert no_none3(a) == 42
-    assert no_none4(a) == 42
-    assert no_none5(a) == 42
-    assert ok_none1(a) == 42
-    assert ok_none2(a) == 42
-    assert ok_none3(a) == 42
-    assert ok_none4(a) == 42
-    assert ok_none5(a) == 42
+    a = m.NoneTester()
+    assert m.no_none1(a) == 42
+    assert m.no_none2(a) == 42
+    assert m.no_none3(a) == 42
+    assert m.no_none4(a) == 42
+    assert m.no_none5(a) == 42
+    assert m.ok_none1(a) == 42
+    assert m.ok_none2(a) == 42
+    assert m.ok_none3(a) == 42
+    assert m.ok_none4(a) == 42
+    assert m.ok_none5(a) == 42
 
     with pytest.raises(TypeError) as excinfo:
-        no_none1(None)
+        m.no_none1(None)
     assert "incompatible function arguments" in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
-        no_none2(None)
+        m.no_none2(None)
     assert "incompatible function arguments" in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
-        no_none3(None)
+        m.no_none3(None)
     assert "incompatible function arguments" in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
-        no_none4(None)
+        m.no_none4(None)
     assert "incompatible function arguments" in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
-        no_none5(None)
+        m.no_none5(None)
     assert "incompatible function arguments" in str(excinfo.value)
 
     # The first one still raises because you can't pass None as a lvalue reference arg:
     with pytest.raises(TypeError) as excinfo:
-        assert ok_none1(None) == -1
+        assert m.ok_none1(None) == -1
     assert msg(excinfo.value) == """
         ok_none1(): incompatible function arguments. The following argument types are supported:
-            1. (arg0: m.NoneTester) -> int
+            1. (arg0: m.methods_and_attributes.NoneTester) -> int
 
         Invoked with: None
     """
 
     # The rest take the argument as pointer or holder, and accept None:
-    assert ok_none2(None) == -1
-    assert ok_none3(None) == -1
-    assert ok_none4(None) == -1
-    assert ok_none5(None) == -1
+    assert m.ok_none2(None) == -1
+    assert m.ok_none3(None) == -1
+    assert m.ok_none4(None) == -1
+    assert m.ok_none5(None) == -1
 
 
 def test_str_issue(msg):
     """#283: __str__ called on uninitialized instance when constructor arguments invalid"""
-    from pybind11_tests import StrIssue
 
-    assert str(StrIssue(3)) == "StrIssue[3]"
+    assert str(m.StrIssue(3)) == "StrIssue[3]"
 
     with pytest.raises(TypeError) as excinfo:
-        str(StrIssue("no", "such", "constructor"))
+        str(m.StrIssue("no", "such", "constructor"))
     assert msg(excinfo.value) == """
         __init__(): incompatible constructor arguments. The following argument types are supported:
-            1. m.StrIssue(arg0: int)
-            2. m.StrIssue()
+            1. m.methods_and_attributes.StrIssue(arg0: int)
+            2. m.methods_and_attributes.StrIssue()
 
         Invoked with: 'no', 'such', 'constructor'
     """
 
 
 def test_unregistered_base_implementations():
-    from pybind11_tests import RegisteredDerived
-
-    a = RegisteredDerived()
+    a = m.RegisteredDerived()
     a.do_nothing()
     assert a.rw_value == 42
     assert a.ro_value == 1.25
@@ -480,11 +453,8 @@
 
 
 def test_custom_caster_destruction():
-    """
-    Tests that returning a pointer to a type that gets converted with a custom type caster gets
-    destroyed when the function has py::return_value_policy::take_ownership policy applied.
-    """
-    import pybind11_tests as m
+    """Tests that a returned pointer to a type that gets converted with a custom type caster is
+    destroyed when the function has py::return_value_policy::take_ownership policy applied."""
 
     cstats = m.destruction_tester_cstats()
     # This one *doesn't* have take_ownership: the pointer should be used but not destroyed:
diff --git a/tests/test_modules.cpp b/tests/test_modules.cpp
index 555ae07..c1475fa 100644
--- a/tests/test_modules.cpp
+++ b/tests/test_modules.cpp
@@ -11,42 +11,38 @@
 #include "pybind11_tests.h"
 #include "constructor_stats.h"
 
-std::string submodule_func() {
-    return "submodule_func()";
-}
+TEST_SUBMODULE(modules, m) {
+    // test_nested_modules
+    py::module m_sub = m.def_submodule("subsubmodule");
+    m_sub.def("submodule_func", []() { return "submodule_func()"; });
 
-class A {
-public:
-    A(int v) : v(v) { print_created(this, v); }
-    ~A() { print_destroyed(this); }
-    A(const A&) { print_copy_created(this); }
-    A& operator=(const A &copy) { print_copy_assigned(this); v = copy.v; return *this; }
-    std::string toString() { return "A[" + std::to_string(v) + "]"; }
-private:
-    int v;
-};
-
-class B {
-public:
-    B() { print_default_created(this); }
-    ~B() { print_destroyed(this); }
-    B(const B&) { print_copy_created(this); }
-    B& operator=(const B &copy) { print_copy_assigned(this); a1 = copy.a1; a2 = copy.a2; return *this; }
-    A &get_a1() { return a1; }
-    A &get_a2() { return a2; }
-
-    A a1{1};
-    A a2{2};
-};
-
-test_initializer modules([](py::module &m) {
-    py::module m_sub = m.def_submodule("submodule");
-    m_sub.def("submodule_func", &submodule_func);
-
+    // test_reference_internal
+    class A {
+    public:
+        A(int v) : v(v) { print_created(this, v); }
+        ~A() { print_destroyed(this); }
+        A(const A&) { print_copy_created(this); }
+        A& operator=(const A &copy) { print_copy_assigned(this); v = copy.v; return *this; }
+        std::string toString() { return "A[" + std::to_string(v) + "]"; }
+    private:
+        int v;
+    };
     py::class_<A>(m_sub, "A")
         .def(py::init<int>())
         .def("__repr__", &A::toString);
 
+    class B {
+    public:
+        B() { print_default_created(this); }
+        ~B() { print_destroyed(this); }
+        B(const B&) { print_copy_created(this); }
+        B& operator=(const B &copy) { print_copy_assigned(this); a1 = copy.a1; a2 = copy.a2; return *this; }
+        A &get_a1() { return a1; }
+        A &get_a2() { return a2; }
+
+        A a1{1};
+        A a2{2};
+    };
     py::class_<B>(m_sub, "B")
         .def(py::init<>())
         .def("get_a1", &B::get_a1, "Return the internal A 1", py::return_value_policy::reference_internal)
@@ -56,6 +52,7 @@
 
     m.attr("OD") = py::module::import("collections").attr("OrderedDict");
 
+    // test_duplicate_registration
     // Registering two things with the same name
     m.def("duplicate_registration", []() {
         class Dupe1 { };
@@ -98,4 +95,4 @@
 
         return failures;
     });
-});
+}
diff --git a/tests/test_modules.py b/tests/test_modules.py
index 17c00c8..2552838 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -1,32 +1,34 @@
+from pybind11_tests import modules as m
+from pybind11_tests.modules import subsubmodule as ms
+from pybind11_tests import ConstructorStats
+
 
 def test_nested_modules():
     import pybind11_tests
-    from pybind11_tests.submodule import submodule_func
-
     assert pybind11_tests.__name__ == "pybind11_tests"
-    assert pybind11_tests.submodule.__name__ == "pybind11_tests.submodule"
+    assert pybind11_tests.modules.__name__ == "pybind11_tests.modules"
+    assert pybind11_tests.modules.subsubmodule.__name__ == "pybind11_tests.modules.subsubmodule"
+    assert m.__name__ == "pybind11_tests.modules"
+    assert ms.__name__ == "pybind11_tests.modules.subsubmodule"
 
-    assert submodule_func() == "submodule_func()"
+    assert ms.submodule_func() == "submodule_func()"
 
 
 def test_reference_internal():
-    from pybind11_tests import ConstructorStats
-    from pybind11_tests.submodule import A, B
-
-    b = B()
+    b = ms.B()
     assert str(b.get_a1()) == "A[1]"
     assert str(b.a1) == "A[1]"
     assert str(b.get_a2()) == "A[2]"
     assert str(b.a2) == "A[2]"
 
-    b.a1 = A(42)
-    b.a2 = A(43)
+    b.a1 = ms.A(42)
+    b.a2 = ms.A(43)
     assert str(b.get_a1()) == "A[42]"
     assert str(b.a1) == "A[42]"
     assert str(b.get_a2()) == "A[43]"
     assert str(b.a2) == "A[43]"
 
-    astats, bstats = ConstructorStats.get(A), ConstructorStats.get(B)
+    astats, bstats = ConstructorStats.get(ms.A), ConstructorStats.get(ms.B)
     assert astats.alive() == 2
     assert bstats.alive() == 1
     del b
@@ -47,7 +49,7 @@
 
 
 def test_importing():
-    from pybind11_tests import OD
+    from pybind11_tests.modules import OD
     from collections import OrderedDict
 
     assert OD is OrderedDict
@@ -66,6 +68,5 @@
 
 def test_duplicate_registration():
     """Registering two things with the same name"""
-    from pybind11_tests import duplicate_registration
 
-    assert duplicate_registration() == []
+    assert m.duplicate_registration() == []
diff --git a/tests/test_numpy_array.cpp b/tests/test_numpy_array.cpp
index ea5be79..2046c0e 100644
--- a/tests/test_numpy_array.cpp
+++ b/tests/test_numpy_array.cpp
@@ -26,20 +26,6 @@
     return arr(a.size() - a.index_at(index...), a.data(index...));
 }
 
-arr& mutate_data(arr& a) {
-    auto ptr = (uint8_t *) a.mutable_data();
-    for (ssize_t i = 0; i < a.nbytes(); i++)
-        ptr[i] = (uint8_t) (ptr[i] * 2);
-    return a;
-}
-
-arr_t& mutate_data_t(arr_t& a) {
-    auto ptr = a.mutable_data();
-    for (ssize_t i = 0; i < a.size(); i++)
-        ptr[i]++;
-    return a;
-}
-
 template<typename... Ix> arr& mutate_data(arr& a, Ix... index) {
     auto ptr = (uint8_t *) a.mutable_data(index...);
     for (ssize_t i = 0; i < a.nbytes() - a.offset_at(index...); i++)
@@ -82,9 +68,11 @@
     return l.release();
 }
 
-test_initializer numpy_array([](py::module &m) {
-    auto sm = m.def_submodule("array");
+TEST_SUBMODULE(numpy_array, sm) {
+    try { py::module::import("numpy"); }
+    catch (...) { return; }
 
+    // test_array_attributes
     sm.def("ndim", [](const arr& a) { return a.ndim(); });
     sm.def("shape", [](const arr& a) { return arr(a.ndim(), a.shape()); });
     sm.def("shape", [](const arr& a, ssize_t dim) { return a.shape(dim); });
@@ -96,25 +84,25 @@
     sm.def("nbytes", [](const arr& a) { return a.nbytes(); });
     sm.def("owndata", [](const arr& a) { return a.owndata(); });
 
-    def_index_fn(data, const arr&);
-    def_index_fn(data_t, const arr_t&);
+    // test_index_offset
     def_index_fn(index_at, const arr&);
     def_index_fn(index_at_t, const arr_t&);
     def_index_fn(offset_at, const arr&);
     def_index_fn(offset_at_t, const arr_t&);
+    // test_data
+    def_index_fn(data, const arr&);
+    def_index_fn(data_t, const arr_t&);
+    // test_mutate_data, test_mutate_readonly
     def_index_fn(mutate_data, arr&);
     def_index_fn(mutate_data_t, arr_t&);
     def_index_fn(at_t, const arr_t&);
     def_index_fn(mutate_at_t, arr_t&);
 
-    sm.def("make_f_array", [] {
-        return py::array_t<float>({ 2, 2 }, { 4, 8 });
-    });
+    // test_make_c_f_array
+    sm.def("make_f_array", [] { return py::array_t<float>({ 2, 2 }, { 4, 8 }); });
+    sm.def("make_c_array", [] { return py::array_t<float>({ 2, 2 }, { 8, 4 }); });
 
-    sm.def("make_c_array", [] {
-        return py::array_t<float>({ 2, 2 }, { 8, 4 });
-    });
-
+    // test_wrap
     sm.def("wrap", [](py::array a) {
         return py::array(
             a.dtype(),
@@ -125,12 +113,12 @@
         );
     });
 
+    // test_numpy_view
     struct ArrayClass {
         int data[2] = { 1, 2 };
         ArrayClass() { py::print("ArrayClass()"); }
         ~ArrayClass() { py::print("~ArrayClass()"); }
     };
-
     py::class_<ArrayClass>(sm, "ArrayClass")
         .def(py::init<>())
         .def("numpy_view", [](py::object &obj) {
@@ -140,16 +128,18 @@
         }
     );
 
+    // test_cast_numpy_int64_to_uint64
     sm.def("function_taking_uint64", [](uint64_t) { });
 
+    // test_isinstance
     sm.def("isinstance_untyped", [](py::object yes, py::object no) {
         return py::isinstance<py::array>(yes) && !py::isinstance<py::array>(no);
     });
-
     sm.def("isinstance_typed", [](py::object o) {
         return py::isinstance<py::array_t<double>>(o) && !py::isinstance<py::array_t<int>>(o);
     });
 
+    // test_constructors
     sm.def("default_constructors", []() {
         return py::dict(
             "array"_a=py::array(),
@@ -157,7 +147,6 @@
             "array_t<double>"_a=py::array_t<double>()
         );
     });
-
     sm.def("converting_constructors", [](py::object o) {
         return py::dict(
             "array"_a=py::array(o),
@@ -166,7 +155,7 @@
         );
     });
 
-    // Overload resolution tests:
+    // test_overload_resolution
     sm.def("overloaded", [](py::array_t<double>) { return "double"; });
     sm.def("overloaded", [](py::array_t<float>) { return "float"; });
     sm.def("overloaded", [](py::array_t<int>) { return "int"; });
@@ -194,11 +183,13 @@
     sm.def("overloaded5", [](py::array_t<unsigned int>) { return "unsigned int"; });
     sm.def("overloaded5", [](py::array_t<double>) { return "double"; });
 
+    // test_greedy_string_overload
     // Issue 685: ndarray shouldn't go to std::string overload
     sm.def("issue685", [](std::string) { return "string"; });
     sm.def("issue685", [](py::array) { return "array"; });
     sm.def("issue685", [](py::object) { return "other"; });
 
+    // test_array_unchecked_fixed_dims
     sm.def("proxy_add2", [](py::array_t<double> a, double v) {
         auto r = a.mutable_unchecked<2>();
         for (ssize_t i = 0; i < r.shape(0); i++)
@@ -238,6 +229,7 @@
         return auxiliaries(r, r2);
     });
 
+    // test_array_unchecked_dyn_dims
     // Same as the above, but without a compile-time dimensions specification:
     sm.def("proxy_add2_dyn", [](py::array_t<double> a, double v) {
         auto r = a.mutable_unchecked();
@@ -264,19 +256,21 @@
         return auxiliaries(a, a);
     });
 
+    // test_array_failure
     // Issue #785: Uninformative "Unknown internal error" exception when constructing array from empty object:
     sm.def("array_fail_test", []() { return py::array(py::object()); });
     sm.def("array_t_fail_test", []() { return py::array_t<double>(py::object()); });
-
     // Make sure the error from numpy is being passed through:
     sm.def("array_fail_test_negative_size", []() { int c = 0; return py::array(-1, &c); });
 
+    // test_initializer_list
     // Issue (unnumbered; reported in #788): regression: initializer lists can be ambiguous
-    sm.def("array_initializer_list", []() { return py::array_t<float>(1); }); // { 1 } also works, but clang warns about it
-    sm.def("array_initializer_list", []() { return py::array_t<float>({ 1, 2 }); });
-    sm.def("array_initializer_list", []() { return py::array_t<float>({ 1, 2, 3 }); });
-    sm.def("array_initializer_list", []() { return py::array_t<float>({ 1, 2, 3, 4 }); });
+    sm.def("array_initializer_list1", []() { return py::array_t<float>(1); }); // { 1 } also works, but clang warns about it
+    sm.def("array_initializer_list2", []() { return py::array_t<float>({ 1, 2 }); });
+    sm.def("array_initializer_list3", []() { return py::array_t<float>({ 1, 2, 3 }); });
+    sm.def("array_initializer_list4", []() { return py::array_t<float>({ 1, 2, 3, 4 }); });
 
+    // test_array_resize
     // reshape array to 2D without changing size
     sm.def("array_reshape2", [](py::array_t<double> a) {
         const ssize_t dim_sz = (ssize_t)std::sqrt(a.size());
@@ -290,6 +284,7 @@
         a.resize({N, N, N}, refcheck);
     });
 
+    // test_array_create_and_resize
     // return 2D array with Nrows = Ncols = N
     sm.def("create_and_resize", [](size_t N) {
         py::array_t<double> a;
@@ -297,4 +292,4 @@
         std::fill(a.mutable_data(), a.mutable_data() + a.size(), 42.);
         return a;
     });
-});
+}
diff --git a/tests/test_numpy_array.py b/tests/test_numpy_array.py
index 90fa142..2743393 100644
--- a/tests/test_numpy_array.py
+++ b/tests/test_numpy_array.py
@@ -1,4 +1,5 @@
 import pytest
+from pybind11_tests import numpy_array as m
 
 pytestmark = pytest.requires_numpy
 
@@ -12,62 +13,55 @@
 
 
 def test_array_attributes():
-    from pybind11_tests.array import (
-        ndim, shape, strides, writeable, size, itemsize, nbytes, owndata
-    )
-
     a = np.array(0, 'f8')
-    assert ndim(a) == 0
-    assert all(shape(a) == [])
-    assert all(strides(a) == [])
+    assert m.ndim(a) == 0
+    assert all(m.shape(a) == [])
+    assert all(m.strides(a) == [])
     with pytest.raises(IndexError) as excinfo:
-        shape(a, 0)
+        m.shape(a, 0)
     assert str(excinfo.value) == 'invalid axis: 0 (ndim = 0)'
     with pytest.raises(IndexError) as excinfo:
-        strides(a, 0)
+        m.strides(a, 0)
     assert str(excinfo.value) == 'invalid axis: 0 (ndim = 0)'
-    assert writeable(a)
-    assert size(a) == 1
-    assert itemsize(a) == 8
-    assert nbytes(a) == 8
-    assert owndata(a)
+    assert m.writeable(a)
+    assert m.size(a) == 1
+    assert m.itemsize(a) == 8
+    assert m.nbytes(a) == 8
+    assert m.owndata(a)
 
     a = np.array([[1, 2, 3], [4, 5, 6]], 'u2').view()
     a.flags.writeable = False
-    assert ndim(a) == 2
-    assert all(shape(a) == [2, 3])
-    assert shape(a, 0) == 2
-    assert shape(a, 1) == 3
-    assert all(strides(a) == [6, 2])
-    assert strides(a, 0) == 6
-    assert strides(a, 1) == 2
+    assert m.ndim(a) == 2
+    assert all(m.shape(a) == [2, 3])
+    assert m.shape(a, 0) == 2
+    assert m.shape(a, 1) == 3
+    assert all(m.strides(a) == [6, 2])
+    assert m.strides(a, 0) == 6
+    assert m.strides(a, 1) == 2
     with pytest.raises(IndexError) as excinfo:
-        shape(a, 2)
+        m.shape(a, 2)
     assert str(excinfo.value) == 'invalid axis: 2 (ndim = 2)'
     with pytest.raises(IndexError) as excinfo:
-        strides(a, 2)
+        m.strides(a, 2)
     assert str(excinfo.value) == 'invalid axis: 2 (ndim = 2)'
-    assert not writeable(a)
-    assert size(a) == 6
-    assert itemsize(a) == 2
-    assert nbytes(a) == 12
-    assert not owndata(a)
+    assert not m.writeable(a)
+    assert m.size(a) == 6
+    assert m.itemsize(a) == 2
+    assert m.nbytes(a) == 12
+    assert not m.owndata(a)
 
 
 @pytest.mark.parametrize('args, ret', [([], 0), ([0], 0), ([1], 3), ([0, 1], 1), ([1, 2], 5)])
 def test_index_offset(arr, args, ret):
-    from pybind11_tests.array import index_at, index_at_t, offset_at, offset_at_t
-    assert index_at(arr, *args) == ret
-    assert index_at_t(arr, *args) == ret
-    assert offset_at(arr, *args) == ret * arr.dtype.itemsize
-    assert offset_at_t(arr, *args) == ret * arr.dtype.itemsize
+    assert m.index_at(arr, *args) == ret
+    assert m.index_at_t(arr, *args) == ret
+    assert m.offset_at(arr, *args) == ret * arr.dtype.itemsize
+    assert m.offset_at_t(arr, *args) == ret * arr.dtype.itemsize
 
 
 def test_dim_check_fail(arr):
-    from pybind11_tests.array import (index_at, index_at_t, offset_at, offset_at_t, data, data_t,
-                                      mutate_data, mutate_data_t)
-    for func in (index_at, index_at_t, offset_at, offset_at_t, data, data_t,
-                 mutate_data, mutate_data_t):
+    for func in (m.index_at, m.index_at_t, m.offset_at, m.offset_at_t, m.data, m.data_t,
+                 m.mutate_data, m.mutate_data_t):
         with pytest.raises(IndexError) as excinfo:
             func(arr, 1, 2, 3)
         assert str(excinfo.value) == 'too many indices for an array: 3 (ndim = 2)'
@@ -79,63 +73,53 @@
                           ([0, 1], [2, 3, 4, 5, 6]),
                           ([1, 2], [6])])
 def test_data(arr, args, ret):
-    from pybind11_tests.array import data, data_t
     from sys import byteorder
-    assert all(data_t(arr, *args) == ret)
-    assert all(data(arr, *args)[(0 if byteorder == 'little' else 1)::2] == ret)
-    assert all(data(arr, *args)[(1 if byteorder == 'little' else 0)::2] == 0)
-
-
-def test_mutate_readonly(arr):
-    from pybind11_tests.array import mutate_data, mutate_data_t, mutate_at_t
-    arr.flags.writeable = False
-    for func, args in (mutate_data, ()), (mutate_data_t, ()), (mutate_at_t, (0, 0)):
-        with pytest.raises(ValueError) as excinfo:
-            func(arr, *args)
-        assert str(excinfo.value) == 'array is not writeable'
+    assert all(m.data_t(arr, *args) == ret)
+    assert all(m.data(arr, *args)[(0 if byteorder == 'little' else 1)::2] == ret)
+    assert all(m.data(arr, *args)[(1 if byteorder == 'little' else 0)::2] == 0)
 
 
 @pytest.mark.parametrize('dim', [0, 1, 3])
 def test_at_fail(arr, dim):
-    from pybind11_tests.array import at_t, mutate_at_t
-    for func in at_t, mutate_at_t:
+    for func in m.at_t, m.mutate_at_t:
         with pytest.raises(IndexError) as excinfo:
             func(arr, *([0] * dim))
         assert str(excinfo.value) == 'index dimension mismatch: {} (ndim = 2)'.format(dim)
 
 
 def test_at(arr):
-    from pybind11_tests.array import at_t, mutate_at_t
+    assert m.at_t(arr, 0, 2) == 3
+    assert m.at_t(arr, 1, 0) == 4
 
-    assert at_t(arr, 0, 2) == 3
-    assert at_t(arr, 1, 0) == 4
+    assert all(m.mutate_at_t(arr, 0, 2).ravel() == [1, 2, 4, 4, 5, 6])
+    assert all(m.mutate_at_t(arr, 1, 0).ravel() == [1, 2, 4, 5, 5, 6])
 
-    assert all(mutate_at_t(arr, 0, 2).ravel() == [1, 2, 4, 4, 5, 6])
-    assert all(mutate_at_t(arr, 1, 0).ravel() == [1, 2, 4, 5, 5, 6])
+
+def test_mutate_readonly(arr):
+    arr.flags.writeable = False
+    for func, args in (m.mutate_data, ()), (m.mutate_data_t, ()), (m.mutate_at_t, (0, 0)):
+        with pytest.raises(ValueError) as excinfo:
+            func(arr, *args)
+        assert str(excinfo.value) == 'array is not writeable'
 
 
 def test_mutate_data(arr):
-    from pybind11_tests.array import mutate_data, mutate_data_t
+    assert all(m.mutate_data(arr).ravel() == [2, 4, 6, 8, 10, 12])
+    assert all(m.mutate_data(arr).ravel() == [4, 8, 12, 16, 20, 24])
+    assert all(m.mutate_data(arr, 1).ravel() == [4, 8, 12, 32, 40, 48])
+    assert all(m.mutate_data(arr, 0, 1).ravel() == [4, 16, 24, 64, 80, 96])
+    assert all(m.mutate_data(arr, 1, 2).ravel() == [4, 16, 24, 64, 80, 192])
 
-    assert all(mutate_data(arr).ravel() == [2, 4, 6, 8, 10, 12])
-    assert all(mutate_data(arr).ravel() == [4, 8, 12, 16, 20, 24])
-    assert all(mutate_data(arr, 1).ravel() == [4, 8, 12, 32, 40, 48])
-    assert all(mutate_data(arr, 0, 1).ravel() == [4, 16, 24, 64, 80, 96])
-    assert all(mutate_data(arr, 1, 2).ravel() == [4, 16, 24, 64, 80, 192])
-
-    assert all(mutate_data_t(arr).ravel() == [5, 17, 25, 65, 81, 193])
-    assert all(mutate_data_t(arr).ravel() == [6, 18, 26, 66, 82, 194])
-    assert all(mutate_data_t(arr, 1).ravel() == [6, 18, 26, 67, 83, 195])
-    assert all(mutate_data_t(arr, 0, 1).ravel() == [6, 19, 27, 68, 84, 196])
-    assert all(mutate_data_t(arr, 1, 2).ravel() == [6, 19, 27, 68, 84, 197])
+    assert all(m.mutate_data_t(arr).ravel() == [5, 17, 25, 65, 81, 193])
+    assert all(m.mutate_data_t(arr).ravel() == [6, 18, 26, 66, 82, 194])
+    assert all(m.mutate_data_t(arr, 1).ravel() == [6, 18, 26, 67, 83, 195])
+    assert all(m.mutate_data_t(arr, 0, 1).ravel() == [6, 19, 27, 68, 84, 196])
+    assert all(m.mutate_data_t(arr, 1, 2).ravel() == [6, 19, 27, 68, 84, 197])
 
 
 def test_bounds_check(arr):
-    from pybind11_tests.array import (index_at, index_at_t, data, data_t,
-                                      mutate_data, mutate_data_t, at_t, mutate_at_t)
-    funcs = (index_at, index_at_t, data, data_t,
-             mutate_data, mutate_data_t, at_t, mutate_at_t)
-    for func in funcs:
+    for func in (m.index_at, m.index_at_t, m.data, m.data_t,
+                 m.mutate_data, m.mutate_data_t, m.at_t, m.mutate_at_t):
         with pytest.raises(IndexError) as excinfo:
             func(arr, 2, 0)
         assert str(excinfo.value) == 'index 2 is out of bounds for axis 0 with size 2'
@@ -145,18 +129,13 @@
 
 
 def test_make_c_f_array():
-    from pybind11_tests.array import (
-        make_c_array, make_f_array
-    )
-    assert make_c_array().flags.c_contiguous
-    assert not make_c_array().flags.f_contiguous
-    assert make_f_array().flags.f_contiguous
-    assert not make_f_array().flags.c_contiguous
+    assert m.make_c_array().flags.c_contiguous
+    assert not m.make_c_array().flags.f_contiguous
+    assert m.make_f_array().flags.f_contiguous
+    assert not m.make_f_array().flags.c_contiguous
 
 
 def test_wrap():
-    from pybind11_tests.array import wrap
-
     def assert_references(a, b, base=None):
         if base is None:
             base = a
@@ -178,40 +157,39 @@
 
     a1 = np.array([1, 2], dtype=np.int16)
     assert a1.flags.owndata and a1.base is None
-    a2 = wrap(a1)
+    a2 = m.wrap(a1)
     assert_references(a1, a2)
 
     a1 = np.array([[1, 2], [3, 4]], dtype=np.float32, order='F')
     assert a1.flags.owndata and a1.base is None
-    a2 = wrap(a1)
+    a2 = m.wrap(a1)
     assert_references(a1, a2)
 
     a1 = np.array([[1, 2], [3, 4]], dtype=np.float32, order='C')
     a1.flags.writeable = False
-    a2 = wrap(a1)
+    a2 = m.wrap(a1)
     assert_references(a1, a2)
 
     a1 = np.random.random((4, 4, 4))
-    a2 = wrap(a1)
+    a2 = m.wrap(a1)
     assert_references(a1, a2)
 
     a1t = a1.transpose()
-    a2 = wrap(a1t)
+    a2 = m.wrap(a1t)
     assert_references(a1t, a2, a1)
 
     a1d = a1.diagonal()
-    a2 = wrap(a1d)
+    a2 = m.wrap(a1d)
     assert_references(a1d, a2, a1)
 
     a1m = a1[::-1, ::-1, ::-1]
-    a2 = wrap(a1m)
+    a2 = m.wrap(a1m)
     assert_references(a1m, a2, a1)
 
 
 def test_numpy_view(capture):
-    from pybind11_tests.array import ArrayClass
     with capture:
-        ac = ArrayClass()
+        ac = m.ArrayClass()
         ac_view_1 = ac.numpy_view()
         ac_view_2 = ac.numpy_view()
         assert np.all(ac_view_1 == np.array([1, 2], dtype=np.int32))
@@ -238,29 +216,24 @@
 
 @pytest.unsupported_on_pypy
 def test_cast_numpy_int64_to_uint64():
-    from pybind11_tests.array import function_taking_uint64
-    function_taking_uint64(123)
-    function_taking_uint64(np.uint64(123))
+    m.function_taking_uint64(123)
+    m.function_taking_uint64(np.uint64(123))
 
 
 def test_isinstance():
-    from pybind11_tests.array import isinstance_untyped, isinstance_typed
-
-    assert isinstance_untyped(np.array([1, 2, 3]), "not an array")
-    assert isinstance_typed(np.array([1.0, 2.0, 3.0]))
+    assert m.isinstance_untyped(np.array([1, 2, 3]), "not an array")
+    assert m.isinstance_typed(np.array([1.0, 2.0, 3.0]))
 
 
 def test_constructors():
-    from pybind11_tests.array import default_constructors, converting_constructors
-
-    defaults = default_constructors()
+    defaults = m.default_constructors()
     for a in defaults.values():
         assert a.size == 0
     assert defaults["array"].dtype == np.array([]).dtype
     assert defaults["array_t<int32>"].dtype == np.int32
     assert defaults["array_t<double>"].dtype == np.float64
 
-    results = converting_constructors([1, 2, 3])
+    results = m.converting_constructors([1, 2, 3])
     for a in results.values():
         np.testing.assert_array_equal(a, [1, 2, 3])
     assert results["array"].dtype == np.int_
@@ -269,22 +242,20 @@
 
 
 def test_overload_resolution(msg):
-    from pybind11_tests.array import overloaded, overloaded2, overloaded3, overloaded4, overloaded5
-
     # Exact overload matches:
-    assert overloaded(np.array([1], dtype='float64')) == 'double'
-    assert overloaded(np.array([1], dtype='float32')) == 'float'
-    assert overloaded(np.array([1], dtype='ushort')) == 'unsigned short'
-    assert overloaded(np.array([1], dtype='intc')) == 'int'
-    assert overloaded(np.array([1], dtype='longlong')) == 'long long'
-    assert overloaded(np.array([1], dtype='complex')) == 'double complex'
-    assert overloaded(np.array([1], dtype='csingle')) == 'float complex'
+    assert m.overloaded(np.array([1], dtype='float64')) == 'double'
+    assert m.overloaded(np.array([1], dtype='float32')) == 'float'
+    assert m.overloaded(np.array([1], dtype='ushort')) == 'unsigned short'
+    assert m.overloaded(np.array([1], dtype='intc')) == 'int'
+    assert m.overloaded(np.array([1], dtype='longlong')) == 'long long'
+    assert m.overloaded(np.array([1], dtype='complex')) == 'double complex'
+    assert m.overloaded(np.array([1], dtype='csingle')) == 'float complex'
 
     # No exact match, should call first convertible version:
-    assert overloaded(np.array([1], dtype='uint8')) == 'double'
+    assert m.overloaded(np.array([1], dtype='uint8')) == 'double'
 
     with pytest.raises(TypeError) as excinfo:
-        overloaded("not an array")
+        m.overloaded("not an array")
     assert msg(excinfo.value) == """
         overloaded(): incompatible function arguments. The following argument types are supported:
             1. (arg0: numpy.ndarray[float64]) -> str
@@ -298,14 +269,14 @@
         Invoked with: 'not an array'
     """
 
-    assert overloaded2(np.array([1], dtype='float64')) == 'double'
-    assert overloaded2(np.array([1], dtype='float32')) == 'float'
-    assert overloaded2(np.array([1], dtype='complex64')) == 'float complex'
-    assert overloaded2(np.array([1], dtype='complex128')) == 'double complex'
-    assert overloaded2(np.array([1], dtype='float32')) == 'float'
+    assert m.overloaded2(np.array([1], dtype='float64')) == 'double'
+    assert m.overloaded2(np.array([1], dtype='float32')) == 'float'
+    assert m.overloaded2(np.array([1], dtype='complex64')) == 'float complex'
+    assert m.overloaded2(np.array([1], dtype='complex128')) == 'double complex'
+    assert m.overloaded2(np.array([1], dtype='float32')) == 'float'
 
-    assert overloaded3(np.array([1], dtype='float64')) == 'double'
-    assert overloaded3(np.array([1], dtype='intc')) == 'int'
+    assert m.overloaded3(np.array([1], dtype='float64')) == 'double'
+    assert m.overloaded3(np.array([1], dtype='intc')) == 'int'
     expected_exc = """
         overloaded3(): incompatible function arguments. The following argument types are supported:
             1. (arg0: numpy.ndarray[int32]) -> str
@@ -314,122 +285,118 @@
         Invoked with:"""
 
     with pytest.raises(TypeError) as excinfo:
-        overloaded3(np.array([1], dtype='uintc'))
+        m.overloaded3(np.array([1], dtype='uintc'))
     assert msg(excinfo.value) == expected_exc + " array([1], dtype=uint32)"
     with pytest.raises(TypeError) as excinfo:
-        overloaded3(np.array([1], dtype='float32'))
+        m.overloaded3(np.array([1], dtype='float32'))
     assert msg(excinfo.value) == expected_exc + " array([ 1.], dtype=float32)"
     with pytest.raises(TypeError) as excinfo:
-        overloaded3(np.array([1], dtype='complex'))
+        m.overloaded3(np.array([1], dtype='complex'))
     assert msg(excinfo.value) == expected_exc + " array([ 1.+0.j])"
 
     # Exact matches:
-    assert overloaded4(np.array([1], dtype='double')) == 'double'
-    assert overloaded4(np.array([1], dtype='longlong')) == 'long long'
+    assert m.overloaded4(np.array([1], dtype='double')) == 'double'
+    assert m.overloaded4(np.array([1], dtype='longlong')) == 'long long'
     # Non-exact matches requiring conversion.  Since float to integer isn't a
     # save conversion, it should go to the double overload, but short can go to
     # either (and so should end up on the first-registered, the long long).
-    assert overloaded4(np.array([1], dtype='float32')) == 'double'
-    assert overloaded4(np.array([1], dtype='short')) == 'long long'
+    assert m.overloaded4(np.array([1], dtype='float32')) == 'double'
+    assert m.overloaded4(np.array([1], dtype='short')) == 'long long'
 
-    assert overloaded5(np.array([1], dtype='double')) == 'double'
-    assert overloaded5(np.array([1], dtype='uintc')) == 'unsigned int'
-    assert overloaded5(np.array([1], dtype='float32')) == 'unsigned int'
+    assert m.overloaded5(np.array([1], dtype='double')) == 'double'
+    assert m.overloaded5(np.array([1], dtype='uintc')) == 'unsigned int'
+    assert m.overloaded5(np.array([1], dtype='float32')) == 'unsigned int'
 
 
-def test_greedy_string_overload():  # issue 685
-    from pybind11_tests.array import issue685
+def test_greedy_string_overload():
+    """Tests fix for #685 - ndarray shouldn't go to std::string overload"""
 
-    assert issue685("abc") == "string"
-    assert issue685(np.array([97, 98, 99], dtype='b')) == "array"
-    assert issue685(123) == "other"
+    assert m.issue685("abc") == "string"
+    assert m.issue685(np.array([97, 98, 99], dtype='b')) == "array"
+    assert m.issue685(123) == "other"
 
 
 def test_array_unchecked_fixed_dims(msg):
-    from pybind11_tests.array import (proxy_add2, proxy_init3F, proxy_init3, proxy_squared_L2_norm,
-                                      proxy_auxiliaries2, array_auxiliaries2)
-
     z1 = np.array([[1, 2], [3, 4]], dtype='float64')
-    proxy_add2(z1, 10)
+    m.proxy_add2(z1, 10)
     assert np.all(z1 == [[11, 12], [13, 14]])
 
     with pytest.raises(ValueError) as excinfo:
-        proxy_add2(np.array([1., 2, 3]), 5.0)
+        m.proxy_add2(np.array([1., 2, 3]), 5.0)
     assert msg(excinfo.value) == "array has incorrect number of dimensions: 1; expected 2"
 
     expect_c = np.ndarray(shape=(3, 3, 3), buffer=np.array(range(3, 30)), dtype='int')
-    assert np.all(proxy_init3(3.0) == expect_c)
+    assert np.all(m.proxy_init3(3.0) == expect_c)
     expect_f = np.transpose(expect_c)
-    assert np.all(proxy_init3F(3.0) == expect_f)
+    assert np.all(m.proxy_init3F(3.0) == expect_f)
 
-    assert proxy_squared_L2_norm(np.array(range(6))) == 55
-    assert proxy_squared_L2_norm(np.array(range(6), dtype="float64")) == 55
+    assert m.proxy_squared_L2_norm(np.array(range(6))) == 55
+    assert m.proxy_squared_L2_norm(np.array(range(6), dtype="float64")) == 55
 
-    assert proxy_auxiliaries2(z1) == [11, 11, True, 2, 8, 2, 2, 4, 32]
-    assert proxy_auxiliaries2(z1) == array_auxiliaries2(z1)
+    assert m.proxy_auxiliaries2(z1) == [11, 11, True, 2, 8, 2, 2, 4, 32]
+    assert m.proxy_auxiliaries2(z1) == m.array_auxiliaries2(z1)
 
 
 def test_array_unchecked_dyn_dims(msg):
-    from pybind11_tests.array import (proxy_add2_dyn, proxy_init3_dyn, proxy_auxiliaries2_dyn,
-                                      array_auxiliaries2)
     z1 = np.array([[1, 2], [3, 4]], dtype='float64')
-    proxy_add2_dyn(z1, 10)
+    m.proxy_add2_dyn(z1, 10)
     assert np.all(z1 == [[11, 12], [13, 14]])
 
     expect_c = np.ndarray(shape=(3, 3, 3), buffer=np.array(range(3, 30)), dtype='int')
-    assert np.all(proxy_init3_dyn(3.0) == expect_c)
+    assert np.all(m.proxy_init3_dyn(3.0) == expect_c)
 
-    assert proxy_auxiliaries2_dyn(z1) == [11, 11, True, 2, 8, 2, 2, 4, 32]
-    assert proxy_auxiliaries2_dyn(z1) == array_auxiliaries2(z1)
+    assert m.proxy_auxiliaries2_dyn(z1) == [11, 11, True, 2, 8, 2, 2, 4, 32]
+    assert m.proxy_auxiliaries2_dyn(z1) == m.array_auxiliaries2(z1)
 
 
 def test_array_failure():
-    from pybind11_tests.array import (array_fail_test, array_t_fail_test,
-                                      array_fail_test_negative_size)
-
     with pytest.raises(ValueError) as excinfo:
-        array_fail_test()
+        m.array_fail_test()
     assert str(excinfo.value) == 'cannot create a pybind11::array from a nullptr'
 
     with pytest.raises(ValueError) as excinfo:
-        array_t_fail_test()
+        m.array_t_fail_test()
     assert str(excinfo.value) == 'cannot create a pybind11::array_t from a nullptr'
 
     with pytest.raises(ValueError) as excinfo:
-        array_fail_test_negative_size()
+        m.array_fail_test_negative_size()
     assert str(excinfo.value) == 'negative dimensions are not allowed'
 
 
-def test_array_resize(msg):
-    from pybind11_tests.array import (array_reshape2, array_resize3)
+def test_initializer_list():
+    assert m.array_initializer_list1().shape == (1,)
+    assert m.array_initializer_list2().shape == (1, 2)
+    assert m.array_initializer_list3().shape == (1, 2, 3)
+    assert m.array_initializer_list4().shape == (1, 2, 3, 4)
 
+
+def test_array_resize(msg):
     a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='float64')
-    array_reshape2(a)
+    m.array_reshape2(a)
     assert(a.size == 9)
     assert(np.all(a == [[1, 2, 3], [4, 5, 6], [7, 8, 9]]))
 
     # total size change should succced with refcheck off
-    array_resize3(a, 4, False)
+    m.array_resize3(a, 4, False)
     assert(a.size == 64)
     # ... and fail with refcheck on
     try:
-        array_resize3(a, 3, True)
+        m.array_resize3(a, 3, True)
     except ValueError as e:
         assert(str(e).startswith("cannot resize an array"))
     # transposed array doesn't own data
     b = a.transpose()
     try:
-        array_resize3(b, 3, False)
+        m.array_resize3(b, 3, False)
     except ValueError as e:
         assert(str(e).startswith("cannot resize this array: it does not own its data"))
     # ... but reshape should be fine
-    array_reshape2(b)
+    m.array_reshape2(b)
     assert(b.shape == (8, 8))
 
 
 @pytest.unsupported_on_pypy
 def test_array_create_and_resize(msg):
-    from pybind11_tests.array import create_and_resize
-    a = create_and_resize(2)
+    a = m.create_and_resize(2)
     assert(a.size == 4)
     assert(np.all(a == 42.))
diff --git a/tests/test_numpy_dtypes.cpp b/tests/test_numpy_dtypes.cpp
index 5f987a8..ddec851 100644
--- a/tests/test_numpy_dtypes.cpp
+++ b/tests/test_numpy_dtypes.cpp
@@ -156,90 +156,6 @@
     return arr;
 }
 
-std::string get_format_unbound() {
-    return py::format_descriptor<UnboundStruct>::format();
-}
-
-py::array_t<NestedStruct, 0> create_nested(size_t n) {
-    auto arr = mkarray_via_buffer<NestedStruct>(n);
-    auto req = arr.request();
-    auto ptr = static_cast<NestedStruct*>(req.ptr);
-    for (size_t i = 0; i < n; i++) {
-        SET_TEST_VALS(ptr[i].a, i);
-        SET_TEST_VALS(ptr[i].b, i + 1);
-    }
-    return arr;
-}
-
-py::array_t<PartialNestedStruct, 0> create_partial_nested(size_t n) {
-    auto arr = mkarray_via_buffer<PartialNestedStruct>(n);
-    auto req = arr.request();
-    auto ptr = static_cast<PartialNestedStruct*>(req.ptr);
-    for (size_t i = 0; i < n; i++) {
-        SET_TEST_VALS(ptr[i].a, i);
-    }
-    return arr;
-}
-
-py::array_t<StringStruct, 0> create_string_array(bool non_empty) {
-    auto arr = mkarray_via_buffer<StringStruct>(non_empty ? 4 : 0);
-    if (non_empty) {
-        auto req = arr.request();
-        auto ptr = static_cast<StringStruct*>(req.ptr);
-        for (ssize_t i = 0; i < req.size * req.itemsize; i++)
-            static_cast<char*>(req.ptr)[i] = 0;
-        ptr[1].a[0] = 'a'; ptr[1].b[0] = 'a';
-        ptr[2].a[0] = 'a'; ptr[2].b[0] = 'a';
-        ptr[3].a[0] = 'a'; ptr[3].b[0] = 'a';
-
-        ptr[2].a[1] = 'b'; ptr[2].b[1] = 'b';
-        ptr[3].a[1] = 'b'; ptr[3].b[1] = 'b';
-
-        ptr[3].a[2] = 'c'; ptr[3].b[2] = 'c';
-    }
-    return arr;
-}
-
-py::array_t<ArrayStruct, 0> create_array_array(size_t n) {
-    auto arr = mkarray_via_buffer<ArrayStruct>(n);
-    auto ptr = (ArrayStruct *) arr.mutable_data();
-    for (size_t i = 0; i < n; i++) {
-        for (size_t j = 0; j < 3; j++)
-            for (size_t k = 0; k < 4; k++)
-                ptr[i].a[j][k] = char('A' + (i * 100 + j * 10 + k) % 26);
-        for (size_t j = 0; j < 2; j++)
-            ptr[i].b[j] = int32_t(i * 1000 + j);
-        for (size_t j = 0; j < 3; j++)
-            ptr[i].c[j] = uint8_t(i * 10 + j);
-        for (size_t j = 0; j < 4; j++)
-            for (size_t k = 0; k < 2; k++)
-                ptr[i].d[j][k] = float(i) * 100.0f + float(j) * 10.0f + float(k);
-    }
-    return arr;
-}
-
-py::array_t<EnumStruct, 0> create_enum_array(size_t n) {
-    auto arr = mkarray_via_buffer<EnumStruct>(n);
-    auto ptr = (EnumStruct *) arr.mutable_data();
-    for (size_t i = 0; i < n; i++) {
-        ptr[i].e1 = static_cast<E1>(-1 + ((int) i % 2) * 2);
-        ptr[i].e2 = static_cast<E2>(1 + (i % 2));
-    }
-    return arr;
-}
-
-py::array_t<ComplexStruct, 0> create_complex_array(size_t n) {
-    auto arr = mkarray_via_buffer<ComplexStruct>(n);
-    auto ptr = (ComplexStruct *) arr.mutable_data();
-    for (size_t i = 0; i < n; i++) {
-        ptr[i].cflt.real(float(i));
-        ptr[i].cflt.imag(float(i) + 0.25f);
-        ptr[i].cdbl.real(double(i) + 0.5);
-        ptr[i].cdbl.imag(double(i) + 0.75);
-    }
-    return arr;
-}
-
 template <typename S>
 py::list print_recarray(py::array_t<S, 0> arr) {
     const auto req = arr.request();
@@ -253,45 +169,6 @@
     return l;
 }
 
-py::list print_format_descriptors() {
-    const auto fmts = {
-        py::format_descriptor<SimpleStruct>::format(),
-        py::format_descriptor<PackedStruct>::format(),
-        py::format_descriptor<NestedStruct>::format(),
-        py::format_descriptor<PartialStruct>::format(),
-        py::format_descriptor<PartialNestedStruct>::format(),
-        py::format_descriptor<StringStruct>::format(),
-        py::format_descriptor<ArrayStruct>::format(),
-        py::format_descriptor<EnumStruct>::format(),
-        py::format_descriptor<ComplexStruct>::format()
-    };
-    auto l = py::list();
-    for (const auto &fmt : fmts) {
-        l.append(py::cast(fmt));
-    }
-    return l;
-}
-
-py::list print_dtypes() {
-    const auto dtypes = {
-        py::str(py::dtype::of<SimpleStruct>()),
-        py::str(py::dtype::of<PackedStruct>()),
-        py::str(py::dtype::of<NestedStruct>()),
-        py::str(py::dtype::of<PartialStruct>()),
-        py::str(py::dtype::of<PartialNestedStruct>()),
-        py::str(py::dtype::of<StringStruct>()),
-        py::str(py::dtype::of<ArrayStruct>()),
-        py::str(py::dtype::of<EnumStruct>()),
-        py::str(py::dtype::of<StructWithUglyNames>()),
-        py::str(py::dtype::of<ComplexStruct>())
-    };
-    auto l = py::list();
-    for (const auto &s : dtypes) {
-        l.append(s);
-    }
-    return l;
-}
-
 py::array_t<int32_t, 0> test_array_ctors(int i) {
     using arr_t = py::array_t<int32_t, 0>;
 
@@ -367,51 +244,9 @@
     return list;
 }
 
-struct TrailingPaddingStruct {
-    int32_t a;
-    char b;
-};
-
-py::dtype trailing_padding_dtype() {
-    return py::dtype::of<TrailingPaddingStruct>();
-}
-
-py::dtype buffer_to_dtype(py::buffer& buf) {
-    return py::dtype(buf.request());
-}
-
-py::list test_dtype_methods() {
-    py::list list;
-    auto dt1 = py::dtype::of<int32_t>();
-    auto dt2 = py::dtype::of<SimpleStruct>();
-    list.append(dt1); list.append(dt2);
-    list.append(py::bool_(dt1.has_fields())); list.append(py::bool_(dt2.has_fields()));
-    list.append(py::int_(dt1.itemsize())); list.append(py::int_(dt2.itemsize()));
-    return list;
-}
-
-struct CompareStruct {
-    bool x;
-    uint32_t y;
-    float z;
-};
-
-py::list test_compare_buffer_info() {
-    py::list list;
-    list.append(py::bool_(py::detail::compare_buffer_info<float>::compare(py::buffer_info(nullptr, sizeof(float), "f", 1))));
-    list.append(py::bool_(py::detail::compare_buffer_info<unsigned>::compare(py::buffer_info(nullptr, sizeof(int), "I", 1))));
-    list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(py::buffer_info(nullptr, sizeof(long), "l", 1))));
-    list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(py::buffer_info(nullptr, sizeof(long), sizeof(long) == sizeof(int) ? "i" : "q", 1))));
-    list.append(py::bool_(py::detail::compare_buffer_info<CompareStruct>::compare(py::buffer_info(nullptr, sizeof(CompareStruct), "T{?:x:3xI:y:f:z:}", 1))));
-    return list;
-}
-
-test_initializer numpy_dtypes([](py::module &m) {
-    try {
-        py::module::import("numpy");
-    } catch (...) {
-        return;
-    }
+TEST_SUBMODULE(numpy_dtypes, m) {
+    try { py::module::import("numpy"); }
+    catch (...) { return; }
 
     // typeinfo may be registered before the dtype descriptor for scalar casts to work...
     py::class_<SimpleStruct>(m, "SimpleStruct");
@@ -425,8 +260,6 @@
     PYBIND11_NUMPY_DTYPE(ArrayStruct, a, b, c, d);
     PYBIND11_NUMPY_DTYPE(EnumStruct, e1, e2);
     PYBIND11_NUMPY_DTYPE(ComplexStruct, cflt, cdbl);
-    PYBIND11_NUMPY_DTYPE(TrailingPaddingStruct, a, b);
-    PYBIND11_NUMPY_DTYPE(CompareStruct, x, y, z);
 
     // ... or after
     py::class_<PackedStruct>(m, "PackedStruct");
@@ -438,35 +271,181 @@
 //    struct NotPOD { std::string v; NotPOD() : v("hi") {}; };
 //    PYBIND11_NUMPY_DTYPE(NotPOD, v);
 
+    // test_recarray, test_scalar_conversion
     m.def("create_rec_simple", &create_recarray<SimpleStruct>);
     m.def("create_rec_packed", &create_recarray<PackedStruct>);
-    m.def("create_rec_nested", &create_nested);
+    m.def("create_rec_nested", [](size_t n) { // test_signature
+        py::array_t<NestedStruct, 0> arr = mkarray_via_buffer<NestedStruct>(n);
+        auto req = arr.request();
+        auto ptr = static_cast<NestedStruct*>(req.ptr);
+        for (size_t i = 0; i < n; i++) {
+            SET_TEST_VALS(ptr[i].a, i);
+            SET_TEST_VALS(ptr[i].b, i + 1);
+        }
+        return arr;
+    });
     m.def("create_rec_partial", &create_recarray<PartialStruct>);
-    m.def("create_rec_partial_nested", &create_partial_nested);
-    m.def("print_format_descriptors", &print_format_descriptors);
+    m.def("create_rec_partial_nested", [](size_t n) {
+        py::array_t<PartialNestedStruct, 0> arr = mkarray_via_buffer<PartialNestedStruct>(n);
+        auto req = arr.request();
+        auto ptr = static_cast<PartialNestedStruct*>(req.ptr);
+        for (size_t i = 0; i < n; i++) {
+            SET_TEST_VALS(ptr[i].a, i);
+        }
+        return arr;
+    });
     m.def("print_rec_simple", &print_recarray<SimpleStruct>);
     m.def("print_rec_packed", &print_recarray<PackedStruct>);
     m.def("print_rec_nested", &print_recarray<NestedStruct>);
-    m.def("print_dtypes", &print_dtypes);
-    m.def("get_format_unbound", &get_format_unbound);
-    m.def("create_string_array", &create_string_array);
-    m.def("print_string_array", &print_recarray<StringStruct>);
-    m.def("create_array_array", &create_array_array);
-    m.def("print_array_array", &print_recarray<ArrayStruct>);
-    m.def("create_enum_array", &create_enum_array);
-    m.def("print_enum_array", &print_recarray<EnumStruct>);
-    m.def("create_complex_array", &create_complex_array);
-    m.def("print_complex_array", &print_recarray<ComplexStruct>);
-    m.def("test_array_ctors", &test_array_ctors);
+
+    // test_format_descriptors
+    m.def("get_format_unbound", []() { return py::format_descriptor<UnboundStruct>::format(); });
+    m.def("print_format_descriptors", []() {
+        py::list l;
+        for (const auto &fmt : {
+            py::format_descriptor<SimpleStruct>::format(),
+            py::format_descriptor<PackedStruct>::format(),
+            py::format_descriptor<NestedStruct>::format(),
+            py::format_descriptor<PartialStruct>::format(),
+            py::format_descriptor<PartialNestedStruct>::format(),
+            py::format_descriptor<StringStruct>::format(),
+            py::format_descriptor<ArrayStruct>::format(),
+            py::format_descriptor<EnumStruct>::format(),
+            py::format_descriptor<ComplexStruct>::format()
+        }) {
+            l.append(py::cast(fmt));
+        }
+        return l;
+    });
+
+    // test_dtype
+    m.def("print_dtypes", []() {
+        py::list l;
+        for (const py::handle &d : {
+            py::dtype::of<SimpleStruct>(),
+            py::dtype::of<PackedStruct>(),
+            py::dtype::of<NestedStruct>(),
+            py::dtype::of<PartialStruct>(),
+            py::dtype::of<PartialNestedStruct>(),
+            py::dtype::of<StringStruct>(),
+            py::dtype::of<ArrayStruct>(),
+            py::dtype::of<EnumStruct>(),
+            py::dtype::of<StructWithUglyNames>(),
+            py::dtype::of<ComplexStruct>()
+        })
+            l.append(py::str(d));
+        return l;
+    });
     m.def("test_dtype_ctors", &test_dtype_ctors);
-    m.def("test_dtype_methods", &test_dtype_methods);
-    m.def("compare_buffer_info", &test_compare_buffer_info);
-    m.def("trailing_padding_dtype", &trailing_padding_dtype);
-    m.def("buffer_to_dtype", &buffer_to_dtype);
+    m.def("test_dtype_methods", []() {
+        py::list list;
+        auto dt1 = py::dtype::of<int32_t>();
+        auto dt2 = py::dtype::of<SimpleStruct>();
+        list.append(dt1); list.append(dt2);
+        list.append(py::bool_(dt1.has_fields())); list.append(py::bool_(dt2.has_fields()));
+        list.append(py::int_(dt1.itemsize())); list.append(py::int_(dt2.itemsize()));
+        return list;
+    });
+    struct TrailingPaddingStruct {
+        int32_t a;
+        char b;
+    };
+    PYBIND11_NUMPY_DTYPE(TrailingPaddingStruct, a, b);
+    m.def("trailing_padding_dtype", []() { return py::dtype::of<TrailingPaddingStruct>(); });
+
+    // test_string_array
+    m.def("create_string_array", [](bool non_empty) {
+        py::array_t<StringStruct, 0> arr = mkarray_via_buffer<StringStruct>(non_empty ? 4 : 0);
+        if (non_empty) {
+            auto req = arr.request();
+            auto ptr = static_cast<StringStruct*>(req.ptr);
+            for (ssize_t i = 0; i < req.size * req.itemsize; i++)
+                static_cast<char*>(req.ptr)[i] = 0;
+            ptr[1].a[0] = 'a'; ptr[1].b[0] = 'a';
+            ptr[2].a[0] = 'a'; ptr[2].b[0] = 'a';
+            ptr[3].a[0] = 'a'; ptr[3].b[0] = 'a';
+
+            ptr[2].a[1] = 'b'; ptr[2].b[1] = 'b';
+            ptr[3].a[1] = 'b'; ptr[3].b[1] = 'b';
+
+            ptr[3].a[2] = 'c'; ptr[3].b[2] = 'c';
+        }
+        return arr;
+    });
+    m.def("print_string_array", &print_recarray<StringStruct>);
+
+    // test_array_array
+    m.def("create_array_array", [](size_t n) {
+        py::array_t<ArrayStruct, 0> arr = mkarray_via_buffer<ArrayStruct>(n);
+        auto ptr = (ArrayStruct *) arr.mutable_data();
+        for (size_t i = 0; i < n; i++) {
+            for (size_t j = 0; j < 3; j++)
+                for (size_t k = 0; k < 4; k++)
+                    ptr[i].a[j][k] = char('A' + (i * 100 + j * 10 + k) % 26);
+            for (size_t j = 0; j < 2; j++)
+                ptr[i].b[j] = int32_t(i * 1000 + j);
+            for (size_t j = 0; j < 3; j++)
+                ptr[i].c[j] = uint8_t(i * 10 + j);
+            for (size_t j = 0; j < 4; j++)
+                for (size_t k = 0; k < 2; k++)
+                    ptr[i].d[j][k] = float(i) * 100.0f + float(j) * 10.0f + float(k);
+        }
+        return arr;
+    });
+    m.def("print_array_array", &print_recarray<ArrayStruct>);
+
+    // test_enum_array
+    m.def("create_enum_array", [](size_t n) {
+        py::array_t<EnumStruct, 0> arr = mkarray_via_buffer<EnumStruct>(n);
+        auto ptr = (EnumStruct *) arr.mutable_data();
+        for (size_t i = 0; i < n; i++) {
+            ptr[i].e1 = static_cast<E1>(-1 + ((int) i % 2) * 2);
+            ptr[i].e2 = static_cast<E2>(1 + (i % 2));
+        }
+        return arr;
+    });
+    m.def("print_enum_array", &print_recarray<EnumStruct>);
+
+    // test_complex_array
+    m.def("create_complex_array", [](size_t n) {
+        py::array_t<ComplexStruct, 0> arr = mkarray_via_buffer<ComplexStruct>(n);
+        auto ptr = (ComplexStruct *) arr.mutable_data();
+        for (size_t i = 0; i < n; i++) {
+            ptr[i].cflt.real(float(i));
+            ptr[i].cflt.imag(float(i) + 0.25f);
+            ptr[i].cdbl.real(double(i) + 0.5);
+            ptr[i].cdbl.imag(double(i) + 0.75);
+        }
+        return arr;
+    });
+    m.def("print_complex_array", &print_recarray<ComplexStruct>);
+
+    // test_array_constructors
+    m.def("test_array_ctors", &test_array_ctors);
+
+    // test_compare_buffer_info
+    struct CompareStruct {
+        bool x;
+        uint32_t y;
+        float z;
+    };
+    PYBIND11_NUMPY_DTYPE(CompareStruct, x, y, z);
+    m.def("compare_buffer_info", []() {
+        py::list list;
+        list.append(py::bool_(py::detail::compare_buffer_info<float>::compare(py::buffer_info(nullptr, sizeof(float), "f", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<unsigned>::compare(py::buffer_info(nullptr, sizeof(int), "I", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(py::buffer_info(nullptr, sizeof(long), "l", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<long>::compare(py::buffer_info(nullptr, sizeof(long), sizeof(long) == sizeof(int) ? "i" : "q", 1))));
+        list.append(py::bool_(py::detail::compare_buffer_info<CompareStruct>::compare(py::buffer_info(nullptr, sizeof(CompareStruct), "T{?:x:3xI:y:f:z:}", 1))));
+        return list;
+    });
+    m.def("buffer_to_dtype", [](py::buffer& buf) { return py::dtype(buf.request()); });
+
+    // test_scalar_conversion
     m.def("f_simple", [](SimpleStruct s) { return s.uint_ * 10; });
     m.def("f_packed", [](PackedStruct s) { return s.uint_ * 10; });
     m.def("f_nested", [](NestedStruct s) { return s.a.uint_ * 10; });
-    m.def("register_dtype", []() { PYBIND11_NUMPY_DTYPE(SimpleStruct, bool_, uint_, float_, ldbl_); });
-});
 
-#undef PYBIND11_PACKED
+    // test_register_dtype
+    m.def("register_dtype", []() { PYBIND11_NUMPY_DTYPE(SimpleStruct, bool_, uint_, float_, ldbl_); });
+}
diff --git a/tests/test_numpy_dtypes.py b/tests/test_numpy_dtypes.py
index 24803a9..5f9a954 100644
--- a/tests/test_numpy_dtypes.py
+++ b/tests/test_numpy_dtypes.py
@@ -1,5 +1,6 @@
 import re
 import pytest
+from pybind11_tests import numpy_dtypes as m
 
 pytestmark = pytest.requires_numpy
 
@@ -65,10 +66,8 @@
 
 
 def test_format_descriptors():
-    from pybind11_tests import get_format_unbound, print_format_descriptors
-
     with pytest.raises(RuntimeError) as excinfo:
-        get_format_unbound()
+        m.get_format_unbound()
     assert re.match('^NumPy type info missing for .*UnboundStruct.*$', str(excinfo.value))
 
     ld = np.dtype('longdouble')
@@ -79,7 +78,7 @@
                    str(4 * (dbl.alignment > 4) + dbl.itemsize + 8 * (ld.alignment > 8)) +
                    "xg:ldbl_:}")
     nested_extra = str(max(8, ld.alignment))
-    assert print_format_descriptors() == [
+    assert m.print_format_descriptors() == [
         ss_fmt,
         "^T{?:bool_:I:uint_:f:float_:g:ldbl_:}",
         "^T{" + ss_fmt + ":a:^T{?:bool_:I:uint_:f:float_:g:ldbl_:}:b:}",
@@ -93,12 +92,10 @@
 
 
 def test_dtype(simple_dtype):
-    from pybind11_tests import (print_dtypes, test_dtype_ctors, test_dtype_methods,
-                                trailing_padding_dtype, buffer_to_dtype)
     from sys import byteorder
     e = '<' if byteorder == 'little' else '>'
 
-    assert print_dtypes() == [
+    assert m.print_dtypes() == [
         simple_dtype_fmt(),
         packed_dtype_fmt(),
         "[('a', {}), ('b', {})]".format(simple_dtype_fmt(), packed_dtype_fmt()),
@@ -116,23 +113,19 @@
     d1 = np.dtype({'names': ['a', 'b'], 'formats': ['int32', 'float64'],
                    'offsets': [1, 10], 'itemsize': 20})
     d2 = np.dtype([('a', 'i4'), ('b', 'f4')])
-    assert test_dtype_ctors() == [np.dtype('int32'), np.dtype('float64'),
-                                  np.dtype('bool'), d1, d1, np.dtype('uint32'), d2]
+    assert m.test_dtype_ctors() == [np.dtype('int32'), np.dtype('float64'),
+                                    np.dtype('bool'), d1, d1, np.dtype('uint32'), d2]
 
-    assert test_dtype_methods() == [np.dtype('int32'), simple_dtype, False, True,
-                                    np.dtype('int32').itemsize, simple_dtype.itemsize]
+    assert m.test_dtype_methods() == [np.dtype('int32'), simple_dtype, False, True,
+                                      np.dtype('int32').itemsize, simple_dtype.itemsize]
 
-    assert trailing_padding_dtype() == buffer_to_dtype(np.zeros(1, trailing_padding_dtype()))
+    assert m.trailing_padding_dtype() == m.buffer_to_dtype(np.zeros(1, m.trailing_padding_dtype()))
 
 
 def test_recarray(simple_dtype, packed_dtype):
-    from pybind11_tests import (create_rec_simple, create_rec_packed, create_rec_nested,
-                                print_rec_simple, print_rec_packed, print_rec_nested,
-                                create_rec_partial, create_rec_partial_nested)
-
     elements = [(False, 0, 0.0, -0.0), (True, 1, 1.5, -2.5), (False, 2, 3.0, -5.0)]
 
-    for func, dtype in [(create_rec_simple, simple_dtype), (create_rec_packed, packed_dtype)]:
+    for func, dtype in [(m.create_rec_simple, simple_dtype), (m.create_rec_packed, packed_dtype)]:
         arr = func(0)
         assert arr.dtype == dtype
         assert_equal(arr, [], simple_dtype)
@@ -144,13 +137,13 @@
         assert_equal(arr, elements, packed_dtype)
 
         if dtype == simple_dtype:
-            assert print_rec_simple(arr) == [
+            assert m.print_rec_simple(arr) == [
                 "s:0,0,0,-0",
                 "s:1,1,1.5,-2.5",
                 "s:0,2,3,-5"
             ]
         else:
-            assert print_rec_packed(arr) == [
+            assert m.print_rec_packed(arr) == [
                 "p:0,0,0,-0",
                 "p:1,1,1.5,-2.5",
                 "p:0,2,3,-5"
@@ -158,22 +151,22 @@
 
     nested_dtype = np.dtype([('a', simple_dtype), ('b', packed_dtype)])
 
-    arr = create_rec_nested(0)
+    arr = m.create_rec_nested(0)
     assert arr.dtype == nested_dtype
     assert_equal(arr, [], nested_dtype)
 
-    arr = create_rec_nested(3)
+    arr = m.create_rec_nested(3)
     assert arr.dtype == nested_dtype
     assert_equal(arr, [((False, 0, 0.0, -0.0), (True, 1, 1.5, -2.5)),
                        ((True, 1, 1.5, -2.5), (False, 2, 3.0, -5.0)),
                        ((False, 2, 3.0, -5.0), (True, 3, 4.5, -7.5))], nested_dtype)
-    assert print_rec_nested(arr) == [
+    assert m.print_rec_nested(arr) == [
         "n:a=s:0,0,0,-0;b=p:1,1,1.5,-2.5",
         "n:a=s:1,1,1.5,-2.5;b=p:0,2,3,-5",
         "n:a=s:0,2,3,-5;b=p:1,3,4.5,-7.5"
     ]
 
-    arr = create_rec_partial(3)
+    arr = m.create_rec_partial(3)
     assert str(arr.dtype) == partial_dtype_fmt()
     partial_dtype = arr.dtype
     assert '' not in arr.dtype.fields
@@ -181,32 +174,28 @@
     assert_equal(arr, elements, simple_dtype)
     assert_equal(arr, elements, packed_dtype)
 
-    arr = create_rec_partial_nested(3)
+    arr = m.create_rec_partial_nested(3)
     assert str(arr.dtype) == partial_nested_fmt()
     assert '' not in arr.dtype.fields
     assert '' not in arr.dtype.fields['a'][0].fields
     assert arr.dtype.itemsize > partial_dtype.itemsize
-    np.testing.assert_equal(arr['a'], create_rec_partial(3))
+    np.testing.assert_equal(arr['a'], m.create_rec_partial(3))
 
 
 def test_array_constructors():
-    from pybind11_tests import test_array_ctors
-
     data = np.arange(1, 7, dtype='int32')
     for i in range(8):
-        np.testing.assert_array_equal(test_array_ctors(10 + i), data.reshape((3, 2)))
-        np.testing.assert_array_equal(test_array_ctors(20 + i), data.reshape((3, 2)))
+        np.testing.assert_array_equal(m.test_array_ctors(10 + i), data.reshape((3, 2)))
+        np.testing.assert_array_equal(m.test_array_ctors(20 + i), data.reshape((3, 2)))
     for i in range(5):
-        np.testing.assert_array_equal(test_array_ctors(30 + i), data)
-        np.testing.assert_array_equal(test_array_ctors(40 + i), data)
+        np.testing.assert_array_equal(m.test_array_ctors(30 + i), data)
+        np.testing.assert_array_equal(m.test_array_ctors(40 + i), data)
 
 
 def test_string_array():
-    from pybind11_tests import create_string_array, print_string_array
-
-    arr = create_string_array(True)
+    arr = m.create_string_array(True)
     assert str(arr.dtype) == "[('a', 'S3'), ('b', 'S3')]"
-    assert print_string_array(arr) == [
+    assert m.print_string_array(arr) == [
         "a='',b=''",
         "a='a',b='a'",
         "a='ab',b='ab'",
@@ -215,21 +204,20 @@
     dtype = arr.dtype
     assert arr['a'].tolist() == [b'', b'a', b'ab', b'abc']
     assert arr['b'].tolist() == [b'', b'a', b'ab', b'abc']
-    arr = create_string_array(False)
+    arr = m.create_string_array(False)
     assert dtype == arr.dtype
 
 
 def test_array_array():
-    from pybind11_tests import create_array_array, print_array_array
     from sys import byteorder
     e = '<' if byteorder == 'little' else '>'
 
-    arr = create_array_array(3)
+    arr = m.create_array_array(3)
     assert str(arr.dtype) == (
         "{{'names':['a','b','c','d'], " +
         "'formats':[('S4', (3,)),('<i4', (2,)),('u1', (3,)),('{e}f4', (4, 2))], " +
         "'offsets':[0,12,20,24], 'itemsize':56}}").format(e=e)
-    assert print_array_array(arr) == [
+    assert m.print_array_array(arr) == [
         "a={{A,B,C,D},{K,L,M,N},{U,V,W,X}},b={0,1}," +
         "c={0,1,2},d={{0,1},{10,11},{20,21},{30,31}}",
         "a={{W,X,Y,Z},{G,H,I,J},{Q,R,S,T}},b={1000,1001}," +
@@ -241,61 +229,53 @@
                                  [b'WXYZ', b'GHIJ', b'QRST'],
                                  [b'STUV', b'CDEF', b'MNOP']]
     assert arr['b'].tolist() == [[0, 1], [1000, 1001], [2000, 2001]]
-    assert create_array_array(0).dtype == arr.dtype
+    assert m.create_array_array(0).dtype == arr.dtype
 
 
 def test_enum_array():
-    from pybind11_tests import create_enum_array, print_enum_array
     from sys import byteorder
     e = '<' if byteorder == 'little' else '>'
 
-    arr = create_enum_array(3)
+    arr = m.create_enum_array(3)
     dtype = arr.dtype
     assert dtype == np.dtype([('e1', e + 'i8'), ('e2', 'u1')])
-    assert print_enum_array(arr) == [
+    assert m.print_enum_array(arr) == [
         "e1=A,e2=X",
         "e1=B,e2=Y",
         "e1=A,e2=X"
     ]
     assert arr['e1'].tolist() == [-1, 1, -1]
     assert arr['e2'].tolist() == [1, 2, 1]
-    assert create_enum_array(0).dtype == dtype
+    assert m.create_enum_array(0).dtype == dtype
 
 
 def test_complex_array():
-    from pybind11_tests import create_complex_array, print_complex_array
     from sys import byteorder
     e = '<' if byteorder == 'little' else '>'
 
-    arr = create_complex_array(3)
+    arr = m.create_complex_array(3)
     dtype = arr.dtype
     assert dtype == np.dtype([('cflt', e + 'c8'), ('cdbl', e + 'c16')])
-    assert print_complex_array(arr) == [
+    assert m.print_complex_array(arr) == [
         "c:(0,0.25),(0.5,0.75)",
         "c:(1,1.25),(1.5,1.75)",
         "c:(2,2.25),(2.5,2.75)"
     ]
     assert arr['cflt'].tolist() == [0.0 + 0.25j, 1.0 + 1.25j, 2.0 + 2.25j]
     assert arr['cdbl'].tolist() == [0.5 + 0.75j, 1.5 + 1.75j, 2.5 + 2.75j]
-    assert create_complex_array(0).dtype == dtype
+    assert m.create_complex_array(0).dtype == dtype
 
 
 def test_signature(doc):
-    from pybind11_tests import create_rec_nested
-
-    assert doc(create_rec_nested) == "create_rec_nested(arg0: int) -> numpy.ndarray[NestedStruct]"
+    assert doc(m.create_rec_nested) == \
+        "create_rec_nested(arg0: int) -> numpy.ndarray[NestedStruct]"
 
 
 def test_scalar_conversion():
-    from pybind11_tests import (create_rec_simple, f_simple,
-                                create_rec_packed, f_packed,
-                                create_rec_nested, f_nested,
-                                create_enum_array)
-
     n = 3
-    arrays = [create_rec_simple(n), create_rec_packed(n),
-              create_rec_nested(n), create_enum_array(n)]
-    funcs = [f_simple, f_packed, f_nested]
+    arrays = [m.create_rec_simple(n), m.create_rec_packed(n),
+              m.create_rec_nested(n), m.create_enum_array(n)]
+    funcs = [m.f_simple, m.f_packed, m.f_nested]
 
     for i, func in enumerate(funcs):
         for j, arr in enumerate(arrays):
@@ -308,14 +288,11 @@
 
 
 def test_register_dtype():
-    from pybind11_tests import register_dtype
-
     with pytest.raises(RuntimeError) as excinfo:
-        register_dtype()
+        m.register_dtype()
     assert 'dtype is already registered' in str(excinfo.value)
 
 
 @pytest.requires_numpy
 def test_compare_buffer_info():
-    from pybind11_tests import compare_buffer_info
-    assert all(compare_buffer_info())
+    assert all(m.compare_buffer_info())
diff --git a/tests/test_numpy_vectorize.cpp b/tests/test_numpy_vectorize.cpp
index b1f8208..a875a74 100644
--- a/tests/test_numpy_vectorize.cpp
+++ b/tests/test_numpy_vectorize.cpp
@@ -16,22 +16,11 @@
     return (float) x*y*z;
 }
 
-std::complex<double> my_func3(std::complex<double> c) {
-    return c * std::complex<double>(2.f);
-}
+TEST_SUBMODULE(numpy_vectorize, m) {
+    try { py::module::import("numpy"); }
+    catch (...) { return; }
 
-struct VectorizeTestClass {
-    VectorizeTestClass(int v) : value{v} {};
-    float method(int x, float y) { return y + (float) (x + value); }
-    int value = 0;
-};
-
-struct NonPODClass {
-    NonPODClass(int v) : value{v} {}
-    int value;
-};
-
-test_initializer numpy_vectorize([](py::module &m) {
+    // test_vectorize, test_docs, test_array_collapse
     // Vectorize all arguments of a function (though non-vector arguments are also allowed)
     m.def("vectorized_func", py::vectorize(my_func));
 
@@ -43,16 +32,24 @@
     );
 
     // Vectorize a complex-valued function
-    m.def("vectorized_func3", py::vectorize(my_func3));
+    m.def("vectorized_func3", py::vectorize(
+        [](std::complex<double> c) { return c * std::complex<double>(2.f); }
+    ));
 
-    /// Numpy function which only accepts specific data types
+    // test_type_selection
+    // Numpy function which only accepts specific data types
     m.def("selective_func", [](py::array_t<int, py::array::c_style>) { return "Int branch taken."; });
     m.def("selective_func", [](py::array_t<float, py::array::c_style>) { return "Float branch taken."; });
     m.def("selective_func", [](py::array_t<std::complex<float>, py::array::c_style>) { return "Complex float branch taken."; });
 
 
+    // test_passthrough_arguments
     // Passthrough test: references and non-pod types should be automatically passed through (in the
     // function definition below, only `b`, `d`, and `g` are vectorized):
+    struct NonPODClass {
+        NonPODClass(int v) : value{v} {}
+        int value;
+    };
     py::class_<NonPODClass>(m, "NonPODClass").def(py::init<int>());
     m.def("vec_passthrough", py::vectorize(
         [](double *a, double b, py::array_t<double> c, const int &d, int &e, NonPODClass f, const double g) {
@@ -60,6 +57,12 @@
         }
     ));
 
+    // test_method_vectorization
+    struct VectorizeTestClass {
+        VectorizeTestClass(int v) : value{v} {};
+        float method(int x, float y) { return y + (float) (x + value); }
+        int value = 0;
+    };
     py::class_<VectorizeTestClass> vtc(m, "VectorizeTestClass");
     vtc .def(py::init<int>())
         .def_readwrite("value", &VectorizeTestClass::value);
@@ -67,6 +70,7 @@
     // Automatic vectorizing of methods
     vtc.def("method", py::vectorize(&VectorizeTestClass::method));
 
+    // test_trivial_broadcasting
     // Internal optimization test for whether the input is trivially broadcastable:
     py::enum_<py::detail::broadcast_trivial>(m, "trivial")
         .value("f_trivial", py::detail::broadcast_trivial::f_trivial)
@@ -82,4 +86,4 @@
         std::array<py::buffer_info, 3> buffers {{ arg1.request(), arg2.request(), arg3.request() }};
         return py::detail::broadcast(buffers, ndim, shape);
     });
-});
+}
diff --git a/tests/test_numpy_vectorize.py b/tests/test_numpy_vectorize.py
index 362b036..0e9c883 100644
--- a/tests/test_numpy_vectorize.py
+++ b/tests/test_numpy_vectorize.py
@@ -1,4 +1,5 @@
 import pytest
+from pybind11_tests import numpy_vectorize as m
 
 pytestmark = pytest.requires_numpy
 
@@ -7,11 +8,9 @@
 
 
 def test_vectorize(capture):
-    from pybind11_tests import vectorized_func, vectorized_func2, vectorized_func3
+    assert np.isclose(m.vectorized_func3(np.array(3 + 7j)), [6 + 14j])
 
-    assert np.isclose(vectorized_func3(np.array(3 + 7j)), [6 + 14j])
-
-    for f in [vectorized_func, vectorized_func2]:
+    for f in [m.vectorized_func, m.vectorized_func2]:
         with capture:
             assert np.isclose(f(1, 2, 3), 6)
         assert capture == "my_func(x:int=1, y:float=2, z:float=3)"
@@ -103,23 +102,19 @@
 
 
 def test_type_selection():
-    from pybind11_tests import selective_func
-
-    assert selective_func(np.array([1], dtype=np.int32)) == "Int branch taken."
-    assert selective_func(np.array([1.0], dtype=np.float32)) == "Float branch taken."
-    assert selective_func(np.array([1.0j], dtype=np.complex64)) == "Complex float branch taken."
+    assert m.selective_func(np.array([1], dtype=np.int32)) == "Int branch taken."
+    assert m.selective_func(np.array([1.0], dtype=np.float32)) == "Float branch taken."
+    assert m.selective_func(np.array([1.0j], dtype=np.complex64)) == "Complex float branch taken."
 
 
 def test_docs(doc):
-    from pybind11_tests import vectorized_func
-
-    assert doc(vectorized_func) == """
+    assert doc(m.vectorized_func) == """
         vectorized_func(arg0: numpy.ndarray[int32], arg1: numpy.ndarray[float32], arg2: numpy.ndarray[float64]) -> object
     """  # noqa: E501 line too long
 
 
 def test_trivial_broadcasting():
-    from pybind11_tests import vectorized_is_trivial, trivial, vectorized_func
+    trivial, vectorized_is_trivial = m.trivial, m.vectorized_is_trivial
 
     assert vectorized_is_trivial(1, 2, 3) == trivial.c_trivial
     assert vectorized_is_trivial(np.array(1), np.array(2), 3) == trivial.c_trivial
@@ -153,51 +148,49 @@
     assert vectorized_is_trivial(z1[1::4, 1::4], y2, 1) == trivial.f_trivial
     assert vectorized_is_trivial(y1[1::4, 1::4], z2, 1) == trivial.c_trivial
 
-    assert vectorized_func(z1, z2, z3).flags.c_contiguous
-    assert vectorized_func(y1, y2, y3).flags.f_contiguous
-    assert vectorized_func(z1, 1, 1).flags.c_contiguous
-    assert vectorized_func(1, y2, 1).flags.f_contiguous
-    assert vectorized_func(z1[1::4, 1::4], y2, 1).flags.f_contiguous
-    assert vectorized_func(y1[1::4, 1::4], z2, 1).flags.c_contiguous
+    assert m.vectorized_func(z1, z2, z3).flags.c_contiguous
+    assert m.vectorized_func(y1, y2, y3).flags.f_contiguous
+    assert m.vectorized_func(z1, 1, 1).flags.c_contiguous
+    assert m.vectorized_func(1, y2, 1).flags.f_contiguous
+    assert m.vectorized_func(z1[1::4, 1::4], y2, 1).flags.f_contiguous
+    assert m.vectorized_func(y1[1::4, 1::4], z2, 1).flags.c_contiguous
 
 
 def test_passthrough_arguments(doc):
-    from pybind11_tests import vec_passthrough, NonPODClass
-
-    assert doc(vec_passthrough) == (
-        "vec_passthrough("
-        "arg0: float, arg1: numpy.ndarray[float64], arg2: numpy.ndarray[float64], "
-        "arg3: numpy.ndarray[int32], arg4: int, arg5: m.NonPODClass, arg6: numpy.ndarray[float64]"
-        ") -> object")
+    assert doc(m.vec_passthrough) == (
+        "vec_passthrough(" + ", ".join([
+            "arg0: float",
+            "arg1: numpy.ndarray[float64]",
+            "arg2: numpy.ndarray[float64]",
+            "arg3: numpy.ndarray[int32]",
+            "arg4: int",
+            "arg5: m.numpy_vectorize.NonPODClass",
+            "arg6: numpy.ndarray[float64]"]) + ") -> object")
 
     b = np.array([[10, 20, 30]], dtype='float64')
     c = np.array([100, 200])  # NOT a vectorized argument
     d = np.array([[1000], [2000], [3000]], dtype='int')
     g = np.array([[1000000, 2000000, 3000000]], dtype='int')  # requires casting
     assert np.all(
-        vec_passthrough(1, b, c, d, 10000, NonPODClass(100000), g) ==
+        m.vec_passthrough(1, b, c, d, 10000, m.NonPODClass(100000), g) ==
         np.array([[1111111, 2111121, 3111131],
                   [1112111, 2112121, 3112131],
                   [1113111, 2113121, 3113131]]))
 
 
 def test_method_vectorization():
-    from pybind11_tests import VectorizeTestClass
-
-    o = VectorizeTestClass(3)
+    o = m.VectorizeTestClass(3)
     x = np.array([1, 2], dtype='int')
     y = np.array([[10], [20]], dtype='float32')
     assert np.all(o.method(x, y) == [[14, 15], [24, 25]])
 
 
 def test_array_collapse():
-    from pybind11_tests import vectorized_func
-
-    assert not isinstance(vectorized_func(1, 2, 3), np.ndarray)
-    assert not isinstance(vectorized_func(np.array(1), 2, 3), np.ndarray)
-    z = vectorized_func([1], 2, 3)
+    assert not isinstance(m.vectorized_func(1, 2, 3), np.ndarray)
+    assert not isinstance(m.vectorized_func(np.array(1), 2, 3), np.ndarray)
+    z = m.vectorized_func([1], 2, 3)
     assert isinstance(z, np.ndarray)
     assert z.shape == (1, )
-    z = vectorized_func(1, [[[2]]], 3)
+    z = m.vectorized_func(1, [[[2]]], 3)
     assert isinstance(z, np.ndarray)
     assert z.shape == (1, 1, 1)
diff --git a/tests/test_opaque_types.cpp b/tests/test_opaque_types.cpp
index 54f4dc7..5e83df0 100644
--- a/tests/test_opaque_types.cpp
+++ b/tests/test_opaque_types.cpp
@@ -11,17 +11,13 @@
 #include <pybind11/stl.h>
 #include <vector>
 
-typedef std::vector<std::string> StringList;
-
-class ClassWithSTLVecProperty {
-public:
-    StringList stringList;
-};
+using StringList = std::vector<std::string>;
 
 /* IMPORTANT: Disable internal pybind11 translation mechanisms for STL data structures */
 PYBIND11_MAKE_OPAQUE(StringList);
 
-test_initializer opaque_types([](py::module &m) {
+TEST_SUBMODULE(opaque_types, m) {
+    // test_string_list
     py::class_<StringList>(m, "StringList")
         .def(py::init<>())
         .def("pop_back", &StringList::pop_back)
@@ -33,6 +29,10 @@
            return py::make_iterator(v.begin(), v.end());
         }, py::keep_alive<0, 1>());
 
+    class ClassWithSTLVecProperty {
+    public:
+        StringList stringList;
+    };
     py::class_<ClassWithSTLVecProperty>(m, "ClassWithSTLVecProperty")
         .def(py::init<>())
         .def_readwrite("stringList", &ClassWithSTLVecProperty::stringList);
@@ -49,6 +49,7 @@
         return ret + "]";
     });
 
+    // test_pointers
     m.def("return_void_ptr", []() { return (void *) 0x1234; });
     m.def("get_void_ptr_value", [](void *ptr) { return reinterpret_cast<std::intptr_t>(ptr); });
     m.def("return_null_str", []() { return (char *) nullptr; });
@@ -59,4 +60,4 @@
         result->push_back("some value");
         return std::unique_ptr<StringList>(result);
     });
-});
+}
diff --git a/tests/test_opaque_types.py b/tests/test_opaque_types.py
index 1cd4102..2d3aef5 100644
--- a/tests/test_opaque_types.py
+++ b/tests/test_opaque_types.py
@@ -1,40 +1,36 @@
 import pytest
+from pybind11_tests import opaque_types as m
+from pybind11_tests import ConstructorStats, UserType
 
 
 def test_string_list():
-    from pybind11_tests import StringList, ClassWithSTLVecProperty, print_opaque_list
-
-    l = StringList()
+    l = m.StringList()
     l.push_back("Element 1")
     l.push_back("Element 2")
-    assert print_opaque_list(l) == "Opaque list: [Element 1, Element 2]"
+    assert m.print_opaque_list(l) == "Opaque list: [Element 1, Element 2]"
     assert l.back() == "Element 2"
 
     for i, k in enumerate(l, start=1):
         assert k == "Element {}".format(i)
     l.pop_back()
-    assert print_opaque_list(l) == "Opaque list: [Element 1]"
+    assert m.print_opaque_list(l) == "Opaque list: [Element 1]"
 
-    cvp = ClassWithSTLVecProperty()
-    assert print_opaque_list(cvp.stringList) == "Opaque list: []"
+    cvp = m.ClassWithSTLVecProperty()
+    assert m.print_opaque_list(cvp.stringList) == "Opaque list: []"
 
     cvp.stringList = l
     cvp.stringList.push_back("Element 3")
-    assert print_opaque_list(cvp.stringList) == "Opaque list: [Element 1, Element 3]"
+    assert m.print_opaque_list(cvp.stringList) == "Opaque list: [Element 1, Element 3]"
 
 
 def test_pointers(msg):
-    from pybind11_tests import (return_void_ptr, get_void_ptr_value, ExampleMandA,
-                                print_opaque_list, return_null_str, get_null_str_value,
-                                return_unique_ptr, ConstructorStats)
-
-    living_before = ConstructorStats.get(ExampleMandA).alive()
-    assert get_void_ptr_value(return_void_ptr()) == 0x1234
-    assert get_void_ptr_value(ExampleMandA())  # Should also work for other C++ types
-    assert ConstructorStats.get(ExampleMandA).alive() == living_before
+    living_before = ConstructorStats.get(UserType).alive()
+    assert m.get_void_ptr_value(m.return_void_ptr()) == 0x1234
+    assert m.get_void_ptr_value(UserType())  # Should also work for other C++ types
+    assert ConstructorStats.get(UserType).alive() == living_before
 
     with pytest.raises(TypeError) as excinfo:
-        get_void_ptr_value([1, 2, 3])  # This should not work
+        m.get_void_ptr_value([1, 2, 3])  # This should not work
     assert msg(excinfo.value) == """
         get_void_ptr_value(): incompatible function arguments. The following argument types are supported:
             1. (arg0: capsule) -> int
@@ -42,9 +38,9 @@
         Invoked with: [1, 2, 3]
     """  # noqa: E501 line too long
 
-    assert return_null_str() is None
-    assert get_null_str_value(return_null_str()) is not None
+    assert m.return_null_str() is None
+    assert m.get_null_str_value(m.return_null_str()) is not None
 
-    ptr = return_unique_ptr()
+    ptr = m.return_unique_ptr()
     assert "StringList" in repr(ptr)
-    assert print_opaque_list(ptr) == "Opaque list: [some value]"
+    assert m.print_opaque_list(ptr) == "Opaque list: [some value]"
diff --git a/tests/test_operator_overloading.cpp b/tests/test_operator_overloading.cpp
index cb22b12..d4d35f0 100644
--- a/tests/test_operator_overloading.cpp
+++ b/tests/test_operator_overloading.cpp
@@ -16,22 +16,11 @@
     Vector2(float x, float y) : x(x), y(y) { print_created(this, toString()); }
     Vector2(const Vector2 &v) : x(v.x), y(v.y) { print_copy_created(this); }
     Vector2(Vector2 &&v) : x(v.x), y(v.y) { print_move_created(this); v.x = v.y = 0; }
+    Vector2 &operator=(const Vector2 &v) { x = v.x; y = v.y; print_copy_assigned(this); return *this; }
+    Vector2 &operator=(Vector2 &&v) { x = v.x; y = v.y; v.x = v.y = 0; print_move_assigned(this); return *this; }
     ~Vector2() { print_destroyed(this); }
 
-    std::string toString() const {
-        return "[" + std::to_string(x) + ", " + std::to_string(y) + "]";
-    }
-
-    void operator=(const Vector2 &v) {
-        print_copy_assigned(this);
-        x = v.x;
-        y = v.y;
-    }
-
-    void operator=(Vector2 &&v) {
-        print_move_assigned(this);
-        x = v.x; y = v.y; v.x = v.y = 0;
-    }
+    std::string toString() const { return "[" + std::to_string(x) + ", " + std::to_string(y) + "]"; }
 
     Vector2 operator+(const Vector2 &v) const { return Vector2(x + v.x, y + v.y); }
     Vector2 operator-(const Vector2 &v) const { return Vector2(x - v.x, y - v.y); }
@@ -64,30 +53,9 @@
 int operator+(const C2 &, const C1 &) { return 21; }
 int operator+(const C1 &, const C2 &) { return 12; }
 
-struct NestABase {
-    int value = -2;
-};
+TEST_SUBMODULE(operators, m) {
 
-struct NestA : NestABase {
-    int value = 3;
-    NestA& operator+=(int i) { value += i; return *this; }
-};
-
-struct NestB {
-    NestA a;
-    int value = 4;
-    NestB& operator-=(int i) { value -= i; return *this; }
-};
-
-struct NestC {
-    NestB b;
-    int value = 5;
-    NestC& operator*=(int i) { value *= i; return *this; }
-};
-
-test_initializer operator_overloading([](py::module &pm) {
-    auto m = pm.def_submodule("operators");
-
+    // test_operator_overloading
     py::class_<Vector2>(m, "Vector2")
         .def(py::init<float, float>())
         .def(py::self + py::self)
@@ -113,6 +81,7 @@
 
     m.attr("Vector") = m.attr("Vector2");
 
+    // test_operators_notimplemented
     // #393: need to return NotSupported to ensure correct arithmetic operator behavior
     py::class_<C1>(m, "C1")
         .def(py::init<>())
@@ -124,29 +93,44 @@
         .def("__add__", [](const C2& c2, const C1& c1) { return c2 + c1; })
         .def("__radd__", [](const C2& c2, const C1& c1) { return c1 + c2; });
 
+    // test_nested
     // #328: first member in a class can't be used in operators
+    struct NestABase { int value = -2; };
     py::class_<NestABase>(m, "NestABase")
         .def(py::init<>())
         .def_readwrite("value", &NestABase::value);
 
+    struct NestA : NestABase {
+        int value = 3;
+        NestA& operator+=(int i) { value += i; return *this; }
+    };
     py::class_<NestA>(m, "NestA")
         .def(py::init<>())
         .def(py::self += int())
         .def("as_base", [](NestA &a) -> NestABase& {
             return (NestABase&) a;
         }, py::return_value_policy::reference_internal);
+    m.def("get_NestA", [](const NestA &a) { return a.value; });
 
+    struct NestB {
+        NestA a;
+        int value = 4;
+        NestB& operator-=(int i) { value -= i; return *this; }
+    };
     py::class_<NestB>(m, "NestB")
         .def(py::init<>())
         .def(py::self -= int())
         .def_readwrite("a", &NestB::a);
+    m.def("get_NestB", [](const NestB &b) { return b.value; });
 
+    struct NestC {
+        NestB b;
+        int value = 5;
+        NestC& operator*=(int i) { value *= i; return *this; }
+    };
     py::class_<NestC>(m, "NestC")
         .def(py::init<>())
         .def(py::self *= int())
         .def_readwrite("b", &NestC::b);
-
-    m.def("get_NestA", [](const NestA &a) { return a.value; });
-    m.def("get_NestB", [](const NestB &b) { return b.value; });
     m.def("get_NestC", [](const NestC &c) { return c.value; });
-});
+}
diff --git a/tests/test_operator_overloading.py b/tests/test_operator_overloading.py
index 63dd546..845cedd 100644
--- a/tests/test_operator_overloading.py
+++ b/tests/test_operator_overloading.py
@@ -1,12 +1,11 @@
 import pytest
+from pybind11_tests import operators as m
 from pybind11_tests import ConstructorStats
 
 
 def test_operator_overloading():
-    from pybind11_tests.operators import Vector2, Vector
-
-    v1 = Vector2(1, 2)
-    v2 = Vector(3, -1)
+    v1 = m.Vector2(1, 2)
+    v2 = m.Vector(3, -1)
     assert str(v1) == "[1.000000, 2.000000]"
     assert str(v2) == "[3.000000, -1.000000]"
 
@@ -36,7 +35,7 @@
     v2 /= v1
     assert str(v2) == "[2.000000, 8.000000]"
 
-    cstats = ConstructorStats.get(Vector2)
+    cstats = ConstructorStats.get(m.Vector2)
     assert cstats.alive() == 2
     del v1
     assert cstats.alive() == 1
@@ -59,9 +58,8 @@
 
 def test_operators_notimplemented():
     """#393: need to return NotSupported to ensure correct arithmetic operator behavior"""
-    from pybind11_tests.operators import C1, C2
 
-    c1, c2 = C1(), C2()
+    c1, c2 = m.C1(), m.C2()
     assert c1 + c1 == 11
     assert c2 + c2 == 22
     assert c2 + c1 == 21
@@ -70,24 +68,23 @@
 
 def test_nested():
     """#328: first member in a class can't be used in operators"""
-    from pybind11_tests.operators import NestA, NestB, NestC, get_NestA, get_NestB, get_NestC
 
-    a = NestA()
-    b = NestB()
-    c = NestC()
+    a = m.NestA()
+    b = m.NestB()
+    c = m.NestC()
 
     a += 10
-    assert get_NestA(a) == 13
+    assert m.get_NestA(a) == 13
     b.a += 100
-    assert get_NestA(b.a) == 103
+    assert m.get_NestA(b.a) == 103
     c.b.a += 1000
-    assert get_NestA(c.b.a) == 1003
+    assert m.get_NestA(c.b.a) == 1003
     b -= 1
-    assert get_NestB(b) == 3
+    assert m.get_NestB(b) == 3
     c.b -= 3
-    assert get_NestB(c.b) == 1
+    assert m.get_NestB(c.b) == 1
     c *= 7
-    assert get_NestC(c) == 35
+    assert m.get_NestC(c) == 35
 
     abase = a.as_base()
     assert abase.value == -2
diff --git a/tests/test_pickling.cpp b/tests/test_pickling.cpp
index 52b1dbc..1e5f4ce 100644
--- a/tests/test_pickling.cpp
+++ b/tests/test_pickling.cpp
@@ -9,30 +9,22 @@
 
 #include "pybind11_tests.h"
 
-class Pickleable {
-public:
-    Pickleable(const std::string &value) : m_value(value) { }
-    const std::string &value() const { return m_value; }
+TEST_SUBMODULE(pickling, m) {
+    // test_roundtrip
+    class Pickleable {
+    public:
+        Pickleable(const std::string &value) : m_value(value) { }
+        const std::string &value() const { return m_value; }
 
-    void setExtra1(int extra1) { m_extra1 = extra1; }
-    void setExtra2(int extra2) { m_extra2 = extra2; }
-    int extra1() const { return m_extra1; }
-    int extra2() const { return m_extra2; }
-private:
-    std::string m_value;
-    int m_extra1 = 0;
-    int m_extra2 = 0;
-};
-
-class PickleableWithDict {
-public:
-    PickleableWithDict(const std::string &value) : value(value) { }
-
-    std::string value;
-    int extra;
-};
-
-test_initializer pickling([](py::module &m) {
+        void setExtra1(int extra1) { m_extra1 = extra1; }
+        void setExtra2(int extra2) { m_extra2 = extra2; }
+        int extra1() const { return m_extra1; }
+        int extra2() const { return m_extra2; }
+    private:
+        std::string m_value;
+        int m_extra1 = 0;
+        int m_extra2 = 0;
+    };
     py::class_<Pickleable>(m, "Pickleable")
         .def(py::init<std::string>())
         .def("value", &Pickleable::value)
@@ -58,6 +50,14 @@
         });
 
 #if !defined(PYPY_VERSION)
+    // test_roundtrip_with_dict
+    class PickleableWithDict {
+    public:
+        PickleableWithDict(const std::string &value) : value(value) { }
+
+        std::string value;
+        int extra;
+    };
     py::class_<PickleableWithDict>(m, "PickleableWithDict", py::dynamic_attr())
         .def(py::init<std::string>())
         .def_readwrite("value", &PickleableWithDict::value)
@@ -80,4 +80,4 @@
             self.attr("__dict__") = t[2];
         });
 #endif
-});
+}
diff --git a/tests/test_pickling.py b/tests/test_pickling.py
index 548c618..6cbcdf5 100644
--- a/tests/test_pickling.py
+++ b/tests/test_pickling.py
@@ -1,4 +1,5 @@
 import pytest
+from pybind11_tests import pickling as m
 
 try:
     import cPickle as pickle  # Use cPickle on Python 2.7
@@ -7,9 +8,7 @@
 
 
 def test_roundtrip():
-    from pybind11_tests import Pickleable
-
-    p = Pickleable("test_value")
+    p = m.Pickleable("test_value")
     p.setExtra1(15)
     p.setExtra2(48)
 
@@ -22,9 +21,7 @@
 
 @pytest.unsupported_on_pypy
 def test_roundtrip_with_dict():
-    from pybind11_tests import PickleableWithDict
-
-    p = PickleableWithDict("test_value")
+    p = m.PickleableWithDict("test_value")
     p.extra = 15
     p.dynamic = "Attribute"
 
diff --git a/tests/test_sequences_and_iterators.cpp b/tests/test_sequences_and_iterators.cpp
index 89fde8f..a455212 100644
--- a/tests/test_sequences_and_iterators.cpp
+++ b/tests/test_sequences_and_iterators.cpp
@@ -13,146 +13,6 @@
 #include <pybind11/operators.h>
 #include <pybind11/stl.h>
 
-class Sequence {
-public:
-    Sequence(size_t size) : m_size(size) {
-        print_created(this, "of size", m_size);
-        m_data = new float[size];
-        memset(m_data, 0, sizeof(float) * size);
-    }
-
-    Sequence(const std::vector<float> &value) : m_size(value.size()) {
-        print_created(this, "of size", m_size, "from std::vector");
-        m_data = new float[m_size];
-        memcpy(m_data, &value[0], sizeof(float) * m_size);
-    }
-
-    Sequence(const Sequence &s) : m_size(s.m_size) {
-        print_copy_created(this);
-        m_data = new float[m_size];
-        memcpy(m_data, s.m_data, sizeof(float)*m_size);
-    }
-
-    Sequence(Sequence &&s) : m_size(s.m_size), m_data(s.m_data) {
-        print_move_created(this);
-        s.m_size = 0;
-        s.m_data = nullptr;
-    }
-
-    ~Sequence() {
-        print_destroyed(this);
-        delete[] m_data;
-    }
-
-    Sequence &operator=(const Sequence &s) {
-        if (&s != this) {
-            delete[] m_data;
-            m_size = s.m_size;
-            m_data = new float[m_size];
-            memcpy(m_data, s.m_data, sizeof(float)*m_size);
-        }
-
-        print_copy_assigned(this);
-
-        return *this;
-    }
-
-    Sequence &operator=(Sequence &&s) {
-        if (&s != this) {
-            delete[] m_data;
-            m_size = s.m_size;
-            m_data = s.m_data;
-            s.m_size = 0;
-            s.m_data = nullptr;
-        }
-
-        print_move_assigned(this);
-
-        return *this;
-    }
-
-    bool operator==(const Sequence &s) const {
-        if (m_size != s.size())
-            return false;
-        for (size_t i=0; i<m_size; ++i)
-            if (m_data[i] != s[i])
-                return false;
-        return true;
-    }
-
-    bool operator!=(const Sequence &s) const {
-        return !operator==(s);
-    }
-
-    float operator[](size_t index) const {
-        return m_data[index];
-    }
-
-    float &operator[](size_t index) {
-        return m_data[index];
-    }
-
-    bool contains(float v) const {
-        for (size_t i=0; i<m_size; ++i)
-            if (v == m_data[i])
-                return true;
-        return false;
-    }
-
-    Sequence reversed() const {
-        Sequence result(m_size);
-        for (size_t i=0; i<m_size; ++i)
-            result[m_size-i-1] = m_data[i];
-        return result;
-    }
-
-    size_t size() const { return m_size; }
-
-    const float *begin() const { return m_data; }
-    const float *end() const { return m_data+m_size; }
-
-private:
-    size_t m_size;
-    float *m_data;
-};
-
-class IntPairs {
-public:
-    IntPairs(std::vector<std::pair<int, int>> data) : data_(std::move(data)) {}
-    const std::pair<int, int>* begin() const { return data_.data(); }
-
-private:
-    std::vector<std::pair<int, int>> data_;
-};
-
-// Interface of a map-like object that isn't (directly) an unordered_map, but provides some basic
-// map-like functionality.
-class StringMap {
-public:
-    StringMap() = default;
-    StringMap(std::unordered_map<std::string, std::string> init)
-        : map(std::move(init)) {}
-
-    void set(std::string key, std::string val) {
-        map[key] = val;
-    }
-
-    std::string get(std::string key) const {
-        return map.at(key);
-    }
-
-    size_t size() const {
-        return map.size();
-    }
-
-private:
-    std::unordered_map<std::string, std::string> map;
-
-public:
-    decltype(map.cbegin()) begin() const { return map.cbegin(); }
-    decltype(map.cend()) end() const { return map.cend(); }
-};
-
 template<typename T>
 class NonZeroIterator {
     const T* ptr_;
@@ -210,66 +70,164 @@
     return checks;
 }
 
-test_initializer sequences_and_iterators([](py::module &pm) {
-    auto m = pm.def_submodule("sequences_and_iterators");
+TEST_SUBMODULE(sequences_and_iterators, m) {
 
-    py::class_<Sequence> seq(m, "Sequence");
+    // test_sequence
+    class Sequence {
+    public:
+        Sequence(size_t size) : m_size(size) {
+            print_created(this, "of size", m_size);
+            m_data = new float[size];
+            memset(m_data, 0, sizeof(float) * size);
+        }
+        Sequence(const std::vector<float> &value) : m_size(value.size()) {
+            print_created(this, "of size", m_size, "from std::vector");
+            m_data = new float[m_size];
+            memcpy(m_data, &value[0], sizeof(float) * m_size);
+        }
+        Sequence(const Sequence &s) : m_size(s.m_size) {
+            print_copy_created(this);
+            m_data = new float[m_size];
+            memcpy(m_data, s.m_data, sizeof(float)*m_size);
+        }
+        Sequence(Sequence &&s) : m_size(s.m_size), m_data(s.m_data) {
+            print_move_created(this);
+            s.m_size = 0;
+            s.m_data = nullptr;
+        }
 
-    seq.def(py::init<size_t>())
-       .def(py::init<const std::vector<float>&>())
-       /// Bare bones interface
-       .def("__getitem__", [](const Sequence &s, size_t i) {
-            if (i >= s.size())
-                throw py::index_error();
+        ~Sequence() { print_destroyed(this); delete[] m_data; }
+
+        Sequence &operator=(const Sequence &s) {
+            if (&s != this) {
+                delete[] m_data;
+                m_size = s.m_size;
+                m_data = new float[m_size];
+                memcpy(m_data, s.m_data, sizeof(float)*m_size);
+            }
+            print_copy_assigned(this);
+            return *this;
+        }
+
+        Sequence &operator=(Sequence &&s) {
+            if (&s != this) {
+                delete[] m_data;
+                m_size = s.m_size;
+                m_data = s.m_data;
+                s.m_size = 0;
+                s.m_data = nullptr;
+            }
+            print_move_assigned(this);
+            return *this;
+        }
+
+        bool operator==(const Sequence &s) const {
+            if (m_size != s.size()) return false;
+            for (size_t i = 0; i < m_size; ++i)
+                if (m_data[i] != s[i])
+                    return false;
+            return true;
+        }
+        bool operator!=(const Sequence &s) const { return !operator==(s); }
+
+        float operator[](size_t index) const { return m_data[index]; }
+        float &operator[](size_t index) { return m_data[index]; }
+
+        bool contains(float v) const {
+            for (size_t i = 0; i < m_size; ++i)
+                if (v == m_data[i])
+                    return true;
+            return false;
+        }
+
+        Sequence reversed() const {
+            Sequence result(m_size);
+            for (size_t i = 0; i < m_size; ++i)
+                result[m_size - i - 1] = m_data[i];
+            return result;
+        }
+
+        size_t size() const { return m_size; }
+
+        const float *begin() const { return m_data; }
+        const float *end() const { return m_data+m_size; }
+
+    private:
+        size_t m_size;
+        float *m_data;
+    };
+    py::class_<Sequence>(m, "Sequence")
+        .def(py::init<size_t>())
+        .def(py::init<const std::vector<float>&>())
+        /// Bare bones interface
+        .def("__getitem__", [](const Sequence &s, size_t i) {
+            if (i >= s.size()) throw py::index_error();
             return s[i];
         })
-       .def("__setitem__", [](Sequence &s, size_t i, float v) {
-            if (i >= s.size())
-                throw py::index_error();
+        .def("__setitem__", [](Sequence &s, size_t i, float v) {
+            if (i >= s.size()) throw py::index_error();
             s[i] = v;
         })
-       .def("__len__", &Sequence::size)
-       /// Optional sequence protocol operations
-       .def("__iter__", [](const Sequence &s) { return py::make_iterator(s.begin(), s.end()); },
-                        py::keep_alive<0, 1>() /* Essential: keep object alive while iterator exists */)
-       .def("__contains__", [](const Sequence &s, float v) { return s.contains(v); })
-       .def("__reversed__", [](const Sequence &s) -> Sequence { return s.reversed(); })
-       /// Slicing protocol (optional)
-       .def("__getitem__", [](const Sequence &s, py::slice slice) -> Sequence* {
+        .def("__len__", &Sequence::size)
+        /// Optional sequence protocol operations
+        .def("__iter__", [](const Sequence &s) { return py::make_iterator(s.begin(), s.end()); },
+                         py::keep_alive<0, 1>() /* Essential: keep object alive while iterator exists */)
+        .def("__contains__", [](const Sequence &s, float v) { return s.contains(v); })
+        .def("__reversed__", [](const Sequence &s) -> Sequence { return s.reversed(); })
+        /// Slicing protocol (optional)
+        .def("__getitem__", [](const Sequence &s, py::slice slice) -> Sequence* {
             size_t start, stop, step, slicelength;
             if (!slice.compute(s.size(), &start, &stop, &step, &slicelength))
                 throw py::error_already_set();
             Sequence *seq = new Sequence(slicelength);
-            for (size_t i=0; i<slicelength; ++i) {
+            for (size_t i = 0; i < slicelength; ++i) {
                 (*seq)[i] = s[start]; start += step;
             }
             return seq;
         })
-       .def("__setitem__", [](Sequence &s, py::slice slice, const Sequence &value) {
+        .def("__setitem__", [](Sequence &s, py::slice slice, const Sequence &value) {
             size_t start, stop, step, slicelength;
             if (!slice.compute(s.size(), &start, &stop, &step, &slicelength))
                 throw py::error_already_set();
             if (slicelength != value.size())
                 throw std::runtime_error("Left and right hand size of slice assignment have different sizes!");
-            for (size_t i=0; i<slicelength; ++i) {
+            for (size_t i = 0; i < slicelength; ++i) {
                 s[start] = value[i]; start += step;
             }
         })
-       /// Comparisons
-       .def(py::self == py::self)
-       .def(py::self != py::self);
-       // Could also define py::self + py::self for concatenation, etc.
+        /// Comparisons
+        .def(py::self == py::self)
+        .def(py::self != py::self)
+        // Could also define py::self + py::self for concatenation, etc.
+        ;
 
-    py::class_<StringMap> map(m, "StringMap");
+    // test_map_iterator
+    // Interface of a map-like object that isn't (directly) an unordered_map, but provides some basic
+    // map-like functionality.
+    class StringMap {
+    public:
+        StringMap() = default;
+        StringMap(std::unordered_map<std::string, std::string> init)
+            : map(std::move(init)) {}
 
-    map .def(py::init<>())
+        void set(std::string key, std::string val) { map[key] = val; }
+        std::string get(std::string key) const { return map.at(key); }
+        size_t size() const { return map.size(); }
+    private:
+        std::unordered_map<std::string, std::string> map;
+    public:
+        decltype(map.cbegin()) begin() const { return map.cbegin(); }
+        decltype(map.cend()) end() const { return map.cend(); }
+    };
+    py::class_<StringMap>(m, "StringMap")
+        .def(py::init<>())
         .def(py::init<std::unordered_map<std::string, std::string>>())
         .def("__getitem__", [](const StringMap &map, std::string key) {
                 try { return map.get(key); }
                 catch (const std::out_of_range&) {
                     throw py::key_error("key '" + key + "' does not exist");
                 }
-                })
+        })
         .def("__setitem__", &StringMap::set)
         .def("__len__", &StringMap::size)
         .def("__iter__", [](const StringMap &map) { return py::make_key_iterator(map.begin(), map.end()); },
@@ -278,14 +236,23 @@
                 py::keep_alive<0, 1>())
         ;
 
+    // test_generalized_iterators
+    class IntPairs {
+    public:
+        IntPairs(std::vector<std::pair<int, int>> data) : data_(std::move(data)) {}
+        const std::pair<int, int>* begin() const { return data_.data(); }
+    private:
+        std::vector<std::pair<int, int>> data_;
+    };
     py::class_<IntPairs>(m, "IntPairs")
         .def(py::init<std::vector<std::pair<int, int>>>())
         .def("nonzero", [](const IntPairs& s) {
                 return py::make_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()), NonZeroSentinel());
-            }, py::keep_alive<0, 1>())
+        }, py::keep_alive<0, 1>())
         .def("nonzero_keys", [](const IntPairs& s) {
             return py::make_key_iterator(NonZeroIterator<std::pair<int, int>>(s.begin()), NonZeroSentinel());
-        }, py::keep_alive<0, 1>());
+        }, py::keep_alive<0, 1>())
+        ;
 
 
 #if 0
@@ -315,6 +282,7 @@
     .def("__iter__", [](py::object s) { return PySequenceIterator(s.cast<const Sequence &>(), s); })
 #endif
 
+    // test_python_iterator_in_cpp
     m.def("object_to_list", [](py::object o) {
         auto l = py::list();
         for (auto item : o) {
@@ -348,17 +316,19 @@
        });
     });
 
-    m.def("tuple_iterator", [](py::tuple x) { return test_random_access_iterator(x); });
-    m.def("list_iterator", [](py::list x) { return test_random_access_iterator(x); });
-    m.def("sequence_iterator", [](py::sequence x) { return test_random_access_iterator(x); });
+    m.def("tuple_iterator", &test_random_access_iterator<py::tuple>);
+    m.def("list_iterator", &test_random_access_iterator<py::list>);
+    m.def("sequence_iterator", &test_random_access_iterator<py::sequence>);
 
+    // test_iterator_passthrough
     // #181: iterator passthrough did not compile
     m.def("iterator_passthrough", [](py::iterator s) -> py::iterator {
         return py::make_iterator(std::begin(s), std::end(s));
     });
 
+    // test_iterator_rvp
     // #388: Can't make iterators via make_iterator() with different r/v policies
     static std::vector<int> list = { 1, 2, 3 };
     m.def("make_iterator_1", []() { return py::make_iterator<py::return_value_policy::copy>(list); });
     m.def("make_iterator_2", []() { return py::make_iterator<py::return_value_policy::automatic>(list); });
-});
+}
diff --git a/tests/test_sequences_and_iterators.py b/tests/test_sequences_and_iterators.py
index 2ce2e60..640ca07 100644
--- a/tests/test_sequences_and_iterators.py
+++ b/tests/test_sequences_and_iterators.py
@@ -1,4 +1,6 @@
 import pytest
+from pybind11_tests import sequences_and_iterators as m
+from pybind11_tests import ConstructorStats
 
 
 def isclose(a, b, rel_tol=1e-05, abs_tol=0.0):
@@ -11,35 +13,30 @@
 
 
 def test_generalized_iterators():
-    from pybind11_tests.sequences_and_iterators import IntPairs
+    assert list(m.IntPairs([(1, 2), (3, 4), (0, 5)]).nonzero()) == [(1, 2), (3, 4)]
+    assert list(m.IntPairs([(1, 2), (2, 0), (0, 3), (4, 5)]).nonzero()) == [(1, 2)]
+    assert list(m.IntPairs([(0, 3), (1, 2), (3, 4)]).nonzero()) == []
 
-    assert list(IntPairs([(1, 2), (3, 4), (0, 5)]).nonzero()) == [(1, 2), (3, 4)]
-    assert list(IntPairs([(1, 2), (2, 0), (0, 3), (4, 5)]).nonzero()) == [(1, 2)]
-    assert list(IntPairs([(0, 3), (1, 2), (3, 4)]).nonzero()) == []
-
-    assert list(IntPairs([(1, 2), (3, 4), (0, 5)]).nonzero_keys()) == [1, 3]
-    assert list(IntPairs([(1, 2), (2, 0), (0, 3), (4, 5)]).nonzero_keys()) == [1]
-    assert list(IntPairs([(0, 3), (1, 2), (3, 4)]).nonzero_keys()) == []
+    assert list(m.IntPairs([(1, 2), (3, 4), (0, 5)]).nonzero_keys()) == [1, 3]
+    assert list(m.IntPairs([(1, 2), (2, 0), (0, 3), (4, 5)]).nonzero_keys()) == [1]
+    assert list(m.IntPairs([(0, 3), (1, 2), (3, 4)]).nonzero_keys()) == []
 
     # __next__ must continue to raise StopIteration
-    it = IntPairs([(0, 0)]).nonzero()
+    it = m.IntPairs([(0, 0)]).nonzero()
     for _ in range(3):
         with pytest.raises(StopIteration):
             next(it)
 
-    it = IntPairs([(0, 0)]).nonzero_keys()
+    it = m.IntPairs([(0, 0)]).nonzero_keys()
     for _ in range(3):
         with pytest.raises(StopIteration):
             next(it)
 
 
 def test_sequence():
-    from pybind11_tests import ConstructorStats
-    from pybind11_tests.sequences_and_iterators import Sequence
+    cstats = ConstructorStats.get(m.Sequence)
 
-    cstats = ConstructorStats.get(Sequence)
-
-    s = Sequence(5)
+    s = m.Sequence(5)
     assert cstats.values() == ['of size', '5']
 
     assert "Sequence" in repr(s)
@@ -56,7 +53,7 @@
     rev2 = s[::-1]
     assert cstats.values() == ['of size', '5']
 
-    it = iter(Sequence(0))
+    it = iter(m.Sequence(0))
     for _ in range(3):  # __next__ must continue to raise StopIteration
         with pytest.raises(StopIteration):
             next(it)
@@ -67,7 +64,7 @@
     assert allclose(rev2, expected)
     assert rev == rev2
 
-    rev[0::2] = Sequence([2.0, 2.0, 2.0])
+    rev[0::2] = m.Sequence([2.0, 2.0, 2.0])
     assert cstats.values() == ['of size', '3', 'from std::vector']
 
     assert allclose(rev, [2, 56.78, 2, 0, 2])
@@ -91,33 +88,29 @@
 
 
 def test_map_iterator():
-    from pybind11_tests.sequences_and_iterators import StringMap
-
-    m = StringMap({'hi': 'bye', 'black': 'white'})
-    assert m['hi'] == 'bye'
-    assert len(m) == 2
-    assert m['black'] == 'white'
+    sm = m.StringMap({'hi': 'bye', 'black': 'white'})
+    assert sm['hi'] == 'bye'
+    assert len(sm) == 2
+    assert sm['black'] == 'white'
 
     with pytest.raises(KeyError):
-        assert m['orange']
-    m['orange'] = 'banana'
-    assert m['orange'] == 'banana'
+        assert sm['orange']
+    sm['orange'] = 'banana'
+    assert sm['orange'] == 'banana'
 
     expected = {'hi': 'bye', 'black': 'white', 'orange': 'banana'}
-    for k in m:
-        assert m[k] == expected[k]
-    for k, v in m.items():
+    for k in sm:
+        assert sm[k] == expected[k]
+    for k, v in sm.items():
         assert v == expected[k]
 
-    it = iter(StringMap({}))
+    it = iter(m.StringMap({}))
     for _ in range(3):  # __next__ must continue to raise StopIteration
         with pytest.raises(StopIteration):
             next(it)
 
 
 def test_python_iterator_in_cpp():
-    import pybind11_tests.sequences_and_iterators as m
-
     t = (1, 2, 3)
     assert m.object_to_list(t) == [1, 2, 3]
     assert m.object_to_list(iter(t)) == [1, 2, 3]
diff --git a/tests/test_smart_ptr.cpp b/tests/test_smart_ptr.cpp
index 91239e8..9051b5c 100644
--- a/tests/test_smart_ptr.cpp
+++ b/tests/test_smart_ptr.cpp
@@ -11,108 +11,12 @@
 #include "pybind11_tests.h"
 #include "object.h"
 
-/// Custom object with builtin reference counting (see 'object.h' for the implementation)
-class MyObject1 : public Object {
-public:
-    MyObject1(int value) : value(value) {
-        print_created(this, toString());
-    }
-
-    std::string toString() const {
-        return "MyObject1[" + std::to_string(value) + "]";
-    }
-
-protected:
-    virtual ~MyObject1() {
-        print_destroyed(this);
-    }
-
-private:
-    int value;
-};
-
-/// Object managed by a std::shared_ptr<>
-class MyObject2 {
-public:
-    MyObject2(int value) : value(value) {
-        print_created(this, toString());
-    }
-
-    std::string toString() const {
-        return "MyObject2[" + std::to_string(value) + "]";
-    }
-
-    virtual ~MyObject2() {
-        print_destroyed(this);
-    }
-
-private:
-    int value;
-};
-
-/// Object managed by a std::shared_ptr<>, additionally derives from std::enable_shared_from_this<>
-class MyObject3 : public std::enable_shared_from_this<MyObject3> {
-public:
-    MyObject3(int value) : value(value) {
-        print_created(this, toString());
-    }
-
-    std::string toString() const {
-        return "MyObject3[" + std::to_string(value) + "]";
-    }
-
-    virtual ~MyObject3() {
-        print_destroyed(this);
-    }
-
-private:
-    int value;
-};
-
-class MyObject4 {
-public:
-    MyObject4(int value) : value{value} {
-        print_created(this);
-    }
-    int value;
-private:
-    ~MyObject4() {
-        print_destroyed(this);
-    }
-};
-
-/// This is just a wrapper around unique_ptr, but with extra fields to deliberately bloat up the
-/// holder size to trigger the non-simple-layout internal instance layout for single inheritance with
-/// large holder type.
-template <typename T> class huge_unique_ptr {
-    std::unique_ptr<T> ptr;
-    uint64_t padding[10];
-public:
-    huge_unique_ptr(T *p) : ptr(p) {};
-    T *get() { return ptr.get(); }
-};
-
-class MyObject5 { // managed by huge_unique_ptr
-public:
-    MyObject5(int value) : value{value} {
-        print_created(this);
-    }
-    int value;
-    ~MyObject5() {
-        print_destroyed(this);
-    }
-};
-
-/// Make pybind aware of the ref-counted wrapper type (s)
+// Make pybind11 aware of the ref-counted wrapper type(s):
 
 // ref<T> is a wrapper for 'Object' which uses intrusive reference counting
 // It is always possible to construct a ref<T> from an Object* pointer without
-// possible incosistencies, hence the 'true' argument at the end.
+// possible inconsistencies, hence the 'true' argument at the end.
 PYBIND11_DECLARE_HOLDER_TYPE(T, ref<T>, true);
-PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>); // Not required any more for std::shared_ptr,
-                                                     // but it should compile without error
-PYBIND11_DECLARE_HOLDER_TYPE(T, huge_unique_ptr<T>);
-
 // Make pybind11 aware of the non-standard getter member function
 namespace pybind11 { namespace detail {
     template <typename T>
@@ -121,162 +25,157 @@
     };
 }}
 
-Object *make_object_1() { return new MyObject1(1); }
-ref<Object> make_object_2() { return new MyObject1(2); }
+// The following is not required anymore for std::shared_ptr, but it should compile without error:
+PYBIND11_DECLARE_HOLDER_TYPE(T, std::shared_ptr<T>);
 
-MyObject1 *make_myobject1_1() { return new MyObject1(4); }
-ref<MyObject1> make_myobject1_2() { return new MyObject1(5); }
+// This is just a wrapper around unique_ptr, but with extra fields to deliberately bloat up the
+// holder size to trigger the non-simple-layout internal instance layout for single inheritance with
+// large holder type:
+template <typename T> class huge_unique_ptr {
+    std::unique_ptr<T> ptr;
+    uint64_t padding[10];
+public:
+    huge_unique_ptr(T *p) : ptr(p) {};
+    T *get() { return ptr.get(); }
+};
+PYBIND11_DECLARE_HOLDER_TYPE(T, huge_unique_ptr<T>);
 
-MyObject2 *make_myobject2_1() { return new MyObject2(6); }
-std::shared_ptr<MyObject2> make_myobject2_2() { return std::make_shared<MyObject2>(7); }
+// Simple custom holder that works like unique_ptr
+template <typename T>
+class custom_unique_ptr {
+    std::unique_ptr<T> impl;
+public:
+    custom_unique_ptr(T* p) : impl(p) { }
+    T* get() const { return impl.get(); }
+    T* release_ptr() { return impl.release(); }
+};
+PYBIND11_DECLARE_HOLDER_TYPE(T, custom_unique_ptr<T>);
 
-MyObject3 *make_myobject3_1() { return new MyObject3(8); }
-std::shared_ptr<MyObject3> make_myobject3_2() { return std::make_shared<MyObject3>(9); }
 
-void print_object_1(const Object *obj) { py::print(obj->toString()); }
-void print_object_2(ref<Object> obj) { py::print(obj->toString()); }
-void print_object_3(const ref<Object> &obj) { py::print(obj->toString()); }
-void print_object_4(const ref<Object> *obj) { py::print((*obj)->toString()); }
+TEST_SUBMODULE(smart_ptr, m) {
 
-void print_myobject1_1(const MyObject1 *obj) { py::print(obj->toString()); }
-void print_myobject1_2(ref<MyObject1> obj) { py::print(obj->toString()); }
-void print_myobject1_3(const ref<MyObject1> &obj) { py::print(obj->toString()); }
-void print_myobject1_4(const ref<MyObject1> *obj) { py::print((*obj)->toString()); }
+    // test_smart_ptr
 
-void print_myobject2_1(const MyObject2 *obj) { py::print(obj->toString()); }
-void print_myobject2_2(std::shared_ptr<MyObject2> obj) { py::print(obj->toString()); }
-void print_myobject2_3(const std::shared_ptr<MyObject2> &obj) { py::print(obj->toString()); }
-void print_myobject2_4(const std::shared_ptr<MyObject2> *obj) { py::print((*obj)->toString()); }
-
-void print_myobject3_1(const MyObject3 *obj) { py::print(obj->toString()); }
-void print_myobject3_2(std::shared_ptr<MyObject3> obj) { py::print(obj->toString()); }
-void print_myobject3_3(const std::shared_ptr<MyObject3> &obj) { py::print(obj->toString()); }
-void print_myobject3_4(const std::shared_ptr<MyObject3> *obj) { py::print((*obj)->toString()); }
-
-test_initializer smart_ptr([](py::module &m) {
+    // Object implementation in `object.h`
     py::class_<Object, ref<Object>> obj(m, "Object");
     obj.def("getRefCount", &Object::getRefCount);
 
+    // Custom object with builtin reference counting (see 'object.h' for the implementation)
+    class MyObject1 : public Object {
+    public:
+        MyObject1(int value) : value(value) { print_created(this, toString()); }
+        std::string toString() const { return "MyObject1[" + std::to_string(value) + "]"; }
+    protected:
+        virtual ~MyObject1() { print_destroyed(this); }
+    private:
+        int value;
+    };
     py::class_<MyObject1, ref<MyObject1>>(m, "MyObject1", obj)
         .def(py::init<int>());
+    py::implicitly_convertible<py::int_, MyObject1>();
 
-    m.def("test_object1_refcounting",
-        []() -> bool {
-            ref<MyObject1> o = new MyObject1(0);
-            bool good = o->getRefCount() == 1;
-            py::object o2 = py::cast(o, py::return_value_policy::reference);
-            // always request (partial) ownership for objects with intrusive
-            // reference counting even when using the 'reference' RVP
-            good &= o->getRefCount() == 2;
-            return good;
-        }
-    );
+    m.def("make_object_1", []() -> Object * { return new MyObject1(1); });
+    m.def("make_object_2", []() -> ref<Object> { return new MyObject1(2); });
+    m.def("make_myobject1_1", []() -> MyObject1 * { return new MyObject1(4); });
+    m.def("make_myobject1_2", []() -> ref<MyObject1> { return new MyObject1(5); });
+    m.def("print_object_1", [](const Object *obj) { py::print(obj->toString()); });
+    m.def("print_object_2", [](ref<Object> obj) { py::print(obj->toString()); });
+    m.def("print_object_3", [](const ref<Object> &obj) { py::print(obj->toString()); });
+    m.def("print_object_4", [](const ref<Object> *obj) { py::print((*obj)->toString()); });
+    m.def("print_myobject1_1", [](const MyObject1 *obj) { py::print(obj->toString()); });
+    m.def("print_myobject1_2", [](ref<MyObject1> obj) { py::print(obj->toString()); });
+    m.def("print_myobject1_3", [](const ref<MyObject1> &obj) { py::print(obj->toString()); });
+    m.def("print_myobject1_4", [](const ref<MyObject1> *obj) { py::print((*obj)->toString()); });
 
-    m.def("make_object_1", &make_object_1);
-    m.def("make_object_2", &make_object_2);
-    m.def("make_myobject1_1", &make_myobject1_1);
-    m.def("make_myobject1_2", &make_myobject1_2);
-    m.def("print_object_1", &print_object_1);
-    m.def("print_object_2", &print_object_2);
-    m.def("print_object_3", &print_object_3);
-    m.def("print_object_4", &print_object_4);
-    m.def("print_myobject1_1", &print_myobject1_1);
-    m.def("print_myobject1_2", &print_myobject1_2);
-    m.def("print_myobject1_3", &print_myobject1_3);
-    m.def("print_myobject1_4", &print_myobject1_4);
+    // Expose constructor stats for the ref type
+    m.def("cstats_ref", &ConstructorStats::get<ref_tag>);
 
+
+    // Object managed by a std::shared_ptr<>
+    class MyObject2 {
+    public:
+        MyObject2(int value) : value(value) { print_created(this, toString()); }
+        std::string toString() const { return "MyObject2[" + std::to_string(value) + "]"; }
+        virtual ~MyObject2() { print_destroyed(this); }
+    private:
+        int value;
+    };
     py::class_<MyObject2, std::shared_ptr<MyObject2>>(m, "MyObject2")
         .def(py::init<int>());
-    m.def("make_myobject2_1", &make_myobject2_1);
-    m.def("make_myobject2_2", &make_myobject2_2);
-    m.def("print_myobject2_1", &print_myobject2_1);
-    m.def("print_myobject2_2", &print_myobject2_2);
-    m.def("print_myobject2_3", &print_myobject2_3);
-    m.def("print_myobject2_4", &print_myobject2_4);
+    m.def("make_myobject2_1", []() { return new MyObject2(6); });
+    m.def("make_myobject2_2", []() { return std::make_shared<MyObject2>(7); });
+    m.def("print_myobject2_1", [](const MyObject2 *obj) { py::print(obj->toString()); });
+    m.def("print_myobject2_2", [](std::shared_ptr<MyObject2> obj) { py::print(obj->toString()); });
+    m.def("print_myobject2_3", [](const std::shared_ptr<MyObject2> &obj) { py::print(obj->toString()); });
+    m.def("print_myobject2_4", [](const std::shared_ptr<MyObject2> *obj) { py::print((*obj)->toString()); });
 
+    // Object managed by a std::shared_ptr<>, additionally derives from std::enable_shared_from_this<>
+    class MyObject3 : public std::enable_shared_from_this<MyObject3> {
+    public:
+        MyObject3(int value) : value(value) { print_created(this, toString()); }
+        std::string toString() const { return "MyObject3[" + std::to_string(value) + "]"; }
+        virtual ~MyObject3() { print_destroyed(this); }
+    private:
+        int value;
+    };
     py::class_<MyObject3, std::shared_ptr<MyObject3>>(m, "MyObject3")
         .def(py::init<int>());
-    m.def("make_myobject3_1", &make_myobject3_1);
-    m.def("make_myobject3_2", &make_myobject3_2);
-    m.def("print_myobject3_1", &print_myobject3_1);
-    m.def("print_myobject3_2", &print_myobject3_2);
-    m.def("print_myobject3_3", &print_myobject3_3);
-    m.def("print_myobject3_4", &print_myobject3_4);
+    m.def("make_myobject3_1", []() { return new MyObject3(8); });
+    m.def("make_myobject3_2", []() { return std::make_shared<MyObject3>(9); });
+    m.def("print_myobject3_1", [](const MyObject3 *obj) { py::print(obj->toString()); });
+    m.def("print_myobject3_2", [](std::shared_ptr<MyObject3> obj) { py::print(obj->toString()); });
+    m.def("print_myobject3_3", [](const std::shared_ptr<MyObject3> &obj) { py::print(obj->toString()); });
+    m.def("print_myobject3_4", [](const std::shared_ptr<MyObject3> *obj) { py::print((*obj)->toString()); });
 
+    // test_smart_ptr_refcounting
+    m.def("test_object1_refcounting", []() {
+        ref<MyObject1> o = new MyObject1(0);
+        bool good = o->getRefCount() == 1;
+        py::object o2 = py::cast(o, py::return_value_policy::reference);
+        // always request (partial) ownership for objects with intrusive
+        // reference counting even when using the 'reference' RVP
+        good &= o->getRefCount() == 2;
+        return good;
+    });
+
+    // test_unique_nodelete
+    // Object with a private destructor
+    class MyObject4 {
+    public:
+        MyObject4(int value) : value{value} { print_created(this); }
+        int value;
+    private:
+        ~MyObject4() { print_destroyed(this); }
+    };
     py::class_<MyObject4, std::unique_ptr<MyObject4, py::nodelete>>(m, "MyObject4")
         .def(py::init<int>())
         .def_readwrite("value", &MyObject4::value);
 
+    // test_large_holder
+    class MyObject5 { // managed by huge_unique_ptr
+    public:
+        MyObject5(int value) : value{value} { print_created(this); }
+        ~MyObject5() { print_destroyed(this); }
+        int value;
+    };
     py::class_<MyObject5, huge_unique_ptr<MyObject5>>(m, "MyObject5")
         .def(py::init<int>())
         .def_readwrite("value", &MyObject5::value);
 
-    py::implicitly_convertible<py::int_, MyObject1>();
+    // test_shared_ptr_and_references
+    struct SharedPtrRef {
+        struct A {
+            A() { print_created(this); }
+            A(const A &) { print_copy_created(this); }
+            A(A &&) { print_move_created(this); }
+            ~A() { print_destroyed(this); }
+        };
 
-    // Expose constructor stats for the ref type
-    m.def("cstats_ref", &ConstructorStats::get<ref_tag>);
-});
-
-struct SharedPtrRef {
-    struct A {
-        A() { print_created(this); }
-        A(const A &) { print_copy_created(this); }
-        A(A &&) { print_move_created(this); }
-        ~A() { print_destroyed(this); }
+        A value = {};
+        std::shared_ptr<A> shared = std::make_shared<A>();
     };
-
-    A value = {};
-    std::shared_ptr<A> shared = std::make_shared<A>();
-};
-
-struct SharedFromThisRef {
-    struct B : std::enable_shared_from_this<B> {
-        B() { print_created(this); }
-        B(const B &) : std::enable_shared_from_this<B>() { print_copy_created(this); }
-        B(B &&) : std::enable_shared_from_this<B>() { print_move_created(this); }
-        ~B() { print_destroyed(this); }
-    };
-
-    B value = {};
-    std::shared_ptr<B> shared = std::make_shared<B>();
-};
-
-// Issue #865: shared_from_this doesn't work with virtual inheritance
-struct SharedFromThisVBase : std::enable_shared_from_this<SharedFromThisVBase> {
-    virtual ~SharedFromThisVBase() = default;
-};
-struct SharedFromThisVirt : virtual SharedFromThisVBase {};
-
-template <typename T>
-class CustomUniquePtr {
-    std::unique_ptr<T> impl;
-
-public:
-    CustomUniquePtr(T* p) : impl(p) { }
-    T* get() const { return impl.get(); }
-    T* release_ptr() { return impl.release(); }
-};
-
-PYBIND11_DECLARE_HOLDER_TYPE(T, CustomUniquePtr<T>);
-
-struct ElementBase { virtual void foo() { } /* Force creation of virtual table */ };
-struct ElementA : ElementBase {
-    ElementA(int v) : v(v) { }
-    int value() { return v; }
-    int v;
-};
-
-struct ElementList {
-    void add(std::shared_ptr<ElementBase> e) { l.push_back(e); }
-    std::vector<std::shared_ptr<ElementBase>> l;
-};
-
-test_initializer smart_ptr_and_references([](py::module &pm) {
-    auto m = pm.def_submodule("smart_ptr");
-
     using A = SharedPtrRef::A;
     py::class_<A, std::shared_ptr<A>>(m, "A");
-
     py::class_<SharedPtrRef>(m, "SharedPtrRef")
         .def(py::init<>())
         .def_readonly("ref", &SharedPtrRef::value)
@@ -288,9 +187,20 @@
         .def("set_ref", [](SharedPtrRef &, const A &) { return true; })
         .def("set_holder", [](SharedPtrRef &, std::shared_ptr<A>) { return true; });
 
+    // test_shared_ptr_from_this_and_references
+    struct SharedFromThisRef {
+        struct B : std::enable_shared_from_this<B> {
+            B() { print_created(this); }
+            B(const B &) : std::enable_shared_from_this<B>() { print_copy_created(this); }
+            B(B &&) : std::enable_shared_from_this<B>() { print_move_created(this); }
+            ~B() { print_destroyed(this); }
+        };
+
+        B value = {};
+        std::shared_ptr<B> shared = std::make_shared<B>();
+    };
     using B = SharedFromThisRef::B;
     py::class_<B, std::shared_ptr<B>>(m, "B");
-
     py::class_<SharedFromThisRef>(m, "SharedFromThisRef")
         .def(py::init<>())
         .def_readonly("bad_wp", &SharedFromThisRef::value)
@@ -304,31 +214,46 @@
         .def("set_holder", [](SharedFromThisRef &, std::shared_ptr<B>) { return true; });
 
     // Issue #865: shared_from_this doesn't work with virtual inheritance
+    struct SharedFromThisVBase : std::enable_shared_from_this<SharedFromThisVBase> {
+        virtual ~SharedFromThisVBase() = default;
+    };
+    struct SharedFromThisVirt : virtual SharedFromThisVBase {};
     static std::shared_ptr<SharedFromThisVirt> sft(new SharedFromThisVirt());
     py::class_<SharedFromThisVirt, std::shared_ptr<SharedFromThisVirt>>(m, "SharedFromThisVirt")
         .def_static("get", []() { return sft.get(); });
 
+    // test_move_only_holder
     struct C {
         C() { print_created(this); }
         ~C() { print_destroyed(this); }
     };
+    py::class_<C, custom_unique_ptr<C>>(m, "TypeWithMoveOnlyHolder")
+        .def_static("make", []() { return custom_unique_ptr<C>(new C); });
 
-    py::class_<C, CustomUniquePtr<C>>(m, "TypeWithMoveOnlyHolder")
-        .def_static("make", []() { return CustomUniquePtr<C>(new C); });
-
+    // test_smart_ptr_from_default
     struct HeldByDefaultHolder { };
-
     py::class_<HeldByDefaultHolder>(m, "HeldByDefaultHolder")
         .def(py::init<>())
         .def_static("load_shared_ptr", [](std::shared_ptr<HeldByDefaultHolder>) {});
 
+    // test_shared_ptr_gc
     // #187: issue involving std::shared_ptr<> return value policy & garbage collection
+    struct ElementBase { virtual void foo() { } /* Force creation of virtual table */ };
     py::class_<ElementBase, std::shared_ptr<ElementBase>>(m, "ElementBase");
 
+    struct ElementA : ElementBase {
+        ElementA(int v) : v(v) { }
+        int value() { return v; }
+        int v;
+    };
     py::class_<ElementA, ElementBase, std::shared_ptr<ElementA>>(m, "ElementA")
         .def(py::init<int>())
         .def("value", &ElementA::value);
 
+    struct ElementList {
+        void add(std::shared_ptr<ElementBase> e) { l.push_back(e); }
+        std::vector<std::shared_ptr<ElementBase>> l;
+    };
     py::class_<ElementList, std::shared_ptr<ElementList>>(m, "ElementList")
         .def(py::init<>())
         .def("add", &ElementList::add)
@@ -338,4 +263,4 @@
                 list.append(py::cast(e));
             return list;
         });
-});
+}
diff --git a/tests/test_smart_ptr.py b/tests/test_smart_ptr.py
index 144180d..4dfe003 100644
--- a/tests/test_smart_ptr.py
+++ b/tests/test_smart_ptr.py
@@ -1,40 +1,35 @@
 import pytest
+from pybind11_tests import smart_ptr as m
 from pybind11_tests import ConstructorStats
 
 
 def test_smart_ptr(capture):
     # Object1
-    from pybind11_tests import (MyObject1, make_object_1, make_object_2,
-                                print_object_1, print_object_2, print_object_3, print_object_4)
-
-    for i, o in enumerate([make_object_1(), make_object_2(), MyObject1(3)], start=1):
+    for i, o in enumerate([m.make_object_1(), m.make_object_2(), m.MyObject1(3)], start=1):
         assert o.getRefCount() == 1
         with capture:
-            print_object_1(o)
-            print_object_2(o)
-            print_object_3(o)
-            print_object_4(o)
+            m.print_object_1(o)
+            m.print_object_2(o)
+            m.print_object_3(o)
+            m.print_object_4(o)
         assert capture == "MyObject1[{i}]\n".format(i=i) * 4
 
-    from pybind11_tests import (make_myobject1_1, make_myobject1_2,
-                                print_myobject1_1, print_myobject1_2,
-                                print_myobject1_3, print_myobject1_4)
-
-    for i, o in enumerate([make_myobject1_1(), make_myobject1_2(), MyObject1(6), 7], start=4):
+    for i, o in enumerate([m.make_myobject1_1(), m.make_myobject1_2(), m.MyObject1(6), 7],
+                          start=4):
         print(o)
         with capture:
             if not isinstance(o, int):
-                print_object_1(o)
-                print_object_2(o)
-                print_object_3(o)
-                print_object_4(o)
-            print_myobject1_1(o)
-            print_myobject1_2(o)
-            print_myobject1_3(o)
-            print_myobject1_4(o)
+                m.print_object_1(o)
+                m.print_object_2(o)
+                m.print_object_3(o)
+                m.print_object_4(o)
+            m.print_myobject1_1(o)
+            m.print_myobject1_2(o)
+            m.print_myobject1_3(o)
+            m.print_myobject1_4(o)
         assert capture == "MyObject1[{i}]\n".format(i=i) * (4 if isinstance(o, int) else 8)
 
-    cstats = ConstructorStats.get(MyObject1)
+    cstats = ConstructorStats.get(m.MyObject1)
     assert cstats.alive() == 0
     expected_values = ['MyObject1[{}]'.format(i) for i in range(1, 7)] + ['MyObject1[7]'] * 4
     assert cstats.values() == expected_values
@@ -45,21 +40,16 @@
     assert cstats.move_assignments == 0
 
     # Object2
-    from pybind11_tests import (MyObject2, make_myobject2_1, make_myobject2_2,
-                                make_myobject3_1, make_myobject3_2,
-                                print_myobject2_1, print_myobject2_2,
-                                print_myobject2_3, print_myobject2_4)
-
-    for i, o in zip([8, 6, 7], [MyObject2(8), make_myobject2_1(), make_myobject2_2()]):
+    for i, o in zip([8, 6, 7], [m.MyObject2(8), m.make_myobject2_1(), m.make_myobject2_2()]):
         print(o)
         with capture:
-            print_myobject2_1(o)
-            print_myobject2_2(o)
-            print_myobject2_3(o)
-            print_myobject2_4(o)
+            m.print_myobject2_1(o)
+            m.print_myobject2_2(o)
+            m.print_myobject2_3(o)
+            m.print_myobject2_4(o)
         assert capture == "MyObject2[{i}]\n".format(i=i) * 4
 
-    cstats = ConstructorStats.get(MyObject2)
+    cstats = ConstructorStats.get(m.MyObject2)
     assert cstats.alive() == 1
     o = None
     assert cstats.alive() == 0
@@ -71,19 +61,16 @@
     assert cstats.move_assignments == 0
 
     # Object3
-    from pybind11_tests import (MyObject3, print_myobject3_1, print_myobject3_2,
-                                print_myobject3_3, print_myobject3_4)
-
-    for i, o in zip([9, 8, 9], [MyObject3(9), make_myobject3_1(), make_myobject3_2()]):
+    for i, o in zip([9, 8, 9], [m.MyObject3(9), m.make_myobject3_1(), m.make_myobject3_2()]):
         print(o)
         with capture:
-            print_myobject3_1(o)
-            print_myobject3_2(o)
-            print_myobject3_3(o)
-            print_myobject3_4(o)
+            m.print_myobject3_1(o)
+            m.print_myobject3_2(o)
+            m.print_myobject3_3(o)
+            m.print_myobject3_4(o)
         assert capture == "MyObject3[{i}]\n".format(i=i) * 4
 
-    cstats = ConstructorStats.get(MyObject3)
+    cstats = ConstructorStats.get(m.MyObject3)
     assert cstats.alive() == 1
     o = None
     assert cstats.alive() == 0
@@ -94,10 +81,8 @@
     assert cstats.copy_assignments == 0
     assert cstats.move_assignments == 0
 
-    # Object and ref
-    from pybind11_tests import Object, cstats_ref
-
-    cstats = ConstructorStats.get(Object)
+    # Object
+    cstats = ConstructorStats.get(m.Object)
     assert cstats.alive() == 0
     assert cstats.values() == []
     assert cstats.default_constructions == 10
@@ -106,7 +91,8 @@
     assert cstats.copy_assignments == 0
     assert cstats.move_assignments == 0
 
-    cstats = cstats_ref()
+    # ref<>
+    cstats = m.cstats_ref()
     assert cstats.alive() == 0
     assert cstats.values() == ['from pointer'] * 10
     assert cstats.default_constructions == 30
@@ -117,36 +103,30 @@
 
 
 def test_smart_ptr_refcounting():
-    from pybind11_tests import test_object1_refcounting
-    assert test_object1_refcounting()
+    assert m.test_object1_refcounting()
 
 
 def test_unique_nodelete():
-    from pybind11_tests import MyObject4
-    o = MyObject4(23)
+    o = m.MyObject4(23)
     assert o.value == 23
-    cstats = ConstructorStats.get(MyObject4)
+    cstats = ConstructorStats.get(m.MyObject4)
     assert cstats.alive() == 1
     del o
-    cstats = ConstructorStats.get(MyObject4)
     assert cstats.alive() == 1  # Leak, but that's intentional
 
 
 def test_large_holder():
-    from pybind11_tests import MyObject5
-    o = MyObject5(5)
+    o = m.MyObject5(5)
     assert o.value == 5
-    cstats = ConstructorStats.get(MyObject5)
+    cstats = ConstructorStats.get(m.MyObject5)
     assert cstats.alive() == 1
     del o
     assert cstats.alive() == 0
 
 
 def test_shared_ptr_and_references():
-    from pybind11_tests.smart_ptr import SharedPtrRef, A
-
-    s = SharedPtrRef()
-    stats = ConstructorStats.get(A)
+    s = m.SharedPtrRef()
+    stats = ConstructorStats.get(m.A)
     assert stats.alive() == 2
 
     ref = s.ref  # init_holder_helper(holder_ptr=false, owned=false)
@@ -176,10 +156,8 @@
 
 
 def test_shared_ptr_from_this_and_references():
-    from pybind11_tests.smart_ptr import SharedFromThisRef, B, SharedFromThisVirt
-
-    s = SharedFromThisRef()
-    stats = ConstructorStats.get(B)
+    s = m.SharedFromThisRef()
+    stats = ConstructorStats.get(m.B)
     assert stats.alive() == 2
 
     ref = s.ref  # init_holder_helper(holder_ptr=false, owned=false, bad_wp=false)
@@ -212,37 +190,31 @@
     del ref, bad_wp, copy, holder_ref, holder_copy, s
     assert stats.alive() == 0
 
-    z = SharedFromThisVirt.get()
-    y = SharedFromThisVirt.get()
+    z = m.SharedFromThisVirt.get()
+    y = m.SharedFromThisVirt.get()
     assert y is z
 
 
 def test_move_only_holder():
-    from pybind11_tests.smart_ptr import TypeWithMoveOnlyHolder
-
-    a = TypeWithMoveOnlyHolder.make()
-    stats = ConstructorStats.get(TypeWithMoveOnlyHolder)
+    a = m.TypeWithMoveOnlyHolder.make()
+    stats = ConstructorStats.get(m.TypeWithMoveOnlyHolder)
     assert stats.alive() == 1
     del a
     assert stats.alive() == 0
 
 
 def test_smart_ptr_from_default():
-    from pybind11_tests.smart_ptr import HeldByDefaultHolder
-
-    instance = HeldByDefaultHolder()
+    instance = m.HeldByDefaultHolder()
     with pytest.raises(RuntimeError) as excinfo:
-        HeldByDefaultHolder.load_shared_ptr(instance)
+        m.HeldByDefaultHolder.load_shared_ptr(instance)
     assert "Unable to load a custom holder type from a default-holder instance" in str(excinfo)
 
 
 def test_shared_ptr_gc():
     """#187: issue involving std::shared_ptr<> return value policy & garbage collection"""
-    from pybind11_tests.smart_ptr import ElementList, ElementA
-
-    el = ElementList()
+    el = m.ElementList()
     for i in range(10):
-        el.add(ElementA(i))
+        el.add(m.ElementA(i))
     pytest.gc_collect()
     for i, v in enumerate(el.get()):
         assert i == v.value()
diff --git a/tests/test_stl.cpp b/tests/test_stl.cpp
index 6ba9cb9..93e8c66 100644
--- a/tests/test_stl.cpp
+++ b/tests/test_stl.cpp
@@ -10,17 +10,6 @@
 #include "pybind11_tests.h"
 #include <pybind11/stl.h>
 
-// Class that can be move- and copy-constructed, but not assigned
-struct NoAssign {
-    int value;
-
-    explicit NoAssign(int value = 0) : value(value) { }
-    NoAssign(const NoAssign &) = default;
-    NoAssign(NoAssign &&) = default;
-
-    NoAssign &operator=(const NoAssign &) = delete;
-    NoAssign &operator=(NoAssign &&) = delete;
-};
 
 /// Issue #528: templated constructor
 struct TplCtorClass {
@@ -103,24 +92,34 @@
         return v;
     });
 
+    // test_move_out_container
     struct MoveOutContainer {
         struct Value { int value; };
-
         std::list<Value> move_list() const { return {{0}, {1}, {2}}; }
     };
-
     py::class_<MoveOutContainer::Value>(m, "MoveOutContainerValue")
         .def_readonly("value", &MoveOutContainer::Value::value);
-
     py::class_<MoveOutContainer>(m, "MoveOutContainer")
         .def(py::init<>())
         .def_property_readonly("move_list", &MoveOutContainer::move_list);
 
+    // Class that can be move- and copy-constructed, but not assigned
+    struct NoAssign {
+        int value;
+
+        explicit NoAssign(int value = 0) : value(value) { }
+        NoAssign(const NoAssign &) = default;
+        NoAssign(NoAssign &&) = default;
+
+        NoAssign &operator=(const NoAssign &) = delete;
+        NoAssign &operator=(NoAssign &&) = delete;
+    };
     py::class_<NoAssign>(m, "NoAssign", "Class with no C++ assignment operators")
         .def(py::init<>())
         .def(py::init<int>());
 
 #ifdef PYBIND11_HAS_OPTIONAL
+    // test_optional
     m.attr("has_optional") = true;
 
     using opt_int = std::optional<int>;
@@ -143,6 +142,7 @@
 #endif
 
 #ifdef PYBIND11_HAS_EXP_OPTIONAL
+    // test_exp_optional
     m.attr("has_exp_optional") = true;
 
     using exp_opt_int = std::experimental::optional<int>;
@@ -169,21 +169,21 @@
         const char *operator()(std::nullptr_t) { return "std::nullptr_t"; }
     };
 
+    // test_variant
     m.def("load_variant", [](std::variant<int, std::string, double, std::nullptr_t> v) {
         return std::visit(visitor(), v);
     });
-
     m.def("load_variant_2pass", [](std::variant<double, int> v) {
         return std::visit(visitor(), v);
     });
-
     m.def("cast_variant", []() {
         using V = std::variant<int, std::string>;
         return py::make_tuple(V(5), V("Hello"));
     });
 #endif
 
-    /// #528: templated constructor
+    // #528: templated constructor
+    // (no python tests: the test here is that this compiles)
     m.def("tpl_ctor_vector", [](std::vector<TplCtorClass> &) {});
     m.def("tpl_ctor_map", [](std::unordered_map<TplCtorClass, TplCtorClass> &) {});
     m.def("tpl_ctor_set", [](std::unordered_set<TplCtorClass> &) {});
diff --git a/tests/test_stl_binders.cpp b/tests/test_stl_binders.cpp
index 22ba16e..2df6ca0 100644
--- a/tests/test_stl_binders.cpp
+++ b/tests/test_stl_binders.cpp
@@ -54,70 +54,58 @@
     return m;
 }
 
-struct VStruct {
-    bool w;
-    uint32_t x;
-    double y;
-    bool z;
-};
+TEST_SUBMODULE(stl_binders, m) {
 
-struct VUndeclStruct { //dtype not declared for this version
-    bool w;
-    uint32_t x;
-    double y;
-    bool z;
-};
-
-test_initializer stl_binder_vector([](py::module &m) {
-    py::class_<El>(m, "El")
-        .def(py::init<int>());
-
-    py::bind_vector<std::vector<unsigned char>>(m, "VectorUChar", py::buffer_protocol());
+    // test_vector_int
     py::bind_vector<std::vector<unsigned int>>(m, "VectorInt", py::buffer_protocol());
+
+    // test_vector_bool
     py::bind_vector<std::vector<bool>>(m, "VectorBool");
 
+    // test_vector_custom
+    py::class_<El>(m, "El")
+        .def(py::init<int>());
     py::bind_vector<std::vector<El>>(m, "VectorEl");
-
     py::bind_vector<std::vector<std::vector<El>>>(m, "VectorVectorEl");
 
-    m.def("create_undeclstruct", [m] () mutable {
-        py::bind_vector<std::vector<VUndeclStruct>>(m, "VectorUndeclStruct", py::buffer_protocol());
-    });
-
-    try {
-        py::module::import("numpy");
-    } catch (...) {
-        return;
-    }
-    PYBIND11_NUMPY_DTYPE(VStruct, w, x, y, z);
-    py::class_<VStruct>(m, "VStruct").def_readwrite("x", &VStruct::x);
-    py::bind_vector<std::vector<VStruct>>(m, "VectorStruct", py::buffer_protocol());
-    m.def("get_vectorstruct", [] {return std::vector<VStruct> {{0, 5, 3.0, 1}, {1, 30, -1e4, 0}};});
-});
-
-test_initializer stl_binder_map([](py::module &m) {
+    // test_map_string_double
     py::bind_map<std::map<std::string, double>>(m, "MapStringDouble");
     py::bind_map<std::unordered_map<std::string, double>>(m, "UnorderedMapStringDouble");
 
+    // test_map_string_double_const
     py::bind_map<std::map<std::string, double const>>(m, "MapStringDoubleConst");
     py::bind_map<std::unordered_map<std::string, double const>>(m, "UnorderedMapStringDoubleConst");
 
-});
-
-test_initializer stl_binder_noncopyable([](py::module &m) {
     py::class_<E_nc>(m, "ENC")
         .def(py::init<int>())
         .def_readwrite("value", &E_nc::value);
 
+    // test_noncopyable_containers
     py::bind_vector<std::vector<E_nc>>(m, "VectorENC");
     m.def("get_vnc", &one_to_n<std::vector<E_nc>>, py::return_value_policy::reference);
-
     py::bind_vector<std::deque<E_nc>>(m, "DequeENC");
     m.def("get_dnc", &one_to_n<std::deque<E_nc>>, py::return_value_policy::reference);
-
     py::bind_map<std::map<int, E_nc>>(m, "MapENC");
     m.def("get_mnc", &times_ten<std::map<int, E_nc>>, py::return_value_policy::reference);
-
     py::bind_map<std::unordered_map<int, E_nc>>(m, "UmapENC");
     m.def("get_umnc", &times_ten<std::unordered_map<int, E_nc>>, py::return_value_policy::reference);
-});
+
+    // test_vector_buffer
+    py::bind_vector<std::vector<unsigned char>>(m, "VectorUChar", py::buffer_protocol());
+    // no dtype declared for this version:
+    struct VUndeclStruct { bool w; uint32_t x; double y; bool z; };
+    m.def("create_undeclstruct", [m] () mutable {
+        py::bind_vector<std::vector<VUndeclStruct>>(m, "VectorUndeclStruct", py::buffer_protocol());
+    });
+
+    // The rest depends on numpy:
+    try { py::module::import("numpy"); }
+    catch (...) { return; }
+
+    // test_vector_buffer_numpy
+    struct VStruct { bool w; uint32_t x; double y; bool z; };
+    PYBIND11_NUMPY_DTYPE(VStruct, w, x, y, z);
+    py::class_<VStruct>(m, "VStruct").def_readwrite("x", &VStruct::x);
+    py::bind_vector<std::vector<VStruct>>(m, "VectorStruct", py::buffer_protocol());
+    m.def("get_vectorstruct", [] {return std::vector<VStruct> {{0, 5, 3.0, 1}, {1, 30, -1e4, 0}};});
+}
diff --git a/tests/test_stl_binders.py b/tests/test_stl_binders.py
index 1d52b8b..7496d05 100644
--- a/tests/test_stl_binders.py
+++ b/tests/test_stl_binders.py
@@ -1,18 +1,17 @@
 import pytest
 import sys
+from pybind11_tests import stl_binders as m
 
 with pytest.suppress(ImportError):
     import numpy as np
 
 
 def test_vector_int():
-    from pybind11_tests import VectorInt
-
-    v_int = VectorInt([0, 0])
+    v_int = m.VectorInt([0, 0])
     assert len(v_int) == 2
     assert bool(v_int) is True
 
-    v_int2 = VectorInt([0, 0])
+    v_int2 = m.VectorInt([0, 0])
     assert v_int == v_int2
     v_int2[1] = 1
     assert v_int != v_int2
@@ -28,85 +27,66 @@
 
     v_int.append(99)
     v_int2[2:-2] = v_int
-    assert v_int2 == VectorInt([3, 2, 0, 0, 99, 2, 3])
+    assert v_int2 == m.VectorInt([3, 2, 0, 0, 99, 2, 3])
     del v_int2[1:3]
-    assert v_int2 == VectorInt([3, 0, 99, 2, 3])
+    assert v_int2 == m.VectorInt([3, 0, 99, 2, 3])
     del v_int2[0]
-    assert v_int2 == VectorInt([0, 99, 2, 3])
+    assert v_int2 == m.VectorInt([0, 99, 2, 3])
 
 
-# As of pypy 5.7.1, running this and the next test seems to trigger a segfault
 # related to the PyPy's buffer protocol.
 @pytest.unsupported_on_pypy
 def test_vector_buffer():
-    from pybind11_tests import VectorUChar, create_undeclstruct
     b = bytearray([1, 2, 3, 4])
-    v = VectorUChar(b)
+    v = m.VectorUChar(b)
     assert v[1] == 2
     v[2] = 5
-    m = memoryview(v)  # We expose the buffer interface
+    mv = memoryview(v)  # We expose the buffer interface
     if sys.version_info.major > 2:
-        assert m[2] == 5
-        m[2] = 6
+        assert mv[2] == 5
+        mv[2] = 6
     else:
-        assert m[2] == '\x05'
-        m[2] = '\x06'
+        assert mv[2] == '\x05'
+        mv[2] = '\x06'
     assert v[2] == 6
 
-    with pytest.raises(RuntimeError):
-        create_undeclstruct()  # Undeclared struct contents, no buffer interface
+    with pytest.raises(RuntimeError) as excinfo:
+        m.create_undeclstruct()  # Undeclared struct contents, no buffer interface
+    assert "NumPy type info missing for " in str(excinfo.value)
 
 
 @pytest.unsupported_on_pypy
 @pytest.requires_numpy
 def test_vector_buffer_numpy():
-    from pybind11_tests import VectorInt, VectorStruct, get_vectorstruct
-
     a = np.array([1, 2, 3, 4], dtype=np.int32)
     with pytest.raises(TypeError):
-        VectorInt(a)
+        m.VectorInt(a)
 
     a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]], dtype=np.uintc)
-    v = VectorInt(a[0, :])
+    v = m.VectorInt(a[0, :])
     assert len(v) == 4
     assert v[2] == 3
-    m = np.asarray(v)
-    m[2] = 5
+    ma = np.asarray(v)
+    ma[2] = 5
     assert v[2] == 5
 
-    v = VectorInt(a[:, 1])
+    v = m.VectorInt(a[:, 1])
     assert len(v) == 3
     assert v[2] == 10
 
-    v = get_vectorstruct()
+    v = m.get_vectorstruct()
     assert v[0].x == 5
-    m = np.asarray(v)
-    m[1]['x'] = 99
+    ma = np.asarray(v)
+    ma[1]['x'] = 99
     assert v[1].x == 99
 
-    v = VectorStruct(np.zeros(3, dtype=np.dtype([('w', 'bool'), ('x', 'I'),
-                                                 ('y', 'float64'), ('z', 'bool')], align=True)))
+    v = m.VectorStruct(np.zeros(3, dtype=np.dtype([('w', 'bool'), ('x', 'I'),
+                                                   ('y', 'float64'), ('z', 'bool')], align=True)))
     assert len(v) == 3
 
 
-def test_vector_custom():
-    from pybind11_tests import El, VectorEl, VectorVectorEl
-
-    v_a = VectorEl()
-    v_a.append(El(1))
-    v_a.append(El(2))
-    assert str(v_a) == "VectorEl[El{1}, El{2}]"
-
-    vv_a = VectorVectorEl()
-    vv_a.append(v_a)
-    vv_b = vv_a[0]
-    assert str(vv_b) == "VectorEl[El{1}, El{2}]"
-
-
 def test_vector_bool():
-    from pybind11_tests import VectorBool
-
-    vv_c = VectorBool()
+    vv_c = m.VectorBool()
     for i in range(10):
         vv_c.append(i % 2 == 0)
     for i in range(10):
@@ -114,18 +94,28 @@
     assert str(vv_c) == "VectorBool[1, 0, 1, 0, 1, 0, 1, 0, 1, 0]"
 
 
+def test_vector_custom():
+    v_a = m.VectorEl()
+    v_a.append(m.El(1))
+    v_a.append(m.El(2))
+    assert str(v_a) == "VectorEl[El{1}, El{2}]"
+
+    vv_a = m.VectorVectorEl()
+    vv_a.append(v_a)
+    vv_b = vv_a[0]
+    assert str(vv_b) == "VectorEl[El{1}, El{2}]"
+
+
 def test_map_string_double():
-    from pybind11_tests import MapStringDouble, UnorderedMapStringDouble
+    mm = m.MapStringDouble()
+    mm['a'] = 1
+    mm['b'] = 2.5
 
-    m = MapStringDouble()
-    m['a'] = 1
-    m['b'] = 2.5
+    assert list(mm) == ['a', 'b']
+    assert list(mm.items()) == [('a', 1), ('b', 2.5)]
+    assert str(mm) == "MapStringDouble{a: 1, b: 2.5}"
 
-    assert list(m) == ['a', 'b']
-    assert list(m.items()) == [('a', 1), ('b', 2.5)]
-    assert str(m) == "MapStringDouble{a: 1, b: 2.5}"
-
-    um = UnorderedMapStringDouble()
+    um = m.UnorderedMapStringDouble()
     um['ua'] = 1.1
     um['ub'] = 2.6
 
@@ -135,35 +125,29 @@
 
 
 def test_map_string_double_const():
-    from pybind11_tests import MapStringDoubleConst, UnorderedMapStringDoubleConst
-
-    mc = MapStringDoubleConst()
+    mc = m.MapStringDoubleConst()
     mc['a'] = 10
     mc['b'] = 20.5
     assert str(mc) == "MapStringDoubleConst{a: 10, b: 20.5}"
 
-    umc = UnorderedMapStringDoubleConst()
+    umc = m.UnorderedMapStringDoubleConst()
     umc['a'] = 11
     umc['b'] = 21.5
 
     str(umc)
 
 
-def test_noncopyable_vector():
-    from pybind11_tests import get_vnc
-
-    vnc = get_vnc(5)
+def test_noncopyable_containers():
+    # std::vector
+    vnc = m.get_vnc(5)
     for i in range(0, 5):
         assert vnc[i].value == i + 1
 
     for i, j in enumerate(vnc, start=1):
         assert j.value == i
 
-
-def test_noncopyable_deque():
-    from pybind11_tests import get_dnc
-
-    dnc = get_dnc(5)
+    # std::deque
+    dnc = m.get_dnc(5)
     for i in range(0, 5):
         assert dnc[i].value == i + 1
 
@@ -172,11 +156,8 @@
         assert(j.value == i)
         i += 1
 
-
-def test_noncopyable_map():
-    from pybind11_tests import get_mnc
-
-    mnc = get_mnc(5)
+    # std::map
+    mnc = m.get_mnc(5)
     for i in range(1, 6):
         assert mnc[i].value == 10 * i
 
@@ -187,11 +168,8 @@
 
     assert vsum == 150
 
-
-def test_noncopyable_unordered_map():
-    from pybind11_tests import get_umnc
-
-    mnc = get_umnc(5)
+    # std::unordered_map
+    mnc = m.get_umnc(5)
     for i in range(1, 6):
         assert mnc[i].value == 10 * i
 
diff --git a/tests/test_virtual_functions.cpp b/tests/test_virtual_functions.cpp
index 899bba6..4127d68 100644
--- a/tests/test_virtual_functions.cpp
+++ b/tests/test_virtual_functions.cpp
@@ -145,16 +145,147 @@
     }
 };
 
-int runExampleVirt(ExampleVirt *ex, int value) {
-    return ex->run(value);
-}
+struct Base {
+    /* for some reason MSVC2015 can't compile this if the function is pure virtual */
+    virtual std::string dispatch() const { return {}; };
+};
 
-bool runExampleVirtBool(ExampleVirt* ex) {
-    return ex->run_bool();
-}
+struct DispatchIssue : Base {
+    virtual std::string dispatch() const {
+        PYBIND11_OVERLOAD_PURE(std::string, Base, dispatch, /* no arguments */);
+    }
+};
 
-void runExampleVirtVirtual(ExampleVirt *ex) {
-    ex->pure_virtual();
+// Forward declaration (so that we can put the main tests here; the inherited virtual approaches are
+// rather long).
+void initialize_inherited_virtuals(py::module &m);
+
+TEST_SUBMODULE(virtual_functions, m) {
+    // test_override
+    py::class_<ExampleVirt, PyExampleVirt>(m, "ExampleVirt")
+        .def(py::init<int>())
+        /* Reference original class in function definitions */
+        .def("run", &ExampleVirt::run)
+        .def("run_bool", &ExampleVirt::run_bool)
+        .def("pure_virtual", &ExampleVirt::pure_virtual);
+
+    py::class_<NonCopyable>(m, "NonCopyable")
+        .def(py::init<int, int>());
+
+    py::class_<Movable>(m, "Movable")
+        .def(py::init<int, int>());
+
+    // test_move_support
+#if !defined(__INTEL_COMPILER)
+    py::class_<NCVirt, NCVirtTrampoline>(m, "NCVirt")
+        .def(py::init<>())
+        .def("get_noncopyable", &NCVirt::get_noncopyable)
+        .def("get_movable", &NCVirt::get_movable)
+        .def("print_nc", &NCVirt::print_nc)
+        .def("print_movable", &NCVirt::print_movable);
+#endif
+
+    m.def("runExampleVirt", [](ExampleVirt *ex, int value) { return ex->run(value); });
+    m.def("runExampleVirtBool", [](ExampleVirt* ex) { return ex->run_bool(); });
+    m.def("runExampleVirtVirtual", [](ExampleVirt *ex) { ex->pure_virtual(); });
+
+    m.def("cstats_debug", &ConstructorStats::get<ExampleVirt>);
+    initialize_inherited_virtuals(m);
+
+    // test_alias_delay_initialization1
+    // don't invoke Python dispatch classes by default when instantiating C++ classes
+    // that were not extended on the Python side
+    struct A {
+        virtual ~A() {}
+        virtual void f() { py::print("A.f()"); }
+    };
+
+    struct PyA : A {
+        PyA() { py::print("PyA.PyA()"); }
+        ~PyA() { py::print("PyA.~PyA()"); }
+
+        void f() override {
+            py::print("PyA.f()");
+            PYBIND11_OVERLOAD(void, A, f);
+        }
+    };
+
+    py::class_<A, PyA>(m, "A")
+        .def(py::init<>())
+        .def("f", &A::f);
+
+    m.def("call_f", [](A *a) { a->f(); });
+
+    // test_alias_delay_initialization2
+    // ... unless we explicitly request it, as in this example:
+    struct A2 {
+        virtual ~A2() {}
+        virtual void f() { py::print("A2.f()"); }
+    };
+
+    struct PyA2 : A2 {
+        PyA2() { py::print("PyA2.PyA2()"); }
+        ~PyA2() { py::print("PyA2.~PyA2()"); }
+        void f() override {
+            py::print("PyA2.f()");
+            PYBIND11_OVERLOAD(void, A2, f);
+        }
+    };
+
+    py::class_<A2, PyA2>(m, "A2")
+        .def(py::init_alias<>())
+        .def("f", &A2::f);
+
+    m.def("call_f", [](A2 *a2) { a2->f(); });
+
+    // test_dispatch_issue
+    // #159: virtual function dispatch has problems with similar-named functions
+    py::class_<Base, DispatchIssue>(m, "DispatchIssue")
+        .def(py::init<>())
+        .def("dispatch", &Base::dispatch);
+
+    m.def("dispatch_issue_go", [](const Base * b) { return b->dispatch(); });
+
+    // test_override_ref
+    // #392/397: overriding reference-returning functions
+    class OverrideTest {
+    public:
+        struct A { std::string value = "hi"; };
+        std::string v;
+        A a;
+        explicit OverrideTest(const std::string &v) : v{v} {}
+        virtual std::string str_value() { return v; }
+        virtual std::string &str_ref() { return v; }
+        virtual A A_value() { return a; }
+        virtual A &A_ref() { return a; }
+    };
+
+    class PyOverrideTest : public OverrideTest {
+    public:
+        using OverrideTest::OverrideTest;
+        std::string str_value() override { PYBIND11_OVERLOAD(std::string, OverrideTest, str_value); }
+        // Not allowed (uncommenting should hit a static_assert failure): we can't get a reference
+        // to a python numeric value, since we only copy values in the numeric type caster:
+//      std::string &str_ref() override { PYBIND11_OVERLOAD(std::string &, OverrideTest, str_ref); }
+        // But we can work around it like this:
+    private:
+        std::string _tmp;
+        std::string str_ref_helper() { PYBIND11_OVERLOAD(std::string, OverrideTest, str_ref); }
+    public:
+        std::string &str_ref() override { return _tmp = str_ref_helper(); }
+
+        A A_value() override { PYBIND11_OVERLOAD(A, OverrideTest, A_value); }
+        A &A_ref() override { PYBIND11_OVERLOAD(A &, OverrideTest, A_ref); }
+    };
+
+    py::class_<OverrideTest::A>(m, "OverrideTest_A")
+        .def_readwrite("value", &OverrideTest::A::value);
+    py::class_<OverrideTest, PyOverrideTest>(m, "OverrideTest")
+        .def(py::init<const std::string &>())
+        .def("str_value", &OverrideTest::str_value)
+//      .def("str_ref", &OverrideTest::str_ref)
+        .def("A_value", &OverrideTest::A_value)
+        .def("A_ref", &OverrideTest::A_ref);
 }
 
 
@@ -281,6 +412,8 @@
 
 
 void initialize_inherited_virtuals(py::module &m) {
+    // test_inherited_virtuals
+
     // Method 1: repeat
     py::class_<A_Repeat, PyA_Repeat>(m, "A_Repeat")
         .def(py::init<>())
@@ -295,6 +428,7 @@
     py::class_<D_Repeat, C_Repeat, PyD_Repeat>(m, "D_Repeat")
         .def(py::init<>());
 
+    // test_inherited_virtuals
     // Method 2: Templated trampolines
     py::class_<A_Tpl, PyA_Tpl<>>(m, "A_Tpl")
         .def(py::init<>())
@@ -311,137 +445,3 @@
 
 };
 
-struct Base {
-    /* for some reason MSVC2015 can't compile this if the function is pure virtual */
-    virtual std::string dispatch() const { return {}; };
-};
-
-struct DispatchIssue : Base {
-    virtual std::string dispatch() const {
-        PYBIND11_OVERLOAD_PURE(std::string, Base, dispatch, /* no arguments */);
-    }
-};
-
-TEST_SUBMODULE(virtual_functions, m) {
-    py::class_<ExampleVirt, PyExampleVirt>(m, "ExampleVirt")
-        .def(py::init<int>())
-        /* Reference original class in function definitions */
-        .def("run", &ExampleVirt::run)
-        .def("run_bool", &ExampleVirt::run_bool)
-        .def("pure_virtual", &ExampleVirt::pure_virtual);
-
-    py::class_<NonCopyable>(m, "NonCopyable")
-        .def(py::init<int, int>());
-
-    py::class_<Movable>(m, "Movable")
-        .def(py::init<int, int>());
-
-#if !defined(__INTEL_COMPILER)
-    py::class_<NCVirt, NCVirtTrampoline>(m, "NCVirt")
-        .def(py::init<>())
-        .def("get_noncopyable", &NCVirt::get_noncopyable)
-        .def("get_movable", &NCVirt::get_movable)
-        .def("print_nc", &NCVirt::print_nc)
-        .def("print_movable", &NCVirt::print_movable);
-#endif
-
-    m.def("runExampleVirt", &runExampleVirt);
-    m.def("runExampleVirtBool", &runExampleVirtBool);
-    m.def("runExampleVirtVirtual", &runExampleVirtVirtual);
-
-    m.def("cstats_debug", &ConstructorStats::get<ExampleVirt>);
-    initialize_inherited_virtuals(m);
-
-    // test_alias_delay_initialization1
-    // don't invoke Python dispatch classes by default when instantiating C++ classes
-    // that were not extended on the Python side
-    struct A {
-        virtual ~A() {}
-        virtual void f() { py::print("A.f()"); }
-    };
-
-    struct PyA : A {
-        PyA() { py::print("PyA.PyA()"); }
-        ~PyA() { py::print("PyA.~PyA()"); }
-
-        void f() override {
-            py::print("PyA.f()");
-            PYBIND11_OVERLOAD(void, A, f);
-        }
-    };
-
-    py::class_<A, PyA>(m, "A")
-        .def(py::init<>())
-        .def("f", &A::f);
-
-    m.def("call_f", [](A *a) { a->f(); });
-
-    // test_alias_delay_initialization2
-    // ... unless we explicitly request it, as in this example:
-    struct A2 {
-        virtual ~A2() {}
-        virtual void f() { py::print("A2.f()"); }
-    };
-
-    struct PyA2 : A2 {
-        PyA2() { py::print("PyA2.PyA2()"); }
-        ~PyA2() { py::print("PyA2.~PyA2()"); }
-        void f() override {
-            py::print("PyA2.f()");
-            PYBIND11_OVERLOAD(void, A2, f);
-        }
-    };
-
-    py::class_<A2, PyA2>(m, "A2")
-        .def(py::init_alias<>())
-        .def("f", &A2::f);
-
-    m.def("call_f", [](A2 *a2) { a2->f(); });
-
-    // #159: virtual function dispatch has problems with similar-named functions
-    py::class_<Base, DispatchIssue>(m, "DispatchIssue")
-        .def(py::init<>())
-        .def("dispatch", &Base::dispatch);
-
-    m.def("dispatch_issue_go", [](const Base * b) { return b->dispatch(); });
-
-    // #392/397: overridding reference-returning functions
-    class OverrideTest {
-    public:
-        struct A { std::string value = "hi"; };
-        std::string v;
-        A a;
-        explicit OverrideTest(const std::string &v) : v{v} {}
-        virtual std::string str_value() { return v; }
-        virtual std::string &str_ref() { return v; }
-        virtual A A_value() { return a; }
-        virtual A &A_ref() { return a; }
-    };
-
-    class PyOverrideTest : public OverrideTest {
-    public:
-        using OverrideTest::OverrideTest;
-        std::string str_value() override { PYBIND11_OVERLOAD(std::string, OverrideTest, str_value); }
-        // Not allowed (uncommenting should hit a static_assert failure): we can't get a reference
-        // to a python numeric value, since we only copy values in the numeric type caster:
-//      std::string &str_ref() override { PYBIND11_OVERLOAD(std::string &, OverrideTest, str_ref); }
-        // But we can work around it like this:
-    private:
-        std::string _tmp;
-        std::string str_ref_helper() { PYBIND11_OVERLOAD(std::string, OverrideTest, str_ref); }
-    public:
-        std::string &str_ref() override { return _tmp = str_ref_helper(); }
-
-        A A_value() override { PYBIND11_OVERLOAD(A, OverrideTest, A_value); }
-        A &A_ref() override { PYBIND11_OVERLOAD(A &, OverrideTest, A_ref); }
-    };
-
-    py::class_<OverrideTest::A>(m, "OverrideTest_A")
-        .def_readwrite("value", &OverrideTest::A::value);
-    py::class_<OverrideTest, PyOverrideTest>(m, "OverrideTest")
-        .def(py::init<const std::string &>())
-        .def("str_value", &OverrideTest::str_value)
-//      .def("str_ref", &OverrideTest::str_ref)
-        .def("A_value", &OverrideTest::A_value)
-        .def("A_ref", &OverrideTest::A_ref);
-}
diff --git a/tests/test_virtual_functions.py b/tests/test_virtual_functions.py
index 7d1698d..138be63 100644
--- a/tests/test_virtual_functions.py
+++ b/tests/test_virtual_functions.py
@@ -149,7 +149,92 @@
     """
 
 
-def test_inheriting_repeat():
+# PyPy: Reference count > 1 causes call with noncopyable instance
+# to fail in ncv1.print_nc()
+@pytest.unsupported_on_pypy
+@pytest.mark.skipif(not hasattr(m, "NCVirt"), reason="NCVirt test broken on ICPC")
+def test_move_support():
+    class NCVirtExt(m.NCVirt):
+        def get_noncopyable(self, a, b):
+            # Constructs and returns a new instance:
+            nc = m.NonCopyable(a * a, b * b)
+            return nc
+
+        def get_movable(self, a, b):
+            # Return a referenced copy
+            self.movable = m.Movable(a, b)
+            return self.movable
+
+    class NCVirtExt2(m.NCVirt):
+        def get_noncopyable(self, a, b):
+            # Keep a reference: this is going to throw an exception
+            self.nc = m.NonCopyable(a, b)
+            return self.nc
+
+        def get_movable(self, a, b):
+            # Return a new instance without storing it
+            return m.Movable(a, b)
+
+    ncv1 = NCVirtExt()
+    assert ncv1.print_nc(2, 3) == "36"
+    assert ncv1.print_movable(4, 5) == "9"
+    ncv2 = NCVirtExt2()
+    assert ncv2.print_movable(7, 7) == "14"
+    # Don't check the exception message here because it differs under debug/non-debug mode
+    with pytest.raises(RuntimeError):
+        ncv2.print_nc(9, 9)
+
+    nc_stats = ConstructorStats.get(m.NonCopyable)
+    mv_stats = ConstructorStats.get(m.Movable)
+    assert nc_stats.alive() == 1
+    assert mv_stats.alive() == 1
+    del ncv1, ncv2
+    assert nc_stats.alive() == 0
+    assert mv_stats.alive() == 0
+    assert nc_stats.values() == ['4', '9', '9', '9']
+    assert mv_stats.values() == ['4', '5', '7', '7']
+    assert nc_stats.copy_constructions == 0
+    assert mv_stats.copy_constructions == 1
+    assert nc_stats.move_constructions >= 0
+    assert mv_stats.move_constructions >= 0
+
+
+def test_dispatch_issue(msg):
+    """#159: virtual function dispatch has problems with similar-named functions"""
+    class PyClass1(m.DispatchIssue):
+        def dispatch(self):
+            return "Yay.."
+
+    class PyClass2(m.DispatchIssue):
+        def dispatch(self):
+            with pytest.raises(RuntimeError) as excinfo:
+                super(PyClass2, self).dispatch()
+            assert msg(excinfo.value) == 'Tried to call pure virtual function "Base::dispatch"'
+
+            p = PyClass1()
+            return m.dispatch_issue_go(p)
+
+    b = PyClass2()
+    assert m.dispatch_issue_go(b) == "Yay.."
+
+
+def test_override_ref():
+    """#392/397: overriding reference-returning functions"""
+    o = m.OverrideTest("asdf")
+
+    # Not allowed (see associated .cpp comment)
+    # i = o.str_ref()
+    # assert o.str_ref() == "asdf"
+    assert o.str_value() == "asdf"
+
+    assert o.A_value().value == "hi"
+    a = o.A_ref()
+    assert a.value == "hi"
+    a.value = "bye"
+    assert a.value == "bye"
+
+
+def test_inherited_virtuals():
     class AR(m.A_Repeat):
         def unlucky_number(self):
             return 99
@@ -276,88 +361,3 @@
     assert obj.unlucky_number() == -7
     assert obj.lucky_number() == -1.375
     assert obj.say_everything() == "BT -7"
-
-
-# PyPy: Reference count > 1 causes call with noncopyable instance
-# to fail in ncv1.print_nc()
-@pytest.unsupported_on_pypy
-@pytest.mark.skipif(not hasattr(m, "NCVirt"), reason="NCVirt test broken on ICPC")
-def test_move_support():
-    class NCVirtExt(m.NCVirt):
-        def get_noncopyable(self, a, b):
-            # Constructs and returns a new instance:
-            nc = m.NonCopyable(a * a, b * b)
-            return nc
-
-        def get_movable(self, a, b):
-            # Return a referenced copy
-            self.movable = m.Movable(a, b)
-            return self.movable
-
-    class NCVirtExt2(m.NCVirt):
-        def get_noncopyable(self, a, b):
-            # Keep a reference: this is going to throw an exception
-            self.nc = m.NonCopyable(a, b)
-            return self.nc
-
-        def get_movable(self, a, b):
-            # Return a new instance without storing it
-            return m.Movable(a, b)
-
-    ncv1 = NCVirtExt()
-    assert ncv1.print_nc(2, 3) == "36"
-    assert ncv1.print_movable(4, 5) == "9"
-    ncv2 = NCVirtExt2()
-    assert ncv2.print_movable(7, 7) == "14"
-    # Don't check the exception message here because it differs under debug/non-debug mode
-    with pytest.raises(RuntimeError):
-        ncv2.print_nc(9, 9)
-
-    nc_stats = ConstructorStats.get(m.NonCopyable)
-    mv_stats = ConstructorStats.get(m.Movable)
-    assert nc_stats.alive() == 1
-    assert mv_stats.alive() == 1
-    del ncv1, ncv2
-    assert nc_stats.alive() == 0
-    assert mv_stats.alive() == 0
-    assert nc_stats.values() == ['4', '9', '9', '9']
-    assert mv_stats.values() == ['4', '5', '7', '7']
-    assert nc_stats.copy_constructions == 0
-    assert mv_stats.copy_constructions == 1
-    assert nc_stats.move_constructions >= 0
-    assert mv_stats.move_constructions >= 0
-
-
-def test_dispatch_issue(msg):
-    """#159: virtual function dispatch has problems with similar-named functions"""
-    class PyClass1(m.DispatchIssue):
-        def dispatch(self):
-            return "Yay.."
-
-    class PyClass2(m.DispatchIssue):
-        def dispatch(self):
-            with pytest.raises(RuntimeError) as excinfo:
-                super(PyClass2, self).dispatch()
-            assert msg(excinfo.value) == 'Tried to call pure virtual function "Base::dispatch"'
-
-            p = PyClass1()
-            return m.dispatch_issue_go(p)
-
-    b = PyClass2()
-    assert m.dispatch_issue_go(b) == "Yay.."
-
-
-def test_override_ref():
-    """#392/397: overridding reference-returning functions"""
-    o = m.OverrideTest("asdf")
-
-    # Not allowed (see associated .cpp comment)
-    # i = o.str_ref()
-    # assert o.str_ref() == "asdf"
-    assert o.str_value() == "asdf"
-
-    assert o.A_value().value == "hi"
-    a = o.A_ref()
-    assert a.value == "hi"
-    a.value = "bye"
-    assert a.value == "bye"
