Allow module-local classes to be loaded externally

The main point of `py::module_local` is to make the C++ -> Python cast
unique so that returning/casting a C++ instance is well-defined.
Unfortunately it also makes loading unique, but this isn't particularly
desirable: when an instance contains `Type` instance there's no reason
it shouldn't be possible to pass that instance to a bound function
taking a `Type` parameter, even if that function is in another module.

This commit solves the issue by allowing foreign module (and global)
type loaders have a chance to load the value if the local module loader
fails.  The implementation here does this by storing a module-local
loading function in a capsule in the python type, which we can then call
if the local (and possibly global, if the local type is masking a global
type) version doesn't work.
diff --git a/tests/test_local_bindings.py b/tests/test_local_bindings.py
index 3a6ad8b..e9a63ca 100644
--- a/tests/test_local_bindings.py
+++ b/tests/test_local_bindings.py
@@ -20,16 +20,14 @@
     assert not hasattr(i1, 'get2')
     assert not hasattr(i2, 'get3')
 
+    # Loading within the local module
     assert m.local_value(i1) == 5
     assert cm.local_value(i2) == 10
 
-    with pytest.raises(TypeError) as excinfo:
-        m.local_value(i2)
-    assert "incompatible function arguments" in str(excinfo.value)
-
-    with pytest.raises(TypeError) as excinfo:
-        cm.local_value(i1)
-    assert "incompatible function arguments" in str(excinfo.value)
+    # Cross-module loading works as well (on failure, the type loader looks for
+    # external module-local converters):
+    assert m.local_value(i2) == 10
+    assert cm.local_value(i1) == 5
 
 
 def test_nonlocal_failure():
@@ -60,13 +58,12 @@
     v2.append(cm.LocalType(1))
     v2.append(cm.LocalType(2))
 
-    with pytest.raises(TypeError):
-        v1.append(cm.LocalType(3))
-    with pytest.raises(TypeError):
-        v2.append(m.LocalType(3))
+    # Cross module value loading:
+    v1.append(cm.LocalType(3))
+    v2.append(m.LocalType(3))
 
-    assert [i.get() for i in v1] == [0, 1]
-    assert [i.get() for i in v2] == [2, 3]
+    assert [i.get() for i in v1] == [0, 1, 2]
+    assert [i.get() for i in v2] == [2, 3, 4]
 
     v3, v4 = m.NonLocalVec(), cm.NonLocalVec2()
     v3.append(m.NonLocalType(1))
@@ -158,3 +155,56 @@
 
     Invoked with: [1, 2, 3]
     """  # noqa: E501 line too long
+
+
+def test_cross_module_calls():
+    import pybind11_cross_module_tests as cm
+
+    v1 = m.LocalVec()
+    v1.append(m.LocalType(1))
+    v2 = cm.LocalVec()
+    v2.append(cm.LocalType(2))
+
+    # Returning the self pointer should get picked up as returning an existing
+    # instance (even when that instance is of a foreign, non-local type).
+    assert m.return_self(v1) is v1
+    assert cm.return_self(v2) is v2
+    assert m.return_self(v2) is v2
+    assert cm.return_self(v1) is v1
+
+    assert m.LocalVec is not cm.LocalVec
+    # Returning a copy, on the other hand, always goes to the local type,
+    # regardless of where the source type came from.
+    assert type(m.return_copy(v1)) is m.LocalVec
+    assert type(m.return_copy(v2)) is m.LocalVec
+    assert type(cm.return_copy(v1)) is cm.LocalVec
+    assert type(cm.return_copy(v2)) is cm.LocalVec
+
+    # Test the example given in the documentation (which also tests inheritance casting):
+    mycat = m.Cat("Fluffy")
+    mydog = cm.Dog("Rover")
+    assert mycat.get_name() == "Fluffy"
+    assert mydog.name() == "Rover"
+    assert m.Cat.__base__.__name__ == "Pet"
+    assert cm.Dog.__base__.__name__ == "Pet"
+    assert m.Cat.__base__ is not cm.Dog.__base__
+    assert m.pet_name(mycat) == "Fluffy"
+    assert m.pet_name(mydog) == "Rover"
+    assert cm.pet_name(mycat) == "Fluffy"
+    assert cm.pet_name(mydog) == "Rover"
+
+    assert m.MixGL is not cm.MixGL
+    a = m.MixGL(1)
+    b = cm.MixGL(2)
+    assert m.get_gl_value(a) == 11
+    assert m.get_gl_value(b) == 12
+    assert cm.get_gl_value(a) == 101
+    assert cm.get_gl_value(b) == 102
+
+    c, d = m.MixGL2(3), cm.MixGL2(4)
+    with pytest.raises(TypeError) as excinfo:
+        m.get_gl_value(c)
+    assert "incompatible function arguments" in str(excinfo)
+    with pytest.raises(TypeError) as excinfo:
+        m.get_gl_value(d)
+    assert "incompatible function arguments" in str(excinfo)