Expose more instance management functions

This breaks up the instance management functions in class_support.h a
little bit so that other pybind11 code can use them.  In particular:

- added make_new_instance() which does what pybind11_object_new does,
  but also allows instance allocation without `value` allocation.  This
  lets `cast.h` use the same instance allocation rather than having its
  own separate implementation.
- instance registration is now moved to a
  `register_instance()`/`deregister_instance()` pair (rather than having
  individual code add or remove things from `registered_instances`
  directly).
- clear_instance() does everything `pybind11_object_dealloc()` needs
  except for the deallocation; this is helpful for factory construction
  which needs to be able to replace the internals of an instance without
  deallocating it.
- clear_instance() now also calls `dealloc` when `holder_constructed`
  is true, even if `value` is null.  This can happen in factory
  construction when the pointer is moved from one instance to another,
  but the holder itself is only copied (i.e. for a shared_ptr holder).
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index b996203..66ca4d4 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -195,8 +195,10 @@
 #endif
 }
 
-// Forward declaration
+// Forward declarations
 inline void keep_alive_impl(handle nurse, handle patient);
+inline void register_instance(void *self);
+inline PyObject *make_new_instance(PyTypeObject *type, bool allocate_value = true);
 
 class type_caster_generic {
 public:
@@ -302,7 +304,7 @@
                 return handle((PyObject *) it_i->second).inc_ref();
         }
 
-        auto inst = reinterpret_steal<object>(PyType_GenericAlloc(tinfo->type, 0));
+        auto inst = reinterpret_steal<object>(make_new_instance(tinfo->type, false /* don't allocate value */));
 
         auto wrapper = (instance<void> *) inst.ptr();
 
@@ -352,10 +354,9 @@
                 throw cast_error("unhandled return_value_policy: should not happen!");
         }
 
+        register_instance(wrapper);
         tinfo->init_holder(inst.ptr(), existing_holder);
 
-        internals.registered_instances.emplace(wrapper->value, inst.ptr());
-
         return inst.release();
     }
 
diff --git a/include/pybind11/class_support.h b/include/pybind11/class_support.h
index 235df45..370a67c 100644
--- a/include/pybind11/class_support.h
+++ b/include/pybind11/class_support.h
@@ -185,19 +185,49 @@
     return type;
 }
 
-/// Instance creation function for all pybind11 types. It only allocates space for the
-/// C++ object, but doesn't call the constructor -- an `__init__` function must do that.
-extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *) {
+inline void register_instance(void *self) {
+    auto *inst = (instance_essentials<void> *) self;
+    get_internals().registered_instances.emplace(inst->value, self);
+}
+
+inline bool deregister_instance(void *self) {
+    auto *inst = (instance_essentials<void> *) self;
+    auto type = Py_TYPE(inst);
+    auto &registered_instances = get_internals().registered_instances;
+    auto range = registered_instances.equal_range(inst->value);
+    for (auto it = range.first; it != range.second; ++it) {
+        if (type == Py_TYPE(it->second)) {
+            registered_instances.erase(it);
+            return true;
+        }
+    }
+    return false;
+}
+
+/// Creates a new instance which, by default, includes allocation (but not construction) of the
+/// wrapped C++ instance.  If allocating value, the instance is registered; otherwise
+/// register_instance will need to be called once the value has been assigned.
+inline PyObject *make_new_instance(PyTypeObject *type, bool allocate_value /*= true (in cast.h)*/) {
     PyObject *self = type->tp_alloc(type, 0);
     auto instance = (instance_essentials<void> *) self;
     auto tinfo = get_type_info(type);
-    instance->value = tinfo->operator_new(tinfo->type_size);
     instance->owned = true;
     instance->holder_constructed = false;
-    get_internals().registered_instances.emplace(instance->value, self);
+    if (allocate_value) {
+        instance->value = tinfo->operator_new(tinfo->type_size);
+        register_instance(self);
+    } else {
+        instance->value = nullptr;
+    }
     return self;
 }
 
+/// Instance creation function for all pybind11 types. It only allocates space for the
+/// C++ object, but doesn't call the constructor -- an `__init__` function must do that.
+extern "C" inline PyObject *pybind11_object_new(PyTypeObject *type, PyObject *, PyObject *) {
+    return make_new_instance(type);
+}
+
 /// An `__init__` function constructs the C++ object. Users should provide at least one
 /// of these using `py::init` or directly with `.def(__init__, ...)`. Otherwise, the
 /// following default function will be used which simply throws an exception.
@@ -213,25 +243,17 @@
     return -1;
 }
 
-/// Instance destructor function for all pybind11 types. It calls `type_info.dealloc`
-/// to destroy the C++ object itself, while the rest is Python bookkeeping.
-extern "C" inline void pybind11_object_dealloc(PyObject *self) {
+/// Clears all internal data from the instance and removes it from registered instances in
+/// preparation for deallocation.
+inline void clear_instance(PyObject *self) {
     auto instance = (instance_essentials<void> *) self;
-    if (instance->value) {
+    bool has_value = instance->value;
+    if (has_value || instance->holder_constructed) {
         auto type = Py_TYPE(self);
         get_type_info(type)->dealloc(self);
-
-        auto &registered_instances = get_internals().registered_instances;
-        auto range = registered_instances.equal_range(instance->value);
-        bool found = false;
-        for (auto it = range.first; it != range.second; ++it) {
-            if (type == Py_TYPE(it->second)) {
-                registered_instances.erase(it);
-                found = true;
-                break;
-            }
-        }
-        if (!found)
+    }
+    if (has_value) {
+        if (!deregister_instance(self))
             pybind11_fail("pybind11_object_dealloc(): Tried to deallocate unregistered instance!");
 
         if (instance->weakrefs)
@@ -241,6 +263,12 @@
         if (dict_ptr)
             Py_CLEAR(*dict_ptr);
     }
+}
+
+/// Instance destructor function for all pybind11 types. It calls `type_info.dealloc`
+/// to destroy the C++ object itself, while the rest is Python bookkeeping.
+extern "C" inline void pybind11_object_dealloc(PyObject *self) {
+    clear_instance(self);
     Py_TYPE(self)->tp_free(self);
 }