do more work with classes from pytypes.h (especially for STL container casting)
diff --git a/README.md b/README.md
index 6167fe0..b739475 100644
--- a/README.md
+++ b/README.md
@@ -22,13 +22,13 @@
 become an excessively large and unnecessary dependency.
 
 Think of this library as a tiny self-contained version of Boost.Python with
-everything stripped away that isn't relevant for binding generation. The core
-header files only require ~2.5K lines of code and depend on Python (2.7 or 3.x)
-and the C++ standard library. This compact implementation was possible thanks
-to some of the new C++11 language features (specifically: tuples, lambda
-functions and variadic templates). Since its creation, this library has grown
-beyond Boost.Python in many ways, leading to dramatically simpler binding code
-in many common situations.
+everything stripped away that isn't relevant for binding generation. Without
+comments, the core header files only require ~2.5K lines of code and depend on
+Python (2.7 or 3.x) and the C++ standard library. This compact implementation
+was possible thanks to some of the new C++11 language features (specifically:
+tuples, lambda functions and variadic templates). Since its creation, this
+library has grown beyond Boost.Python in many ways, leading to dramatically
+simpler binding code in many common situations.
 
 Tutorial and reference documentation is provided at
 [http://pybind11.readthedocs.org/en/latest](http://pybind11.readthedocs.org/en/latest).
diff --git a/docs/intro.rst b/docs/intro.rst
index cb9dc9c..6eb5038 100644
--- a/docs/intro.rst
+++ b/docs/intro.rst
@@ -19,13 +19,13 @@
 become an excessively large and unnecessary dependency.
 
 Think of this library as a tiny self-contained version of Boost.Python with
-everything stripped away that isn't relevant for binding generation. The core
-header files only require ~2.5K lines of code and depend on Python (2.7 or 3.x)
-and the C++ standard library. This compact implementation was possible thanks
-to some of the new C++11 language features (specifically: tuples, lambda
-functions and variadic templates). Since its creation, this library has grown
-beyond Boost.Python in many ways, leading to dramatically simpler binding code
-in many common situations.
+everything stripped away that isn't relevant for binding generation. Without
+comments, the core header files only require ~2.5K lines of code and depend on
+Python (2.7 or 3.x) and the C++ standard library. This compact implementation
+was possible thanks to some of the new C++11 language features (specifically:
+tuples, lambda functions and variadic templates). Since its creation, this
+library has grown beyond Boost.Python in many ways, leading to dramatically
+simpler binding code in many common situations.
 
 Core features
 *************
diff --git a/example/example4.ref b/example/example4.ref
index 9be2bdc..040d55b 100644
--- a/example/example4.ref
+++ b/example/example4.ref
@@ -10,7 +10,7 @@
 None
 test_function(enum=2)
 None
-<class 'Example4.EMode'>
+<class 'example.EMode'>
 EMode.EFirstMode
 EMode.EFirstMode
 Example4::test_function(enum=1)
diff --git a/example/run_test.py b/example/run_test.py
index 8977e96..802b851 100755
--- a/example/run_test.py
+++ b/example/run_test.py
@@ -22,6 +22,8 @@
         line = line.replace('__builtin__', 'builtins')
         line = line.replace('example.', '')
         line = line.replace('unicode', 'str')
+        line = line.replace('Example4.EMode', 'EMode')
+        line = line.replace('example.EMode', 'EMode')
         line = line.replace('method of builtins.PyCapsule instance', '')
         line = line.strip()
         if sys.platform == 'win32':
diff --git a/include/pybind11/common.h b/include/pybind11/common.h
index e294cfa..334ec13 100644
--- a/include/pybind11/common.h
+++ b/include/pybind11/common.h
@@ -225,8 +225,8 @@
 /// Internal data struture used to track registered instances and types
 struct internals {
     std::unordered_map<const void *, void*> registered_types_cpp; // std::type_info* -> type_info
-    std::unordered_map<const void *, void*> registered_types_py;  // PyTypeObject* -> type_info 
-    std::unordered_map<const void *, void*> registered_instances; // void * -> PyObject* 
+    std::unordered_map<const void *, void*> registered_types_py;  // PyTypeObject* -> type_info
+    std::unordered_map<const void *, void*> registered_instances; // void * -> PyObject*
     std::unordered_set<std::pair<const PyObject *, const char *>, overload_hash> inactive_overload_cache;
 };
 
@@ -271,4 +271,7 @@
 /// Thrown when pybind11::cast or handle::call fail due to a type casting error
 struct cast_error        : public std::runtime_error { public: cast_error(const std::string &w = "") : std::runtime_error(w)     {} };
 
+[[noreturn]] PYBIND11_NOINLINE inline void pybind11_fail(const char *reason) { throw std::runtime_error(reason); }
+[[noreturn]] PYBIND11_NOINLINE inline void pybind11_fail(const std::string &reason) { throw std::runtime_error(reason); }
+
 NAMESPACE_END(pybind11)
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index c35f837..9749147 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -24,7 +24,6 @@
 #endif
 
 #include "cast.h"
-#include <iostream>
 
 NAMESPACE_BEGIN(pybind11)
 
@@ -196,9 +195,9 @@
                 a.value, return_value_policy::automatic, nullptr);
 
         if (obj == nullptr)
-            throw std::runtime_error("arg(): could not convert default keyword "
-                                     "argument into a Python object (type not "
-                                     "registered yet?)");
+            pybind11_fail("arg(): could not convert default keyword "
+                          "argument into a Python object (type not "
+                          "registered yet?)");
 
         entry->args.emplace_back(a.name, a.descr, obj);
     }
@@ -490,7 +489,7 @@
             } else if (c == '%') {
                 const std::type_info *t = types[type_index++];
                 if (!t)
-                    throw std::runtime_error("Internal error while parsing type signature (1)");
+                    pybind11_fail("Internal error while parsing type signature (1)");
                 auto it = registered_types.find(t);
                 if (it != registered_types.end()) {
                     signature += ((const detail::type_info *) it->second)->type->tp_name;
@@ -504,7 +503,7 @@
             }
         }
         if (type_depth != 0 || types[type_index] != nullptr)
-            throw std::runtime_error("Internal error while parsing type signature (2)");
+            pybind11_fail("Internal error while parsing type signature (2)");
 
         #if !defined(PYBIND11_CPP14)
             delete[] types;
@@ -519,7 +518,7 @@
 #endif
 
         if (!m_entry->args.empty() && (int) m_entry->args.size() != args)
-            throw std::runtime_error(
+            pybind11_fail(
                 "cpp_function(): function \"" + std::string(m_entry->name) + "\" takes " +
                 std::to_string(args) + " arguments, but " + std::to_string(m_entry->args.size()) +
                 " pybind11::arg entries were specified!");
@@ -555,7 +554,7 @@
             });
             m_ptr = PyCFunction_New(m_entry->def, entry_capsule.ptr());
             if (!m_ptr)
-                throw std::runtime_error("cpp_function::cpp_function(): Could not allocate function object");
+                pybind11_fail("cpp_function::cpp_function(): Could not allocate function object");
         } else {
             /* Append at the end of the overload chain */
             m_ptr = m_entry->sibling;
@@ -597,7 +596,7 @@
             m_ptr = PyMethod_New(m_ptr, nullptr, entry->class_);
 #endif
             if (!m_ptr)
-                throw std::runtime_error("cpp_function::cpp_function(): Could not allocate instance method object");
+                pybind11_fail("cpp_function::cpp_function(): Could not allocate instance method object");
             Py_DECREF(func);
         }
     }
@@ -621,7 +620,7 @@
         m_ptr = Py_InitModule3(name, nullptr, doc);
 #endif
         if (m_ptr == nullptr)
-            throw std::runtime_error("Internal error in module::module()");
+            pybind11_fail("Internal error in module::module()");
         inc_ref();
     }
 
@@ -647,7 +646,7 @@
     static module import(const char *name) {
         PyObject *obj = PyImport_ImportModule(name);
         if (!obj)
-            throw std::runtime_error("Module \"" + std::string(name) + "\" not found!");
+            pybind11_fail("Module \"" + std::string(name) + "\" not found!");
         return module(obj, false);
     }
 };
@@ -668,7 +667,7 @@
         auto type = (PyHeapTypeObject*) type_holder.ptr();
 
         if (!type_holder || !name)
-            throw std::runtime_error("generic_type: unable to create type object!");
+            pybind11_fail("generic_type: unable to create type object!");
 
         /* Register supplemental type information in C++ dict */
         auto &internals = get_internals();
@@ -732,7 +731,7 @@
         }
 
         if (PyType_Ready(&type->ht_type) < 0)
-            throw std::runtime_error("generic_type: PyType_Ready failed!");
+            pybind11_fail("generic_type: PyType_Ready failed!");
 
         m_ptr = type_holder.ptr();
 
@@ -756,7 +755,7 @@
             object type_holder(PyType_Type.tp_alloc(&PyType_Type, 0), false);
             object name(PYBIND11_FROM_STRING(name_.c_str()), false);
             if (!type_holder || !name)
-                throw std::runtime_error("generic_type::metaclass(): unable to create type object!");
+                pybind11_fail("generic_type::metaclass(): unable to create type object!");
 
             auto type = (PyHeapTypeObject*) type_holder.ptr();
             type->ht_name = name.release();
@@ -767,7 +766,7 @@
                                       ~Py_TPFLAGS_HAVE_GC;
 
             if (PyType_Ready(&type->ht_type) < 0)
-                throw std::runtime_error("generic_type::metaclass(): PyType_Ready failed!");
+                pybind11_fail("generic_type::metaclass(): PyType_Ready failed!");
 
             ob_type = (PyTypeObject *) type_holder.release();
         }
@@ -798,7 +797,7 @@
                 auto &registered_instances = detail::get_internals().registered_instances;
                 auto it = registered_instances.find(self->value);
                 if (it == registered_instances.end())
-                    throw std::runtime_error("generic_type::dealloc(): Tried to deallocate unregistered instance!");
+                    pybind11_fail("generic_type::dealloc(): Tried to deallocate unregistered instance!");
                 registered_instances.erase(it);
             }
             Py_XDECREF(self->parent);
@@ -1096,14 +1095,12 @@
     handle patient(Patient > 0 ? PyTuple_GetItem(arg, Patient - 1) : ret);
 
     if (!nurse || !patient)
-        throw std::runtime_error("Could not activate keep_alive!");
+        pybind11_fail("Could not activate keep_alive!");
 
     cpp_function disable_lifesupport(
         [patient](handle weakref) { patient.dec_ref(); weakref.dec_ref(); });
 
     weakref wr(nurse, disable_lifesupport);
-    if (!wr)
-        throw std::runtime_error("Could not allocate weak reference!");
 
     patient.inc_ref(); /* reference patient and leak the weak reference */
     (void) wr.release();
@@ -1138,7 +1135,7 @@
     auto & registered_types = detail::get_internals().registered_types_cpp;
     auto it = registered_types.find(&typeid(OutputType));
     if (it == registered_types.end())
-        throw std::runtime_error("implicitly_convertible: Unable to find type " + type_id<OutputType>());
+        pybind11_fail("implicitly_convertible: Unable to find type " + type_id<OutputType>());
     ((detail::type_info *) it->second)->implicit_conversions.push_back(implicit_caster);
 }
 
@@ -1196,7 +1193,7 @@
 
 #define PYBIND11_OVERLOAD_PURE(ret_type, class_name, name, ...) \
     PYBIND11_OVERLOAD_INT(ret_type, class_name, name, __VA_ARGS__) \
-    throw std::runtime_error("Tried to call pure virtual function \"" #name "\"");
+    pybind11::pybind11_fail("Tried to call pure virtual function \"" #name "\"");
 
 NAMESPACE_END(pybind11)
 
diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index 1b2c440..8f52471 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -88,12 +88,12 @@
     iterator(PyObject *obj, bool borrowed = false) : object(obj, borrowed) { ++*this; }
     iterator& operator++() {
         if (ptr())
-            value = object(PyIter_Next(ptr()), false);
+            value = object(PyIter_Next(m_ptr), false);
         return *this;
     }
     bool operator==(const iterator &it) const { return *it == **this; }
     bool operator!=(const iterator &it) const { return *it != **this; }
-    const object &operator*() const { return value; }
+    const handle &operator*() const { return value; }
     bool check() const { return PyIter_Check(ptr()); }
 private:
     object value;
@@ -127,10 +127,10 @@
     void operator=(const handle &h) {
         if (attr) {
             if (PyObject_SetAttr(obj, key, (PyObject *) h.ptr()) < 0)
-                throw std::runtime_error("Unable to set object attribute");
+                pybind11_fail("Unable to set object attribute");
         } else {
             if (PyObject_SetItem(obj, key, (PyObject *) h.ptr()) < 0)
-                throw std::runtime_error("Unable to set object item");
+                pybind11_fail("Unable to set object item");
         }
     }
 
@@ -164,12 +164,12 @@
     void operator=(const handle &o) {
         o.inc_ref(); // PyList_SetItem steals a reference
         if (PyList_SetItem(list, (ssize_t) index, (PyObject *) o.ptr()) < 0)
-            throw std::runtime_error("Unable to assign value in Python list!");
+            pybind11_fail("Unable to assign value in Python list!");
     }
     operator object() const {
         PyObject *result = PyList_GetItem(list, (ssize_t) index);
         if (!result)
-            throw std::runtime_error("Unable to retrieve value from Python list!");
+            pybind11_fail("Unable to retrieve value from Python list!");
         return object(result, true);
     }
 private:
@@ -184,12 +184,12 @@
     void operator=(const handle &o) {
         o.inc_ref(); // PyTuple_SetItem steals a reference
         if (PyTuple_SetItem(tuple, (ssize_t) index, (PyObject *) o.ptr()) < 0)
-            throw std::runtime_error("Unable to assign value in Python tuple!");
+            pybind11_fail("Unable to assign value in Python tuple!");
     }
     operator object() const {
         PyObject *result = PyTuple_GetItem(tuple, (ssize_t) index);
         if (!result)
-            throw std::runtime_error("Unable to retrieve value from Python tuple!");
+            pybind11_fail("Unable to retrieve value from Python tuple!");
         return object(result, true);
     }
 private:
@@ -205,8 +205,8 @@
             pos = -1;
         return *this;
     }
-    std::pair<object, object> operator*() const {
-        return std::make_pair(object(key, true), object(value, true));
+    std::pair<handle, handle> operator*() const {
+        return std::make_pair(key, value);
     }
     bool operator==(const dict_iterator &it) const { return it.pos == pos; }
     bool operator!=(const dict_iterator &it) const { return it.pos != pos; }
@@ -242,7 +242,10 @@
 class str : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(str, object, PyUnicode_Check)
-    str(const std::string &s) : object(PyUnicode_FromStringAndSize(s.c_str(), s.length()), false) { }
+    str(const std::string &s)
+        : object(PyUnicode_FromStringAndSize(s.c_str(), s.length()), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate string object!");
+    }
 
     operator std::string() const {
 #if PY_MAJOR_VERSION >= 3
@@ -250,7 +253,7 @@
 #else
         object temp(PyUnicode_AsUTF8String(m_ptr), false);
         if (temp.ptr() == nullptr)
-            throw std::runtime_error("Unable to extract string contents!");
+            pybind11_fail("Unable to extract string contents!");
         return PyString_AsString(temp.ptr());
 #endif
     }
@@ -270,14 +273,16 @@
     PYBIND11_OBJECT_DEFAULT(bytes, object, PYBIND11_BYTES_CHECK)
 
     bytes(const std::string &s)
-        : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(s.data(), s.size()), false) { }
+        : object(PYBIND11_BYTES_FROM_STRING_AND_SIZE(s.data(), s.size()), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate bytes object!");
+    }
 
     operator std::string() const {
         char *buffer;
         ssize_t length;
         int err = PYBIND11_BYTES_AS_STRING_AND_SIZE(m_ptr, &buffer, &length);
         if (err == -1)
-            throw std::runtime_error("Unable to extract bytes contents!");
+            pybind11_fail("Unable to extract bytes contents!");
         return std::string(buffer, length);
     }
 };
@@ -306,6 +311,7 @@
             else
                 m_ptr = PyLong_FromUnsignedLongLong((unsigned long long) value);
         }
+        if (!m_ptr) pybind11_fail("Could not allocate int object!");
     }
 
     template <typename T,
@@ -328,8 +334,12 @@
 class float_ : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(float_, object, PyFloat_Check)
-    float_(float value) : object(PyFloat_FromDouble((double) value), false) { }
-    float_(double value) : object(PyFloat_FromDouble((double) value), false) { }
+    float_(float value) : object(PyFloat_FromDouble((double) value), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate float object!");
+    }
+    float_(double value) : object(PyFloat_FromDouble((double) value), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate float object!");
+    }
     operator float() const { return (float) PyFloat_AsDouble(m_ptr); }
     operator double() const { return (double) PyFloat_AsDouble(m_ptr); }
 };
@@ -337,7 +347,9 @@
 class weakref : public object {
 public:
     PYBIND11_OBJECT_DEFAULT(weakref, object, PyWeakref_Check)
-    weakref(handle obj, handle callback = handle()) : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), false) { }
+    weakref(handle obj, handle callback = handle()) : object(PyWeakref_NewRef(obj.ptr(), callback.ptr()), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate weak reference!");
+    }
 };
 
 class slice : public object {
@@ -346,6 +358,7 @@
     slice(ssize_t start_, ssize_t stop_, ssize_t step_) {
         int_ start(start_), stop(stop_), step(step_);
         m_ptr = PySlice_New(start.ptr(), stop.ptr(), step.ptr());
+        if (!m_ptr) pybind11_fail("Could not allocate slice object!");
     }
     bool compute(ssize_t length, ssize_t *start, ssize_t *stop, ssize_t *step, ssize_t *slicelength) const {
         return PySlice_GetIndicesEx((PYBIND11_SLICE_OBJECT *) m_ptr, length,
@@ -357,10 +370,13 @@
 public:
     PYBIND11_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact)
     capsule(PyObject *obj, bool borrowed) : object(obj, borrowed) { }
-    capsule(void *value, void (*destruct)(PyObject *) = nullptr) : object(PyCapsule_New(value, nullptr, destruct), false) { }
+    capsule(void *value, void (*destruct)(PyObject *) = nullptr)
+        : object(PyCapsule_New(value, nullptr, destruct), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate capsule object!");
+    }
     template <typename T> operator T *() const {
         T * result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, nullptr));
-        if (!result) throw std::runtime_error("Unable to extract capsule contents!");
+        if (!result) pybind11_fail("Unable to extract capsule contents!");
         return result;
     }
 };
@@ -368,7 +384,9 @@
 class tuple : public object {
 public:
     PYBIND11_OBJECT(tuple, object, PyTuple_Check)
-    tuple(size_t size = 0) : object(PyTuple_New((ssize_t) size), false) { }
+    tuple(size_t size = 0) : object(PyTuple_New((ssize_t) size), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate tuple object!");
+    }
     size_t size() const { return (size_t) PyTuple_Size(m_ptr); }
     detail::tuple_accessor operator[](size_t index) const { return detail::tuple_accessor(ptr(), index); }
 };
@@ -376,7 +394,9 @@
 class dict : public object {
 public:
     PYBIND11_OBJECT(dict, object, PyDict_Check)
-    dict() : object(PyDict_New(), false) { }
+    dict() : object(PyDict_New(), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate dict object!");
+    }
     size_t size() const { return (size_t) PyDict_Size(m_ptr); }
     detail::dict_iterator begin() const { return (++detail::dict_iterator(ptr(), 0)); }
     detail::dict_iterator end() const { return detail::dict_iterator(); }
@@ -386,7 +406,9 @@
 class list : public object {
 public:
     PYBIND11_OBJECT(list, object, PyList_Check)
-    list(size_t size = 0) : object(PyList_New((ssize_t) size), false) { }
+    list(size_t size = 0) : object(PyList_New((ssize_t) size), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate list object!");
+    }
     size_t size() const { return (size_t) PyList_Size(m_ptr); }
     detail::list_accessor operator[](size_t index) const { return detail::list_accessor(ptr(), index); }
     void append(const object &object) const { PyList_Append(m_ptr, (PyObject *) object.ptr()); }
@@ -395,10 +417,12 @@
 class set : public object {
 public:
     PYBIND11_OBJECT(set, object, PySet_Check)
-    set() : object(PySet_New(nullptr), false) { }
+    set() : object(PySet_New(nullptr), false) {
+        if (!m_ptr) pybind11_fail("Could not allocate set object!");
+    }
     size_t size() const { return (size_t) PySet_Size(m_ptr); }
-    void add(const object &object) const { PySet_Add(m_ptr, (PyObject *) object.ptr()); }
-    void clear() const { PySet_Clear(ptr()); }
+    bool add(const object &object) const { return PySet_Add(m_ptr, (PyObject *) object.ptr()) == 0; }
+    void clear() const { PySet_Clear(m_ptr); }
 };
 
 class function : public object {
@@ -448,7 +472,7 @@
             return (detail::type_info *) it->second;
         type = type->tp_base;
         if (type == nullptr)
-            throw std::runtime_error("pybind11::detail::get_type_info: unable to find type object!");
+            pybind11_fail("pybind11::detail::get_type_info: unable to find type object!");
     } while (true);
 }
 
diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h
index 3f3da7f..c57befa 100644
--- a/include/pybind11/stl.h
+++ b/include/pybind11/stl.h
@@ -14,7 +14,6 @@
 #include <set>
 #include <iostream>
 
-
 #if defined(_MSC_VER)
 #pragma warning(push)
 #pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
@@ -28,14 +27,14 @@
     typedef type_caster<Value> value_conv;
 public:
     bool load(PyObject *src, bool convert) {
-        if (!PyList_Check(src))
+        list l(src, true);
+        if (!l.check())
             return false;
-        size_t size = (size_t) PyList_GET_SIZE(src);
-        value.reserve(size);
+        value.reserve(l.size());
         value.clear();
         value_conv conv;
-        for (size_t i=0; i<size; ++i) {
-            if (!conv.load(PyList_GetItem(src, (ssize_t) i), convert))
+        for (auto it : l) {
+            if (!conv.load(it.ptr(), convert))
                 return false;
             value.push_back((Value) conv);
         }
@@ -43,17 +42,15 @@
     }
 
     static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent) {
-        object list(PyList_New(src.size()), false);
-        if (!list)
-            return nullptr;
+        list l(src.size());
         size_t index = 0;
         for (auto const &value: src) {
-            object value_ (value_conv::cast(value, policy, parent), false);
+            object value_(value_conv::cast(value, policy, parent), false);
             if (!value_)
                 return nullptr;
-            PyList_SET_ITEM(list.ptr(), index++, value_.release()); // steals a reference
+            PyList_SET_ITEM(l.ptr(), index++, value_.release()); // steals a reference
         }
-        return list.release();
+        return l.release();
     }
     PYBIND11_TYPE_CASTER(type, _("list<") + value_conv::name() + _(">"));
 };
@@ -68,8 +65,8 @@
             return false;
         value.clear();
         key_conv conv;
-        for (const object &o: s) {
-            if (!conv.load((PyObject *) o.ptr(), convert))
+        for (auto entry : s) {
+            if (!conv.load(entry.ptr(), convert))
                 return false;
             value.insert((Key) conv);
         }
@@ -77,15 +74,13 @@
     }
 
     static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent) {
-        object set(PySet_New(nullptr), false);
-        if (!set)
-            return nullptr;
+        pybind11::set s;
         for (auto const &value: src) {
             object value_(key_conv::cast(value, policy, parent), false);
-            if (!value_ || PySet_Add(set.ptr(), value_.ptr()) != 0)
+            if (!value_ || !s.add(value_)) // add the converted Python object, not the C++ key
                 return nullptr;
         }
-        return set.release();
+        return s.release();
     }
     PYBIND11_TYPE_CASTER(type, _("set<") + key_conv::name() + _(">"));
 };
@@ -97,16 +92,15 @@
     typedef type_caster<Value> value_conv;
 
     bool load(PyObject *src, bool convert) {
-        if (!PyDict_Check(src))
+        dict d(src, true);
+        if (!d.check())
             return false;
-
-        value.clear();
-        PyObject *key_, *value_;
-        ssize_t pos = 0;
         key_conv kconv;
         value_conv vconv;
-        while (PyDict_Next(src, &pos, &key_, &value_)) {
-            if (!kconv.load(key_, convert) || !vconv.load(value_, convert))
+        value.clear();
+        for (auto it : d) {
+            if (!kconv.load(it.first.ptr(), convert) ||
+                !vconv.load(it.second.ptr(), convert))
                 return false;
             value[(Key) kconv] = (Value) vconv;
         }
@@ -114,16 +108,15 @@
     }
 
     static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent) {
-        object dict(PyDict_New(), false);
-        if (!dict)
-            return nullptr;
+        dict d;
         for (auto const &kv: src) {
             object key(key_conv::cast(kv.first, policy, parent), false);
             object value(value_conv::cast(kv.second, policy, parent), false);
-            if (!key || !value || PyDict_SetItem(dict.ptr(), key.ptr(), value.ptr()) != 0)
+            if (!key || !value)
                 return nullptr;
+            d[key] = value;
         }
-        return dict.release();
+        return d.release();
     }
 
     PYBIND11_TYPE_CASTER(type, _("dict<") + key_conv::name() + _(", ") + value_conv::name() + _(">"));
@@ -131,7 +124,10 @@
 
 NAMESPACE_END(detail)
 
-inline std::ostream &operator<<(std::ostream &os, const object &obj) { os << (std::string) obj.str(); return os; }
+inline std::ostream &operator<<(std::ostream &os, const handle &obj) {
+    os << (std::string) obj.str();
+    return os;
+}
 
 NAMESPACE_END(pybind11)
 
diff --git a/include/pybind11/typeid.h b/include/pybind11/typeid.h
index 2d7c39b..b395110 100644
--- a/include/pybind11/typeid.h
+++ b/include/pybind11/typeid.h
@@ -11,6 +11,7 @@
 
 #include <cstdio>
 #include <cstdlib>
+
 #if defined(__GNUG__)
 #include <cxxabi.h>
 #endif
@@ -26,7 +27,7 @@
     }
 }
 
-inline void clean_type_id(std::string &name) {
+PYBIND11_NOINLINE inline void clean_type_id(std::string &name) {
 #if defined(__GNUG__)
     int status = 0;
     std::unique_ptr<char, void (*)(void *)> res {
diff --git a/setup.py b/setup.py
index 17ef509..691a51a 100644
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,7 @@
     headers=[
         'include/pybind11/cast.h',
         'include/pybind11/complex.h',
+        'include/pybind11/descr.h',
         'include/pybind11/numpy.h',
         'include/pybind11/pybind11.h',
         'include/pybind11/stl.h',
@@ -57,11 +58,10 @@
 become an excessively large and unnecessary dependency.
 
 Think of this library as a tiny self-contained version of Boost.Python with
-everything stripped away that isn't relevant for binding generation. The whole
-codebase requires less than 3000 lines of code and only depends on Python (2.7
-or 3.x) and the C++ standard library. This compact implementation was
-possible thanks to some of the new C++11 language features (tuples, lambda
-functions and variadic templates). Since its creation, this library has
-grown beyond Boost.Python in many ways, leading to dramatically simpler binding
-code in many common situations.""",
-)
+everything stripped away that isn't relevant for binding generation. Without
+comments, the core header files only require ~2.5K lines of code and depend on
+Python (2.7 or 3.x) and the C++ standard library. This compact implementation
+was possible thanks to some of the new C++11 language features (specifically:
+tuples, lambda functions and variadic templates). Since its creation, this
+library has grown beyond Boost.Python in many ways, leading to dramatically
+simpler binding code in many common situations.""")