generate more compact binaries
diff --git a/include/pybind/cast.h b/include/pybind/cast.h
index a3143d2..6c26f41 100644
--- a/include/pybind/cast.h
+++ b/include/pybind/cast.h
@@ -11,7 +11,6 @@
 #pragma once
 
 #include <pybind/pytypes.h>
-#include <pybind/mpl.h>
 #include <pybind/typeid.h>
 #include <map>
 #include <array>
@@ -63,7 +62,8 @@
             return Py_None;
         }
         // avoid an issue with internal references matching their parent's address
-        bool dont_cache = parent && ((instance<void> *) parent)->value == (void *) src;
+        bool dont_cache = policy == return_value_policy::reference_internal &&
+                          parent && ((instance<void> *) parent)->value == (void *) src;
         auto& internals = get_internals();
         auto it_instance = internals.registered_instances.find(src);
         if (it_instance != internals.registered_instances.end() && !dont_cache) {
@@ -126,7 +126,7 @@
     object temp;
 };
 
-#define TYPE_CASTER(type, py_name) \
+#define PYBIND_TYPE_CASTER(type, py_name) \
     protected: \
         type value; \
     public: \
@@ -137,7 +137,7 @@
         operator type*() { return &value; } \
         operator type&() { return value; } \
 
-#define TYPE_CASTER_NUMBER(type, py_type, from_type, to_pytype) \
+#define PYBIND_TYPE_CASTER_NUMBER(type, py_type, from_type, to_pytype) \
     template <> class type_caster<type> { \
     public: \
         bool load(PyObject *src, bool) { \
@@ -151,30 +151,30 @@
         static PyObject *cast(type src, return_value_policy /* policy */, PyObject * /* parent */) { \
             return to_pytype((py_type) src); \
         } \
-        TYPE_CASTER(type, #type); \
+        PYBIND_TYPE_CASTER(type, #type); \
     };
 
-TYPE_CASTER_NUMBER(int32_t, long, PyLong_AsLong, PyLong_FromLong)
-TYPE_CASTER_NUMBER(uint32_t, unsigned long, PyLong_AsUnsignedLong, PyLong_FromUnsignedLong)
-TYPE_CASTER_NUMBER(int64_t, PY_LONG_LONG, PyLong_AsLongLong, PyLong_FromLongLong)
-TYPE_CASTER_NUMBER(uint64_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong, PyLong_FromUnsignedLongLong)
+PYBIND_TYPE_CASTER_NUMBER(int32_t, long, PyLong_AsLong, PyLong_FromLong)
+PYBIND_TYPE_CASTER_NUMBER(uint32_t, unsigned long, PyLong_AsUnsignedLong, PyLong_FromUnsignedLong)
+PYBIND_TYPE_CASTER_NUMBER(int64_t, PY_LONG_LONG, PyLong_AsLongLong, PyLong_FromLongLong)
+PYBIND_TYPE_CASTER_NUMBER(uint64_t, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong, PyLong_FromUnsignedLongLong)
 
 #if defined(__APPLE__) // size_t/ssize_t are separate types on Mac OS X
-TYPE_CASTER_NUMBER(ssize_t, Py_ssize_t, PyLong_AsSsize_t, PyLong_FromSsize_t)
-TYPE_CASTER_NUMBER(size_t, size_t, PyLong_AsSize_t, PyLong_FromSize_t)
+PYBIND_TYPE_CASTER_NUMBER(ssize_t, Py_ssize_t, PyLong_AsSsize_t, PyLong_FromSsize_t)
+PYBIND_TYPE_CASTER_NUMBER(size_t, size_t, PyLong_AsSize_t, PyLong_FromSize_t)
 #endif
 
-TYPE_CASTER_NUMBER(float, float, PyFloat_AsDouble, PyFloat_FromDouble)
-TYPE_CASTER_NUMBER(double, double, PyFloat_AsDouble, PyFloat_FromDouble)
+PYBIND_TYPE_CASTER_NUMBER(float, float, PyFloat_AsDouble, PyFloat_FromDouble)
+PYBIND_TYPE_CASTER_NUMBER(double, double, PyFloat_AsDouble, PyFloat_FromDouble)
 
-template <> class type_caster<mpl::detail::void_type> {
+template <> class type_caster<detail::void_type> {
 public:
     bool load(PyObject *, bool) { return true; }
-    static PyObject *cast(mpl::detail::void_type, return_value_policy /* policy */, PyObject * /* parent */) {
+    static PyObject *cast(detail::void_type, return_value_policy /* policy */, PyObject * /* parent */) {
         Py_INCREF(Py_None);
         return Py_None;
     }
-    TYPE_CASTER(mpl::detail::void_type, "None");
+    PYBIND_TYPE_CASTER(detail::void_type, "None");
 };
 
 template <> class type_caster<bool> {
@@ -189,7 +189,7 @@
         Py_INCREF(result);
         return result;
     }
-    TYPE_CASTER(bool, "bool");
+    PYBIND_TYPE_CASTER(bool, "bool");
 };
 
 template <> class type_caster<std::string> {
@@ -203,7 +203,7 @@
     static PyObject *cast(const std::string &src, return_value_policy /* policy */, PyObject * /* parent */) {
         return PyUnicode_FromString(src.c_str());
     }
-    TYPE_CASTER(std::string, "str");
+    PYBIND_TYPE_CASTER(std::string, "str");
 };
 
 #ifdef HAVE_WCHAR_H
@@ -218,7 +218,7 @@
     static PyObject *cast(const std::wstring &src, return_value_policy /* policy */, PyObject * /* parent */) {
         return PyUnicode_FromWideChar(src.c_str(), src.length());
     }
-    TYPE_CASTER(std::wstring, "wstr");
+    PYBIND_TYPE_CASTER(std::wstring, "wstr");
 };
 #endif
 
@@ -280,7 +280,7 @@
         }
         return list;
     }
-    TYPE_CASTER(type, "list<" + value_conv::name() + ">");
+    PYBIND_TYPE_CASTER(type, "list<" + value_conv::name() + ">");
 };
 
 template <typename Key, typename Value> struct type_caster<std::map<Key, Value>> {
@@ -322,7 +322,7 @@
         }
         return dict;
     }
-    TYPE_CASTER(type, "dict<" + key_conv::name() + ", " + value_conv::name() + ">");
+    PYBIND_TYPE_CASTER(type, "dict<" + key_conv::name() + ", " + value_conv::name() + ">");
 };
 
 template <typename T1, typename T2> class type_caster<std::pair<T1, T2>> {
@@ -337,8 +337,8 @@
     }
 
     static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent) {
-        PyObject *o1 = type_caster<typename mpl::normalize_type<T1>::type>::cast(src.first, policy, parent);
-        PyObject *o2 = type_caster<typename mpl::normalize_type<T2>::type>::cast(src.second, policy, parent);
+        PyObject *o1 = type_caster<typename detail::decay<T1>::type>::cast(src.first, policy, parent);
+        PyObject *o2 = type_caster<typename detail::decay<T2>::type>::cast(src.second, policy, parent);
         if (!o1 || !o2) {
             Py_XDECREF(o1);
             Py_XDECREF(o2);
@@ -358,8 +358,8 @@
         return type(first, second);
     }
 protected:
-    type_caster<typename mpl::normalize_type<T1>::type> first;
-    type_caster<typename mpl::normalize_type<T2>::type> second;
+    type_caster<typename detail::decay<T1>::type> first;
+    type_caster<typename detail::decay<T2>::type> second;
 };
 
 template <typename ... Tuple> class type_caster<std::tuple<Tuple...>> {
@@ -368,16 +368,16 @@
     enum { size = sizeof...(Tuple) };
 
     bool load(PyObject *src, bool convert) {
-        return load(src, convert, typename mpl::make_index_sequence<sizeof...(Tuple)>::type());
+        return load(src, convert, typename make_index_sequence<sizeof...(Tuple)>::type());
     }
 
     static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent) {
-        return cast(src, policy, parent, typename mpl::make_index_sequence<size>::type());
+        return cast(src, policy, parent, typename make_index_sequence<size>::type());
     }
 
     static std::string name() {
         std::array<std::string, size> names {{
-            type_caster<typename mpl::normalize_type<Tuple>::type>::name()...
+            type_caster<typename detail::decay<Tuple>::type>::name()...
         }};
         std::string result("(");
         int counter = 0;
@@ -390,15 +390,29 @@
         return result;
     }
 
-    operator type() {
-        return cast(typename mpl::make_index_sequence<sizeof...(Tuple)>::type());
+    template <typename ReturnValue, typename Func> typename std::enable_if<!std::is_void<ReturnValue>::value, ReturnValue>::type call(Func &f) { // overload for functions that return a value
+        return call<ReturnValue, Func>(f, typename make_index_sequence<sizeof...(Tuple)>::type()); // invoke f with the tuple elements as its arguments
     }
+    /// Overload for functions returning void: calls f, then returns a detail::void_type placeholder
+    template <typename ReturnValue, typename Func> typename std::enable_if<std::is_void<ReturnValue>::value, detail::void_type>::type call(Func &f) {
+        call<ReturnValue, Func>(f, typename make_index_sequence<sizeof...(Tuple)>::type());
+        return detail::void_type();
+    }
+    /// Build a 'type' value from the element casters stored in 'value'
+    operator type() {
+        return cast(typename make_index_sequence<sizeof...(Tuple)>::type());
+    }
+
 protected:
-    template <size_t ... Index> type cast(mpl::index_sequence<Index...>) {
+    template <typename ReturnValue, typename Func, size_t ... Index> ReturnValue call(Func &f, index_sequence<Index...>) { // implementation: Index... enumerates the tuple elements
+        return f((Tuple) std::get<Index>(value)...); // every element caster is converted to its C++ argument type by the cast to Tuple
+    }
+
+    template <size_t ... Index> type cast(index_sequence<Index...>) {
         return type((Tuple) std::get<Index>(value)...);
     }
 
-    template <size_t ... Indices> bool load(PyObject *src, bool convert, mpl::index_sequence<Indices...>) {
+    template <size_t ... Indices> bool load(PyObject *src, bool convert, index_sequence<Indices...>) {
         if (!PyTuple_Check(src))
             return false;
         if (PyTuple_Size(src) != size)
@@ -413,9 +427,9 @@
     }
 
     /* Implementation: Convert a C++ tuple into a Python tuple */
-    template <size_t ... Indices> static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent, mpl::index_sequence<Indices...>) {
+    template <size_t ... Indices> static PyObject *cast(const type &src, return_value_policy policy, PyObject *parent, index_sequence<Indices...>) {
         std::array<PyObject *, size> results {{
-            type_caster<typename mpl::normalize_type<Tuple>::type>::cast(std::get<Indices>(src), policy, parent)...
+            type_caster<typename detail::decay<Tuple>::type>::cast(std::get<Indices>(src), policy, parent)...
         }};
         bool success = true;
         for (auto result : results)
@@ -436,7 +450,7 @@
     }
 
 protected:
-    std::tuple<type_caster<typename mpl::normalize_type<Tuple>::type>...> value;
+    std::tuple<type_caster<typename detail::decay<Tuple>::type>...> value;
 };
 
 /// Type caster for holder types like std::shared_ptr, etc.
@@ -467,39 +481,29 @@
         src.inc_ref();
         return (PyObject *) src.ptr();
     }
-    TYPE_CASTER(handle, "handle");
+    PYBIND_TYPE_CASTER(handle, "handle");
 };
 
-#define TYPE_CASTER_PYTYPE(name) \
+#define PYBIND_TYPE_CASTER_PYTYPE(name) \
     template <> class type_caster<name> { \
     public: \
         bool load(PyObject *src, bool) { value = name(src, true); return true; } \
         static PyObject *cast(const name &src, return_value_policy /* policy */, PyObject * /* parent */) { \
             src.inc_ref(); return (PyObject *) src.ptr(); \
         } \
-        TYPE_CASTER(name, #name); \
+        PYBIND_TYPE_CASTER(name, #name); \
     };
 
-TYPE_CASTER_PYTYPE(object)
-TYPE_CASTER_PYTYPE(buffer)
-TYPE_CASTER_PYTYPE(capsule)
-TYPE_CASTER_PYTYPE(dict)
-TYPE_CASTER_PYTYPE(float_)
-TYPE_CASTER_PYTYPE(int_)
-TYPE_CASTER_PYTYPE(list)
-TYPE_CASTER_PYTYPE(slice)
-TYPE_CASTER_PYTYPE(tuple)
-TYPE_CASTER_PYTYPE(function)
-TYPE_CASTER_PYTYPE(array)
-
-#undef TYPE_CASTER
-#undef TYPE_CASTER_PYTYPE
-#undef TYPE_CASTER_NUMBER
+PYBIND_TYPE_CASTER_PYTYPE(object)  PYBIND_TYPE_CASTER_PYTYPE(buffer)
+PYBIND_TYPE_CASTER_PYTYPE(capsule) PYBIND_TYPE_CASTER_PYTYPE(dict)
+PYBIND_TYPE_CASTER_PYTYPE(float_)  PYBIND_TYPE_CASTER_PYTYPE(int_)
+PYBIND_TYPE_CASTER_PYTYPE(list)    PYBIND_TYPE_CASTER_PYTYPE(slice)
+PYBIND_TYPE_CASTER_PYTYPE(tuple)   PYBIND_TYPE_CASTER_PYTYPE(function)
 
 NAMESPACE_END(detail)
 
 template <typename T> inline T cast(PyObject *object) {
-    detail::type_caster<typename mpl::normalize_type<T>::type> conv;
+    detail::type_caster<typename detail::decay<T>::type> conv;
     if (!conv.load(object, true))
         throw cast_error("Unable to cast Python object to C++ type");
     return conv;
@@ -508,7 +512,7 @@
 template <typename T> inline object cast(const T &value, return_value_policy policy = return_value_policy::automatic, PyObject *parent = nullptr) {
     if (policy == return_value_policy::automatic)
         policy = std::is_pointer<T>::value ? return_value_policy::take_ownership : return_value_policy::copy;
-    return object(detail::type_caster<typename mpl::normalize_type<T>::type>::cast(value, policy, parent), false);
+    return object(detail::type_caster<typename detail::decay<T>::type>::cast(value, policy, parent), false);
 }
 
 template <typename T> inline T handle::cast() { return pybind::cast<T>(m_ptr); }
@@ -516,7 +520,7 @@
 template <typename ... Args> inline object handle::call(Args&&... args_) {
     const size_t size = sizeof...(Args);
     std::array<PyObject *, size> args{
-        { detail::type_caster<typename mpl::normalize_type<Args>::type>::cast(
+        { detail::type_caster<typename detail::decay<Args>::type>::cast(
             std::forward<Args>(args_), return_value_policy::automatic, nullptr)... }
     };
     bool fail = false;
diff --git a/include/pybind/common.h b/include/pybind/common.h
index 0bbaa41..d1a6b47 100644
--- a/include/pybind/common.h
+++ b/include/pybind/common.h
@@ -30,10 +30,10 @@
 #include <vector>
 #include <string>
 #include <stdexcept>
-#include <functional>
 #include <unordered_map>
 #include <iostream>
 #include <memory>
+#include <functional>
 
 /// Include Python header, disable linking to pythonX_d.lib on Windows in debug mode
 #if defined(_MSC_VER)
@@ -79,31 +79,27 @@
 
 /// Format strings for basic number types
 template <typename type> struct format_descriptor { };
-template<> struct format_descriptor<int8_t>   { static std::string value() { return "b"; }; };
-template<> struct format_descriptor<uint8_t>  { static std::string value() { return "B"; }; };
-template<> struct format_descriptor<int16_t>  { static std::string value() { return "h"; }; };
-template<> struct format_descriptor<uint16_t> { static std::string value() { return "H"; }; };
-template<> struct format_descriptor<int32_t>  { static std::string value() { return "i"; }; };
-template<> struct format_descriptor<uint32_t> { static std::string value() { return "I"; }; };
-template<> struct format_descriptor<int64_t>  { static std::string value() { return "q"; }; };
-template<> struct format_descriptor<uint64_t> { static std::string value() { return "Q"; }; };
-template<> struct format_descriptor<float>    { static std::string value() { return "f"; }; };
-template<> struct format_descriptor<double>   { static std::string value() { return "d"; }; };
+#define PYBIND_DECL_FMT(t, n) template<> struct format_descriptor<t> { static std::string value() { return n; } } /* the terminating ';' is supplied at the point of use */
+PYBIND_DECL_FMT(int8_t,  "b"); PYBIND_DECL_FMT(uint8_t,  "B"); PYBIND_DECL_FMT(int16_t, "h"); PYBIND_DECL_FMT(uint16_t, "H");
+PYBIND_DECL_FMT(int32_t, "i"); PYBIND_DECL_FMT(uint32_t, "I"); PYBIND_DECL_FMT(int64_t, "q"); PYBIND_DECL_FMT(uint64_t, "Q");
+PYBIND_DECL_FMT(float ,  "f"); PYBIND_DECL_FMT(double,   "d");
+#undef PYBIND_DECL_FMT /* local helper; the PYBIND_ prefix keeps this #define/#undef pair from clobbering a user macro */
 
 /// Information record describing a Python buffer object
 struct buffer_info {
     void *ptr;
-    size_t itemsize;
+    size_t itemsize, count; // count: total number of entries, i.e. the product of the first 'ndim' shape values
     std::string format; // for dense contents, this should be set to format_descriptor<T>::value
     int ndim;
     std::vector<size_t> shape;
     std::vector<size_t> strides;
 
-    buffer_info(void *ptr, size_t itemsize, const std::string &format,
-                int ndim, const std::vector<size_t> &shape,
-                const std::vector<size_t> &strides)
+    buffer_info(void *ptr, size_t itemsize, const std::string &format, int ndim,
+                const std::vector<size_t> &shape, const std::vector<size_t> &strides)
         : ptr(ptr), itemsize(itemsize), format(format), ndim(ndim),
-          shape(shape), strides(strides) {}
+          shape(shape), strides(strides) {
+        count = 1; for (int i=0; i<ndim; ++i) count *= shape[i]; // NOTE(review): reads shape[0..ndim-1] unchecked; assumes shape.size() >= ndim
+    }
 };
 
 // C++ bindings of core Python exceptions
@@ -140,7 +136,29 @@
     std::unordered_map<void *, PyObject *> registered_instances;
 };
 
+/// Return a reference to the current 'internals' information
 inline internals &get_internals();
 
+/// Compile-time index list for tuple metaprogramming: make_index_sequence<N>::type is index_sequence<0, 1, ..., N-1>
+template<size_t ...> struct index_sequence  { };
+template<size_t N, size_t ...S> struct make_index_sequence : make_index_sequence <N - 1, N - 1, S...> { }; // recursive step: prepend N-1 to the index list
+template<size_t ...S> struct make_index_sequence <0, S...> { typedef index_sequence<S...> type; }; // base case: the index list is complete
+
+/// Strip the class from a pointer-to-member-function type: R (C::*)(A...) [const] becomes the function type R(A...)
+template <typename T> struct remove_class {}; // primary template: defines no 'type' for any other T
+template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { typedef R type(A...); };
+template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { typedef R type(A...); };
+
+/// Recursively strip const, pointers, references and array extents from T (note: unlike std::decay, pointers are removed too)
+template <typename T> struct decay                       { typedef T type; };
+template <typename T> struct decay<const T>              { typedef typename decay<T>::type type; };
+template <typename T> struct decay<T*>                   { typedef typename decay<T>::type type; };
+template <typename T> struct decay<T&>                   { typedef typename decay<T>::type type; };
+template <typename T> struct decay<T&&>                  { typedef typename decay<T>::type type; };
+template <typename T, size_t N> struct decay<const T[N]> { typedef typename decay<T>::type type; };
+template <typename T, size_t N> struct decay<T[N]>       { typedef typename decay<T>::type type; };
+
+/// Helper type to replace 'void' in some expressions (an empty struct, so it can be returned and passed around as a value)
+struct void_type { };
 NAMESPACE_END(detail)
 NAMESPACE_END(pybind)
diff --git a/include/pybind/mpl.h b/include/pybind/mpl.h
deleted file mode 100644
index 27d18b4..0000000
--- a/include/pybind/mpl.h
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
-    pybind/mpl.h: Simple library for type manipulation and template metaprogramming
-
-    Copyright (c) 2015 Wenzel Jakob <wenzel@inf.ethz.ch>
-
-    All rights reserved. Use of this source code is governed by a
-    BSD-style license that can be found in the LICENSE file.
-*/
-
-#pragma once
-
-#include <pybind/common.h>
-#include <tuple>
-
-NAMESPACE_BEGIN(pybind)
-NAMESPACE_BEGIN(mpl)
-
-/// Index sequence for convenient template metaprogramming involving tuples
-template<size_t ...> struct index_sequence  { };
-template<size_t N, size_t ...S> struct make_index_sequence : make_index_sequence <N - 1, N - 1, S...> { };
-template<size_t ...S> struct make_index_sequence <0, S...> { typedef index_sequence<S...> type; };
-
-/// Helper template to strip away type modifiers
-template <typename T> struct normalize_type                       { typedef T type; };
-template <typename T> struct normalize_type<const T>              { typedef typename normalize_type<T>::type type; };
-template <typename T> struct normalize_type<T*>                   { typedef typename normalize_type<T>::type type; };
-template <typename T> struct normalize_type<T&>                   { typedef typename normalize_type<T>::type type; };
-template <typename T> struct normalize_type<T&&>                  { typedef typename normalize_type<T>::type type; };
-template <typename T, size_t N> struct normalize_type<const T[N]> { typedef typename normalize_type<T>::type type; };
-template <typename T, size_t N> struct normalize_type<T[N]>       { typedef typename normalize_type<T>::type type; };
-
-NAMESPACE_BEGIN(detail)
-
-/// Strip the class from a method type
-template <typename T> struct remove_class {};
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...)> { typedef R type(A...); };
-template <typename C, typename R, typename... A> struct remove_class<R (C::*)(A...) const> { typedef R type(A...); };
-
-/**
- * \brief Convert a lambda function to a std::function
- * From http://stackoverflow.com/questions/11893141/inferring-the-call-signature-of-a-lambda-or-arbitrary-callable-for-make-functio
- */
-template <typename T> struct lambda_signature_impl {
-    using type = typename remove_class<
-        decltype(&std::remove_reference<T>::type::operator())>::type;
-};
-template <typename R, typename... A> struct lambda_signature_impl<R    (A...)> { typedef R type(A...); };
-template <typename R, typename... A> struct lambda_signature_impl<R (&)(A...)> { typedef R type(A...); };
-template <typename R, typename... A> struct lambda_signature_impl<R (*)(A...)> { typedef R type(A...); };
-template <typename T> using lambda_signature = typename lambda_signature_impl<T>::type;
-template <typename F> using make_function_type = std::function<lambda_signature<F>>;
-
-NAMESPACE_END(detail)
-
-template<typename F> detail::make_function_type<F> make_function(F &&f) {
-    return detail::make_function_type<F>(std::forward<F>(f)); }
-
-NAMESPACE_BEGIN(detail)
-
-struct void_type { };
-
-/// Helper functions for calling a function using a tuple argument while dealing with void/non-void return values
-template <typename RetType> struct tuple_dispatch {
-    typedef RetType return_type;
-    template<typename Func, typename Arg, size_t ... S> return_type operator()(const Func &f, Arg && args, index_sequence<S...>) {
-        return f(std::get<S>(std::forward<Arg>(args))...);
-    }
-};
-
-/// Helper functions for calling a function using a tuple argument (special case for void return values)
-template <> struct tuple_dispatch<void> {
-    typedef void_type return_type;
-    template<typename Func, typename Arg, size_t ... S> return_type operator()(const Func &f, Arg &&args, index_sequence<S...>) {
-        f(std::get<S>(std::forward<Arg>(args))...);
-        return return_type();
-    }
-};
-
-NAMESPACE_END(detail)
-
-/// For lambda functions delegate to their 'operator()'
-template <typename T> struct function_traits : function_traits<typename detail::make_function_type<T>> { };
-
-/* Deal with reference arguments */
-template <typename ReturnType, typename... Args>
-    struct function_traits<ReturnType(*&)(Args...)> : function_traits<ReturnType(*)(Args...)> {};
-template <typename ClassType, typename ReturnType, typename... Args>
-    struct function_traits<ReturnType(ClassType::*&)(Args...)> : function_traits<ReturnType(ClassType::*)(Args...)> {};
-template <typename ClassType, typename ReturnType, typename... Args>
-    struct function_traits<ReturnType(ClassType::*&)(Args...) const> : function_traits<ReturnType(ClassType::*)(Args...) const> {};
-
-/// Type traits for function pointers
-template <typename ReturnType, typename... Args>
-struct function_traits<ReturnType(*)(Args...)> {
-    enum {
-        nargs = sizeof...(Args),
-        is_method = 0,
-        is_const = 0
-    };
-    typedef std::function<ReturnType (Args...)>    f_type;
-    typedef detail::tuple_dispatch<ReturnType>     dispatch_type;
-    typedef typename dispatch_type::return_type    return_type;
-    typedef std::tuple<Args...>                    args_type;
-
-    template <size_t i> struct arg {
-        typedef typename std::tuple_element<i, args_type>::type type;
-    };
-
-    static f_type cast(ReturnType (*func)(Args ...)) { return func; }
-
-    static return_type dispatch(const f_type &f, args_type &&args) {
-        return dispatch_type()(f, std::move(args),
-            typename make_index_sequence<nargs>::type());
-    }
-};
-
-/// Type traits for ordinary methods
-template <typename ClassType, typename ReturnType, typename... Args>
-struct function_traits<ReturnType(ClassType::*)(Args...)> {
-    enum {
-        nargs = sizeof...(Args),
-        is_method = 1,
-        is_const = 0
-    };
-    typedef std::function<ReturnType(ClassType &, Args...)>  f_type;
-    typedef detail::tuple_dispatch<ReturnType>               dispatch_type;
-    typedef typename dispatch_type::return_type              return_type;
-    typedef std::tuple<ClassType&, Args...>                  args_type;
-
-    template <size_t i> struct arg {
-        typedef typename std::tuple_element<i, args_type>::type type;
-    };
-
-    static f_type cast(ReturnType (ClassType::*func)(Args ...)) { return std::mem_fn(func); }
-
-    static return_type dispatch(const f_type &f, args_type &&args) {
-        return dispatch_type()(f, std::move(args),
-            typename make_index_sequence<nargs+1>::type());
-    }
-};
-
-/// Type traits for const methods
-template <typename ClassType, typename ReturnType, typename... Args>
-struct function_traits<ReturnType(ClassType::*)(Args...) const> {
-    enum {
-        nargs = sizeof...(Args),
-        is_method = 1,
-        is_const = 1
-    };
-    typedef std::function<ReturnType (const ClassType &, Args...)>  f_type;
-    typedef detail::tuple_dispatch<ReturnType>                      dispatch_type;
-    typedef typename dispatch_type::return_type                     return_type;
-    typedef std::tuple<const ClassType&, Args...>                   args_type;
-
-    template <size_t i> struct arg {
-        typedef typename std::tuple_element<i, args_type>::type type;
-    };
-
-    static f_type cast(ReturnType (ClassType::*func)(Args ...) const) {
-        return std::mem_fn(func);
-    }
-
-    static return_type dispatch(const f_type &f, args_type &&args) {
-        return dispatch_type()(f, std::move(args),
-            typename make_index_sequence<nargs+1>::type());
-    }
-};
-
-/// Type traits for std::functions
-template <typename ReturnType, typename... Args>
-struct function_traits<std::function<ReturnType(Args...)>> {
-    enum {
-        nargs = sizeof...(Args),
-        is_method = 0,
-        is_const = 0
-    };
-    typedef std::function<ReturnType (Args...)>  f_type;
-    typedef detail::tuple_dispatch<ReturnType>   dispatch_type;
-    typedef typename dispatch_type::return_type  return_type;
-    typedef std::tuple<Args...>                  args_type;
-
-    template <size_t i> struct arg {
-        typedef typename std::tuple_element<i, args_type>::type type;
-    };
-
-    static f_type cast(const f_type &func) { return func; }
-
-    static return_type dispatch(const f_type &f, args_type &&args) {
-        return dispatch_type()(f, std::move(args),
-            typename make_index_sequence<nargs>::type());
-    }
-};
-
-NAMESPACE_END(mpl)
-NAMESPACE_END(pybind)
diff --git a/include/pybind/numpy.h b/include/pybind/numpy.h
new file mode 100644
index 0000000..f4a4a74
--- /dev/null
+++ b/include/pybind/numpy.h
@@ -0,0 +1,201 @@
+/*
+    pybind/numpy.h: Basic NumPy support, auto-vectorization support
+
+    Copyright (c) 2015 Wenzel Jakob <wenzel@inf.ethz.ch>
+
+    All rights reserved. Use of this source code is governed by a
+    BSD-style license that can be found in the LICENSE file.
+*/
+
+#pragma once
+
+#include <pybind/pybind.h>
+#if defined(_MSC_VER)
+#pragma warning(push)
+#pragma warning(disable: 4127) // warning C4127: Conditional expression is constant
+#endif
+
+NAMESPACE_BEGIN(pybind)
+/// Wrapper class for NumPy arrays; the required NumPy C API entries are looked up at runtime (see API::lookup)
+class array : public buffer {
+protected:
+    struct API {
+        enum Entries {
+            API_PyArray_Type = 2, // API_PyArray_*: slots in the pointer table exported by the _ARRAY_API capsule
+            API_PyArray_DescrFromType = 45,
+            API_PyArray_FromAny = 69,
+            API_PyArray_NewCopy = 85,
+            API_PyArray_NewFromDescr = 94,
+            API_NPY_C_CONTIGUOUS = 0x0001, // API_NPY_*: flag bits that are combined and passed to PyArray_FromAny
+            API_NPY_F_CONTIGUOUS = 0x0002,
+            API_NPY_NPY_ARRAY_FORCECAST = 0x0010,
+            API_NPY_ENSURE_ARRAY = 0x0040
+        };
+        /// Import numpy.core.multiarray and fetch the entries listed above; throws std::runtime_error on failure
+        static API lookup() {
+            PyObject *numpy = PyImport_ImportModule("numpy.core.multiarray");
+            PyObject *capsule = numpy ? PyObject_GetAttrString(numpy, "_ARRAY_API") : nullptr;
+            void **api_ptr = (void **) (capsule ? PyCapsule_GetPointer(capsule, NULL) : nullptr);
+            Py_XDECREF(capsule);
+            Py_XDECREF(numpy);
+            if (api_ptr == nullptr)
+                throw std::runtime_error("Could not acquire pointer to NumPy API!");
+            API api;
+            api.PyArray_Type          = (decltype(api.PyArray_Type))          api_ptr[API_PyArray_Type];
+            api.PyArray_DescrFromType = (decltype(api.PyArray_DescrFromType)) api_ptr[API_PyArray_DescrFromType];
+            api.PyArray_FromAny       = (decltype(api.PyArray_FromAny))       api_ptr[API_PyArray_FromAny];
+            api.PyArray_NewCopy       = (decltype(api.PyArray_NewCopy))       api_ptr[API_PyArray_NewCopy];
+            api.PyArray_NewFromDescr  = (decltype(api.PyArray_NewFromDescr))  api_ptr[API_PyArray_NewFromDescr];
+            return api;
+        }
+        /// Type check against the PyArray_Type object obtained by lookup()
+        bool PyArray_Check(PyObject *obj) const { return (bool) PyObject_TypeCheck(obj, PyArray_Type); }
+        /* Function pointers and type object filled in by lookup() */
+        PyObject *(*PyArray_DescrFromType)(int);
+        PyObject *(*PyArray_NewFromDescr)
+            (PyTypeObject *, PyObject *, int, Py_intptr_t *,
+             Py_intptr_t *, void *, int, PyObject *);
+        PyObject *(*PyArray_NewCopy)(PyObject *, int);
+        PyTypeObject *PyArray_Type;
+        PyObject *(*PyArray_FromAny) (PyObject *, PyObject *, int, int, int, PyObject *);
+    };
+public:
+    PYBIND_OBJECT_DEFAULT(array, buffer, lookup_api().PyArray_Check)
+    /// Create a one-dimensional array with 'size' entries of type 'Type'; the result is produced by PyArray_NewCopy
+    template <typename Type> array(size_t size, const Type *ptr) {
+        API& api = lookup_api();
+        PyObject *descr = api.PyArray_DescrFromType(
+            (int) format_descriptor<Type>::value()[0]);
+        if (descr == nullptr)
+            throw std::runtime_error("NumPy: unsupported buffer format!");
+        Py_intptr_t shape = (Py_intptr_t) size;
+        PyObject *tmp = api.PyArray_NewFromDescr(
+            api.PyArray_Type, descr, 1, &shape, nullptr, (void *) ptr, 0, nullptr);
+        if (tmp == nullptr)
+            throw std::runtime_error("NumPy: unable to create array!");
+        m_ptr = api.PyArray_NewCopy(tmp, -1 /* any order */);
+        Py_DECREF(tmp);
+        if (m_ptr == nullptr)
+            throw std::runtime_error("NumPy: unable to copy array!");
+    }
+    /// Create an array from the buffer described by 'info'; only single-character format strings are accepted
+    array(const buffer_info &info) {
+        API& api = lookup_api();
+        if (info.format.size() != 1)
+            throw std::runtime_error("Unsupported buffer format!");
+        PyObject *descr = api.PyArray_DescrFromType(info.format[0]);
+        if (descr == nullptr)
+            throw std::runtime_error("NumPy: unsupported buffer format '" + info.format + "'!");
+        PyObject *tmp = api.PyArray_NewFromDescr(
+            api.PyArray_Type, descr, info.ndim, (Py_intptr_t *) info.shape.data(),
+            (Py_intptr_t *) info.strides.data(), info.ptr, 0, nullptr); // .data() instead of &v[0]: well-defined even when the vectors are empty (ndim == 0)
+        if (tmp == nullptr)
+            throw std::runtime_error("NumPy: unable to create array!");
+        m_ptr = api.PyArray_NewCopy(tmp, -1 /* any order */);
+        Py_DECREF(tmp);
+        if (m_ptr == nullptr)
+            throw std::runtime_error("NumPy: unable to copy array!");
+    }
+
+protected:
+    static API &lookup_api() {
+        static API api = API::lookup(); // function-local static: lookup() runs on the first call only
+        return api;
+    }
+};
+/// NumPy array of element type T: ensure() requests a C-contiguous array with T's type descriptor from PyArray_FromAny (with the force-cast flag set)
+template <typename T> class array_dtype : public array {
+public:
+    PYBIND_OBJECT_CVT(array_dtype, array, is_non_null, m_ptr = ensure(m_ptr));
+    array_dtype() : array() { }
+    static bool is_non_null(PyObject *ptr) { return ptr != nullptr; }
+    static PyObject *ensure(PyObject *ptr) { // returns whatever PyArray_FromAny produces for 'ptr'
+        API &api = lookup_api();
+        PyObject *descr = api.PyArray_DescrFromType(format_descriptor<T>::value()[0]); // NOTE(review): descr is not checked for nullptr here, unlike in the array constructors
+        return api.PyArray_FromAny(ptr, descr, 0, 0,
+                                   API::API_NPY_C_CONTIGUOUS | API::API_NPY_ENSURE_ARRAY |
+                                   API::API_NPY_NPY_ARRAY_FORCECAST, nullptr);
+    }
+};
+/* Type casters for array and for array_dtype<T> (8 to 64 bit integers, float and double) */
+NAMESPACE_BEGIN(detail)
+PYBIND_TYPE_CASTER_PYTYPE(array)
+PYBIND_TYPE_CASTER_PYTYPE(array_dtype<int8_t>)  PYBIND_TYPE_CASTER_PYTYPE(array_dtype<uint8_t>)
+PYBIND_TYPE_CASTER_PYTYPE(array_dtype<int16_t>) PYBIND_TYPE_CASTER_PYTYPE(array_dtype<uint16_t>)
+PYBIND_TYPE_CASTER_PYTYPE(array_dtype<int32_t>) PYBIND_TYPE_CASTER_PYTYPE(array_dtype<uint32_t>)
+PYBIND_TYPE_CASTER_PYTYPE(array_dtype<int64_t>) PYBIND_TYPE_CASTER_PYTYPE(array_dtype<uint64_t>)
+PYBIND_TYPE_CASTER_PYTYPE(array_dtype<float>)   PYBIND_TYPE_CASTER_PYTYPE(array_dtype<double>)
+NAMESPACE_END(detail)
+/// Implementation: wrap f in a function object that takes array_dtype arguments and applies f to every entry
+template <typename func_type, typename return_type, typename... args_type, size_t... Index>
+    std::function<object(array_dtype<args_type>...)>
+        vectorize(func_type &&f, return_type (*) (args_type ...),
+                  detail::index_sequence<Index...>) {
+    /* 'mutable': lets function objects with a non-const operator() be invoked through the captured copy */
+    return [f](array_dtype<args_type>... args) mutable -> array {
+        /* Request buffers from all parameters */
+        const size_t N = sizeof...(args_type);
+        std::array<buffer_info, N> buffers {{ args.request()... }};
+
+        /* The input with the largest entry count determines the dimensions of the output array */
+        int ndim = 0; size_t count = 0;
+        std::vector<size_t> shape;
+        for (size_t i=0; i<N; ++i) {
+            if (buffers[i].count > count) {
+                ndim = buffers[i].ndim;
+                shape = buffers[i].shape;
+                count = buffers[i].count;
+            }
+        }
+        std::vector<size_t> strides(ndim); // row-major (C order) strides of the output, in bytes
+        if (ndim > 0) {
+            strides[ndim-1] = sizeof(return_type);
+            for (int i=ndim-1; i>0; --i)
+                strides[i-1] = strides[i] * shape[i];
+        }
+
+        /* Every input must either hold exactly one entry (it is then reused for all outputs) or match the output shape */
+        for (size_t i=0; i<N; ++i) {
+            if (buffers[i].count != 1 && (buffers[i].ndim != ndim || buffers[i].shape != shape))
+                throw std::runtime_error("pybind::vectorize: incompatible size/dimension of inputs!");
+        }
+
+        /* Call the function */
+        std::vector<return_type> result(count);
+        for (size_t i=0; i<count; ++i)
+            result[i] = f((buffers[Index].count == 1
+                               ? *((args_type *) buffers[Index].ptr)
+                               :  ((args_type *) buffers[Index].ptr)[i])...);
+
+        if (count == 1) // the largest input holds a single entry: hand back the converted value instead of building an array
+            return cast(result[0]);
+
+        /* Return the result */
+        return array(buffer_info(result.data(), sizeof(return_type),
+            format_descriptor<return_type>::value(),
+            ndim, shape, strides));
+    };
+}
+/// Helper overload: f_ only carries the signature (return_type, args_type...); an index sequence over the arguments is added here
+template <typename func_type, typename return_type, typename... args_type>
+    std::function<object(array_dtype<args_type>...)>
+        vectorize(func_type &&f, return_type (*f_) (args_type ...) = nullptr) {
+    return vectorize(f, f_, typename detail::make_index_sequence<sizeof...(args_type)>::type());
+}
+/// Vectorize a plain function pointer; the pointer doubles as the signature argument
+template <typename return_type, typename... args_type>
+std::function<object(array_dtype<args_type>...)> vectorize(return_type (*f) (args_type ...)) {
+    return vectorize(f, f);
+}
+/// Vectorize a function object or lambda; its signature is deduced from operator() via detail::remove_class
+template <typename func> auto vectorize(func &&f) -> decltype(
+        vectorize(std::forward<func>(f), (typename detail::remove_class<decltype(&std::remove_reference<func>::type::operator())>::type *) nullptr)) {
+    return vectorize(std::forward<func>(f), (typename detail::remove_class<decltype(
+                   &std::remove_reference<func>::type::operator())>::type *) nullptr);
+}
+
+NAMESPACE_END(pybind)
+
+#if defined(_MSC_VER)
+#pragma warning(pop)
+#endif
diff --git a/include/pybind/operators.h b/include/pybind/operators.h
index 9e3011a..c2c1338 100644
--- a/include/pybind/operators.h
+++ b/include/pybind/operators.h
@@ -17,13 +17,12 @@
 
 /// Enumeration with all supported operator types
 enum op_id : int {
-    op_add, op_sub, op_mul, op_div, op_mod, op_divmod, op_pow,
-    op_lshift, op_rshift, op_and, op_xor, op_or, op_neg,
-    op_pos, op_abs, op_invert, op_int, op_long, op_float,
-    op_str, op_cmp, op_gt, op_ge, op_lt, op_le, op_eq, op_ne,
-    op_iadd, op_isub, op_imul, op_idiv, op_imod, op_ilshift,
-    op_irshift, op_iand, op_ixor, op_ior, op_complex, op_bool,
-    op_nonzero, op_repr, op_truediv
+    op_add, op_sub, op_mul, op_div, op_mod, op_divmod, op_pow, op_lshift,
+    op_rshift, op_and, op_xor, op_or, op_neg, op_pos, op_abs, op_invert,
+    op_int, op_long, op_float, op_str, op_cmp, op_gt, op_ge, op_lt, op_le,
+    op_eq, op_ne, op_iadd, op_isub, op_imul, op_idiv, op_imod, op_ilshift,
+    op_irshift, op_iand, op_ixor, op_ior, op_complex, op_bool, op_nonzero,
+    op_repr, op_truediv
 };
 
 enum op_type : int {
@@ -33,12 +32,11 @@
 };
 
 struct self_t { };
+static const self_t self = self_t();
 
 /// Type for an unused type slot
 struct undefined_t { };
 
-static const self_t self = self_t();
-
 /// Don't warn about an unused variable
 inline self_t __self() { return self; }
 
@@ -140,7 +138,6 @@
 #undef PYBIND_BINARY_OPERATOR
 #undef PYBIND_INPLACE_OPERATOR
 #undef PYBIND_UNARY_OPERATOR
-
 NAMESPACE_END(detail)
 
 using detail::self;
diff --git a/include/pybind/pybind.h b/include/pybind/pybind.h
index d4ebdc8..e8c4ff5 100644
--- a/include/pybind/pybind.h
+++ b/include/pybind/pybind.h
@@ -22,49 +22,112 @@
 
 NAMESPACE_BEGIN(pybind)
 
+/// Wraps an arbitrary C++ function/method/lambda function/.. into a callable Python object
 class cpp_function : public function {
-public:
+private:
+    /// Chained list of function entries for overloading
     struct function_entry {
-        std::function<PyObject* (PyObject *)> impl;
+        PyObject * (*impl) (function_entry *, PyObject *, PyObject *);
+        void *data;
         std::string signature, doc;
         bool is_constructor;
+        return_value_policy policy;
         function_entry *next = nullptr;
     };
 
+    /// Picks a suitable return value converter from cast.h
+    template <typename T> using return_value_caster =
+        detail::type_caster<typename std::conditional<
+            std::is_void<T>::value, detail::void_type, typename detail::decay<T>::type>::type>;
+
+    /// Picks a suitable argument value converter from cast.h
+    template <typename ... T> using arg_value_caster =
+        detail::type_caster<typename std::tuple<T...>>;
+public:
     cpp_function() { }
-    template <typename Func> cpp_function(
-        Func &&_func, const char *name = nullptr, const char *doc = nullptr,
-        return_value_policy policy = return_value_policy::automatic,
-        function sibling = function(), bool is_method = false) {
-        /* Function traits extracted from the template type 'Func' */
-        typedef mpl::function_traits<Func> f_traits;
 
-        /* Suitable input and output casters */
-        typedef typename detail::type_caster<typename f_traits::args_type> cast_in;
-        typedef typename detail::type_caster<typename mpl::normalize_type<typename f_traits::return_type>::type> cast_out;
-        typename f_traits::f_type func = f_traits::cast(std::forward<Func>(_func));
+    /// Vanilla function pointers
+    template <typename return_type, typename... arg_type>
+    cpp_function(return_type (*f)(arg_type...), const char *name = nullptr,
+                 const char *doc = nullptr, return_value_policy policy = return_value_policy::automatic,
+                 const function &sibling = function(), bool is_method = false) {
 
-        auto impl = [func, policy](PyObject *pyArgs) -> PyObject *{
+        typedef arg_value_caster<arg_type...> cast_in;
+        typedef return_value_caster<return_type> cast_out;
+
+        auto impl = [](function_entry *entry, PyObject *pyArgs, PyObject *parent) -> PyObject * {
             cast_in args;
-            if (!args.load(pyArgs, true))
-                return nullptr;
-            PyObject *parent = policy != return_value_policy::reference_internal
-                ? nullptr : PyTuple_GetItem(pyArgs, 0);
-            return cast_out::cast(
-                f_traits::dispatch(func, args.operator typename f_traits::args_type()),
-                policy, parent);
+            if (!args.load(pyArgs, true)) return nullptr;
+            auto f = (return_type (*) (arg_type...)) entry->data;
+            return cast_out::cast(args.template call<return_type>(f),
+                                  entry->policy, parent);
         };
 
         initialize(name, doc, cast_in::name() + std::string(" -> ") + cast_out::name(),
-                    sibling, is_method, std::move(impl));
+                    sibling, is_method, policy, impl, (void *) f);
     }
+
+    /// Delegating helper constructor to deal with lambda functions
+    template <typename func>
+    cpp_function(func &&f, const char *name = nullptr,
+                 const char *doc = nullptr,
+                 return_value_policy policy = return_value_policy::automatic,
+                 const function &sibling = function(), bool is_method = false) {
+        initialize(std::forward<func>(f), name, doc, policy, sibling, is_method,
+                   (typename detail::remove_class<decltype(
+                       &std::remove_reference<func>::type::operator())>::type *) nullptr);
+    }
+
+
+    /// Class methods (non-const)
+    template <typename return_type, typename class_type, typename ... arg_type> cpp_function(
+            return_type (class_type::*f)(arg_type...), const char *name = nullptr,
+            const char *doc = nullptr, return_value_policy policy = return_value_policy::automatic,
+            const function &sibling = function(), bool is_method = false) {
+        initialize([f](class_type *c, arg_type... args) -> return_type { return (c->*f)(args...); },
+            name, doc, policy, sibling, is_method, (return_type (*)(class_type *, arg_type ...)) nullptr);
+    }
+
+    /// Class methods (const)
+    template <typename return_type, typename class_type, typename ... arg_type> cpp_function(
+            return_type (class_type::*f)(arg_type...) const, const char *name = nullptr,
+            const char *doc = nullptr, return_value_policy policy = return_value_policy::automatic,
+            const function &sibling = function(), bool is_method = false) {
+        initialize([f](const class_type *c, arg_type... args) -> return_type { return (c->*f)(args...); },
+            name, doc, policy, sibling, is_method, (return_type (*)(const class_type *, arg_type ...)) nullptr);
+    }
+
 private:
+    /// Functors, lambda functions, etc.: the callable is stored on the heap and reached through entry->data
+    template <typename func, typename return_type, typename... arg_type>
+    void initialize(func &&f, const char *name, const char *doc,
+                 return_value_policy policy, const function &sibling,
+                 bool is_method, return_type (*)(arg_type...)) { // unnamed last parameter only carries the call signature
+
+        typedef arg_value_caster<arg_type...> cast_in;
+        typedef return_value_caster<return_type> cast_out;
+        struct capture { typename std::remove_reference<func>::type f; };
+        void *ptr = new capture { std::forward<func>(f) }; // NOTE(review): no matching delete is visible in this chunk -- confirm ownership
+
+        auto impl = [](function_entry *entry, PyObject *pyArgs, PyObject *parent) -> PyObject *{ // captureless, so it converts to the plain function pointer stored in function_entry
+            cast_in args;
+            if (!args.load(pyArgs, true)) return nullptr; // nullptr makes the dispatcher move on to the next overload
+            func &f = ((capture *) entry->data)->f;
+            return cast_out::cast(args.template call<return_type>(f),
+                                  entry->policy, parent);
+        };
+
+        initialize(name, doc, cast_in::name() + std::string(" -> ") + cast_out::name(),
+                    sibling, is_method, policy, impl, ptr);
+    }
+
     static PyObject *dispatcher(PyObject *self, PyObject *args, PyObject * /* kwargs */) {
         function_entry *overloads = (function_entry *) PyCapsule_GetPointer(self, nullptr);
         PyObject *result = nullptr;
+        PyObject *parent = PyTuple_Size(args) > 0 ? PyTuple_GetItem(args, 0) : nullptr;
         try {
             for (function_entry *it = overloads; it != nullptr; it = it->next) {
-                if ((result = it->impl(args)) != nullptr)
+                if ((result = it->impl(it, args, parent)) != nullptr)
                     break;
             }
         } catch (const error_already_set &) {                                               return nullptr;
@@ -100,15 +163,19 @@
 
     void initialize(const char *name, const char *doc,
                     const std::string &signature, function sibling,
-                    bool is_method, std::function<PyObject *(PyObject *)> &&impl) {
+                    bool is_method, return_value_policy policy,
+                    PyObject *(*impl) (function_entry *, PyObject *, PyObject *),
+                    void *data) {
         if (name == nullptr)
             name = "";
 
         /* Linked list of function call handlers (for overloading) */
         function_entry *entry = new function_entry();
-        entry->impl = std::move(impl);
+        entry->impl = impl;
         entry->is_constructor = !strcmp(name, "__init__");
+        entry->policy = policy;
         entry->signature = signature;
+        entry->data = data;
         if (doc) entry->doc = doc;
 
         if (!sibling.ptr() || !PyCFunction_Check(sibling.ptr())) {
@@ -159,16 +226,15 @@
 class cpp_method : public cpp_function {
 public:
     cpp_method () { }
-    template <typename Func>
-    cpp_method(Func &&_func, const char *name = nullptr, const char *doc = nullptr,
-               return_value_policy policy = return_value_policy::automatic,
-               function sibling = function())
-        : cpp_function(std::forward<Func>(_func), name, doc, policy, sibling, true) { }
+    template <typename func> /* forwards to cpp_function with is_method = true */
+    cpp_method(func &&f, const char *name = nullptr, const char *doc = nullptr,
+               return_value_policy policy = return_value_policy::automatic, const function &sibling = function())
+        : cpp_function(std::forward<func>(f), name, doc, policy, sibling, true) {}
 };
 
 class module : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(module, object, PyModule_Check)
+    PYBIND_OBJECT_DEFAULT(module, object, PyModule_Check)
 
     module(const char *name, const char *doc = nullptr) {
         PyModuleDef *def = new PyModuleDef();
@@ -214,7 +280,7 @@
 /// Basic support for creating new Python heap types
 class custom_type : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(custom_type, object, PyType_Check)
+    PYBIND_OBJECT_DEFAULT(custom_type, object, PyType_Check)
 
     custom_type(object &scope, const char *name_, const std::string &type_name,
                 size_t type_size, size_t instance_size,
@@ -364,14 +430,13 @@
 
     static void releasebuffer(PyObject *, Py_buffer *view) { delete (buffer_info *) view->internal; }
 };
-
 NAMESPACE_END(detail)
 
 template <typename type, typename holder_type = std::unique_ptr<type>> class class_ : public detail::custom_type {
 public:
     typedef detail::instance<type, holder_type> instance_type;
 
-    PYTHON_OBJECT(class_, detail::custom_type, PyType_Check)
+    PYBIND_OBJECT(class_, detail::custom_type, PyType_Check)
 
     class_(object &scope, const char *name, const char *doc = nullptr)
         : detail::custom_type(scope, name, type_id<type>(), sizeof(type),
@@ -603,6 +668,3 @@
 #if defined(_MSC_VER)
 #pragma warning(pop)
 #endif
-
-#undef PYTHON_OBJECT
-#undef PYTHON_OBJECT_DEFAULT
diff --git a/include/pybind/pytypes.h b/include/pybind/pytypes.h
index 33aa69f..2586a8b 100644
--- a/include/pybind/pytypes.h
+++ b/include/pybind/pytypes.h
@@ -19,9 +19,7 @@
 class str;
 class object;
 class dict;
-NAMESPACE_BEGIN(detail)
-class accessor;
-NAMESPACE_END(detail)
+namespace detail { class accessor; }
 
 /// Holds a reference to a Python object (no reference counting)
 class handle {
@@ -189,7 +187,6 @@
     PyObject *dict, *key, *value;
     ssize_t pos = 0;
 };
-
 NAMESPACE_END(detail)
 
 inline detail::accessor handle::operator[](handle key) { return detail::accessor(ptr(), key.ptr(), false); }
@@ -197,21 +194,24 @@
 inline detail::accessor handle::attr(handle key) { return detail::accessor(ptr(), key.ptr(), true); }
 inline detail::accessor handle::attr(const char *key) { return detail::accessor(ptr(), key, true); }
 
-#define PYTHON_OBJECT(Name, Parent, CheckFun) \
-    Name(const handle &h, bool borrowed) : Parent(h, borrowed) { } \
-    Name(const object& o): Parent(o) { } \
-    Name(object&& o): Parent(std::move(o)) { } \
-    Name& operator=(object&& o) { return static_cast<Name&>(object::operator=(std::move(o))); } \
-    Name& operator=(object& o) { return static_cast<Name&>(object::operator=(o)); } \
+#define PYBIND_OBJECT_CVT(Name, Parent, CheckFun, CvtStmt) /* CvtStmt runs at the end of every constructor and assignment */ \
+    Name(const handle &h, bool borrowed) : Parent(h, borrowed) { CvtStmt; } \
+    Name(const object& o): Parent(o) { CvtStmt; } \
+    Name(object&& o): Parent(std::move(o)) { CvtStmt; } \
+    Name& operator=(object&& o) { (void) object::operator=(std::move(o)); CvtStmt; return *this; } /* assign, convert, then return */ \
+    Name& operator=(object& o) { (void) object::operator=(o); CvtStmt; return *this; } /* CvtStmt used to sit after the return and never ran */ \
     bool check() const { return m_ptr != nullptr && (bool) CheckFun(m_ptr); }
 
-#define PYTHON_OBJECT_DEFAULT(Name, Parent, CheckFun) \
-    PYTHON_OBJECT(Name, Parent, CheckFun) \
+#define PYBIND_OBJECT(Name, Parent, CheckFun) \
+    PYBIND_OBJECT_CVT(Name, Parent, CheckFun, )
+
+#define PYBIND_OBJECT_DEFAULT(Name, Parent, CheckFun) \
+    PYBIND_OBJECT(Name, Parent, CheckFun) \
     Name() : Parent() { }
 
 class str : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(str, object, PyUnicode_Check)
+    PYBIND_OBJECT_DEFAULT(str, object, PyUnicode_Check)
     str(const char *s) : object(PyUnicode_FromString(s), false) { }
     operator const char *() const { return PyUnicode_AsUTF8(m_ptr); }
 };
@@ -221,13 +221,13 @@
 
 class bool_ : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(bool_, object, PyBool_Check)
+    PYBIND_OBJECT_DEFAULT(bool_, object, PyBool_Check)
     operator bool() const { return m_ptr && PyLong_AsLong(m_ptr) != 0; }
 };
 
 class int_ : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(int_, object, PyLong_Check)
+    PYBIND_OBJECT_DEFAULT(int_, object, PyLong_Check)
     int_(int value) : object(PyLong_FromLong((long) value), false) { }
     int_(size_t value) : object(PyLong_FromSize_t(value), false) { }
     int_(ssize_t value) : object(PyLong_FromSsize_t(value), false) { }
@@ -236,7 +236,7 @@
 
 class float_ : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(float_, object, PyFloat_Check)
+    PYBIND_OBJECT_DEFAULT(float_, object, PyFloat_Check)
     float_(float value) : object(PyFloat_FromDouble((double) value), false) { }
     float_(double value) : object(PyFloat_FromDouble((double) value), false) { }
     operator float() const { return (float) PyFloat_AsDouble(m_ptr); }
@@ -245,7 +245,7 @@
 
 class slice : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(slice, object, PySlice_Check)
+    PYBIND_OBJECT_DEFAULT(slice, object, PySlice_Check)
     slice(ssize_t start_, ssize_t stop_, ssize_t step_) {
         int_ start(start_), stop(stop_), step(step_);
         m_ptr = PySlice_New(start.ptr(), stop.ptr(), step.ptr());
@@ -257,7 +257,7 @@
 
 class capsule : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact)
+    PYBIND_OBJECT_DEFAULT(capsule, object, PyCapsule_CheckExact)
     capsule(void *value) : object(PyCapsule_New(value, nullptr, nullptr), false) { }
     template <typename T> operator T *() const {
         T * result = static_cast<T *>(PyCapsule_GetPointer(m_ptr, nullptr));
@@ -268,7 +268,7 @@
 
 class tuple : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(tuple, object, PyTuple_Check)
+    PYBIND_OBJECT_DEFAULT(tuple, object, PyTuple_Check)
     tuple(size_t size) : object(PyTuple_New((Py_ssize_t) size), false) { }
     size_t size() const { return (size_t) PyTuple_Size(m_ptr); }
     detail::tuple_accessor operator[](size_t index) { return detail::tuple_accessor(ptr(), index); }
@@ -276,7 +276,7 @@
 
 class dict : public object {
 public:
-    PYTHON_OBJECT(dict, object, PyDict_Check)
+    PYBIND_OBJECT(dict, object, PyDict_Check)
     dict() : object(PyDict_New(), false) { }
     size_t size() const { return (size_t) PyDict_Size(m_ptr); }
     detail::dict_iterator begin() { return (++detail::dict_iterator(ptr(), 0)); }
@@ -285,7 +285,7 @@
 
 class list : public object {
 public:
-    PYTHON_OBJECT(list, object, PyList_Check)
+    PYBIND_OBJECT(list, object, PyList_Check)
     list(size_t size = 0) : object(PyList_New((ssize_t) size), false) { }
     size_t size() const { return (size_t) PyList_Size(m_ptr); }
     detail::list_iterator begin() { return detail::list_iterator(ptr(), 0); }
@@ -296,12 +296,12 @@
 
 class function : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(function, object, PyFunction_Check)
+    PYBIND_OBJECT_DEFAULT(function, object, PyFunction_Check)
 };
 
 class buffer : public object {
 public:
-    PYTHON_OBJECT_DEFAULT(buffer, object, PyObject_CheckBuffer)
+    PYBIND_OBJECT_DEFAULT(buffer, object, PyObject_CheckBuffer)
 
     buffer_info request(bool writable = false) {
         int flags = PyBUF_STRIDES | PyBUF_FORMAT;
@@ -322,88 +322,6 @@
     Py_buffer *view = nullptr;
 };
 
-class array : public buffer {
-protected:
-    struct API {
-        enum Entries {
-            API_PyArray_Type = 2,
-            API_PyArray_DescrFromType = 45,
-            API_PyArray_NewCopy = 85,
-            API_PyArray_NewFromDescr = 94
-        };
-
-        static API lookup() {
-            PyObject *numpy = PyImport_ImportModule("numpy.core.multiarray");
-            PyObject *capsule = numpy ? PyObject_GetAttrString(numpy, "_ARRAY_API") : nullptr;
-            void **api_ptr = (void **) (capsule ? PyCapsule_GetPointer(capsule, NULL) : nullptr);
-            Py_XDECREF(capsule);
-            Py_XDECREF(numpy);
-            if (api_ptr == nullptr)
-                throw std::runtime_error("Could not acquire pointer to NumPy API!");
-            API api;
-            api.PyArray_DescrFromType = (decltype(api.PyArray_DescrFromType)) api_ptr[API_PyArray_DescrFromType];
-            api.PyArray_NewFromDescr  = (decltype(api.PyArray_NewFromDescr))  api_ptr[API_PyArray_NewFromDescr];
-            api.PyArray_NewCopy       = (decltype(api.PyArray_NewCopy))       api_ptr[API_PyArray_NewCopy];
-            api.PyArray_Type          = (decltype(api.PyArray_Type))          api_ptr[API_PyArray_Type];
-            return api;
-        }
-
-        bool PyArray_Check(PyObject *obj) const {
-            return (bool) PyObject_TypeCheck(obj, PyArray_Type);
-        }
-
-        PyObject *(*PyArray_DescrFromType)(int);
-        PyObject *(*PyArray_NewFromDescr)
-            (PyTypeObject *, PyObject *, int, Py_intptr_t *,
-             Py_intptr_t *, void *, int, PyObject *);
-        PyObject *(*PyArray_NewCopy)(PyObject *, int);
-        PyTypeObject *PyArray_Type;
-    };
-public:
-    PYTHON_OBJECT_DEFAULT(array, buffer, lookup_api().PyArray_Check)
-    
-    template <typename Type> array(size_t size, const Type *ptr) {
-        API& api = lookup_api();
-        PyObject *descr = api.PyArray_DescrFromType(
-            (int) format_descriptor<Type>::value()[0]);
-        if (descr == nullptr)
-            throw std::runtime_error("NumPy: unsupported buffer format!");
-        Py_intptr_t shape = (Py_intptr_t) size;
-        PyObject *tmp = api.PyArray_NewFromDescr(
-            api.PyArray_Type, descr, 1, &shape, nullptr, (void *) ptr, 0, nullptr);
-        if (tmp == nullptr)
-            throw std::runtime_error("NumPy: unable to create array!");
-        m_ptr = api.PyArray_NewCopy(tmp, -1 /* any order */);
-        Py_DECREF(tmp);
-        if (m_ptr == nullptr)
-            throw std::runtime_error("NumPy: unable to copy array!");
-    }
-
-    array(const buffer_info &info) {
-        API& api = lookup_api();
-        if (info.format.size() != 1)
-            throw std::runtime_error("Unsupported buffer format!");
-        PyObject *descr = api.PyArray_DescrFromType(info.format[0]);
-        if (descr == nullptr)
-            throw std::runtime_error("NumPy: unsupported buffer format '" + info.format + "'!");
-        PyObject *tmp = api.PyArray_NewFromDescr(
-            api.PyArray_Type, descr, info.ndim, (Py_intptr_t *) &info.shape[0],
-            (Py_intptr_t *) &info.strides[0], info.ptr, 0, nullptr);
-        if (tmp == nullptr)
-            throw std::runtime_error("NumPy: unable to create array!");
-        m_ptr = api.PyArray_NewCopy(tmp, -1 /* any order */);
-        Py_DECREF(tmp);
-        if (m_ptr == nullptr)
-            throw std::runtime_error("NumPy: unable to copy array!");
-    }
-protected:
-    static API &lookup_api() {
-        static API api = API::lookup();
-        return api;
-    }
-};
-
-
 NAMESPACE_BEGIN(detail)
 inline internals &get_internals() {
     static internals *internals_ptr = nullptr;