Merge pull request #372 from dean0x7d/keywords

Keyword arguments and generalized unpacking for C++ API
diff --git a/docs/advanced.rst b/docs/advanced.rst
index fdbf6cd..cf588af 100644
--- a/docs/advanced.rst
+++ b/docs/advanced.rst
@@ -1622,24 +1622,76 @@
     py::object result_py = f(1234, "hello", some_instance);
     MyClass &result = result_py.cast<MyClass>();
 
-The special ``f(*args)`` and ``f(*args, **kwargs)`` syntax is also supported to
-supply arbitrary argument and keyword lists, although these cannot be mixed
-with other parameters.
+Keyword arguments are also supported. In Python, there is the usual call syntax:
+
+.. code-block:: python
+
+    def f(number, say, to):
+        ...  # function code
+
+    f(1234, say="hello", to=some_instance)  # keyword call in Python
+
+In C++, the same call can be made using:
 
 .. code-block:: cpp
 
-    py::function f = <...>;
+    using namespace pybind11::literals; // to bring in the `_a` literal
+    f(1234, "say"_a="hello", "to"_a=some_instance); // keyword call in C++
+
+Unpacking of ``*args`` and ``**kwargs`` is also possible and can be mixed with
+other arguments:
+
+.. code-block:: cpp
+
+    // * unpacking
+    py::tuple args = py::make_tuple(1234, "hello", some_instance);
+    f(*args);
+
+    // ** unpacking
+    py::dict kwargs = py::dict("number"_a=1234, "say"_a="hello", "to"_a=some_instance);
+    f(**kwargs);
+
+    // mixed keywords, * and ** unpacking
     py::tuple args = py::make_tuple(1234);
-    py::dict kwargs;
-    kwargs["y"] = py::cast(5678);
-    py::object result = f(*args, **kwargs);
+    py::dict kwargs = py::dict("to"_a=some_instance);
+    f(*args, "say"_a="hello", **kwargs);
+
+Generalized unpacking according to PEP448_ is also supported:
+
+.. code-block:: cpp
+
+    py::dict kwargs1 = py::dict("number"_a=1234);
+    py::dict kwargs2 = py::dict("to"_a=some_instance);
+    f(**kwargs1, "say"_a="hello", **kwargs2);
 
 .. seealso::
 
     The file :file:`tests/test_python_types.cpp` contains a complete
     example that demonstrates passing native Python types in more detail. The
-    file :file:`tests/test_kwargs_and_defaults.cpp` discusses usage
-    of ``args`` and ``kwargs``.
+    file :file:`tests/test_callbacks.cpp` presents a few examples of calling
+    Python functions from C++, including keyword arguments and unpacking.
+
+.. _PEP448: https://www.python.org/dev/peps/pep-0448/
+
+Using Python's print function in C++
+====================================
+
+The usual way to write output in C++ is using ``std::cout`` while in Python one
+would use ``print``. Since these methods use different buffers, mixing them can
+lead to output order issues. To resolve this, pybind11 modules can use the
+:func:`py::print` function which writes to Python's ``sys.stdout`` for consistency.
+
+Python's ``print`` function is replicated in the C++ API including optional
+keyword arguments ``sep``, ``end``, ``file``, ``flush``. Everything works as
+expected in Python:
+
+.. code-block:: cpp
+
+    py::print(1, 2.0, "three"); // 1 2.0 three
+    py::print(1, 2.0, "three", "sep"_a="-"); // 1-2.0-three
+
+    auto args = py::make_tuple("unpacked", true);
+    py::print("->", *args, "end"_a="<-"); // -> unpacked True <-
 
 Default arguments revisited
 ===========================
diff --git a/docs/changelog.rst b/docs/changelog.rst
index 0485e92..a9886e0 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -46,6 +46,13 @@
 * Added constructors for ``str`` and ``bytes`` from zero-terminated char pointers,
   and from char pointers and length.
 * Added ``memoryview`` wrapper type which is constructible from ``buffer_info``.
+* New syntax to call a Python function from C++ using keyword arguments and unpacking,
+  e.g. ``foo(1, 2, "z"_a=3)`` or ``bar(1, *args, "z"_a=3, **kwargs)``.
+* Added ``py::print()`` function which replicates Python's API and writes to Python's
+  ``sys.stdout`` by default (as opposed to C's ``stdout`` like ``std::cout``).
+* Added ``py::dict`` keyword constructor: ``auto d = dict("number"_a=42, "name"_a="World");``
+* Added ``py::str::format()`` method and ``_s`` literal:
+  ``py::str s = "1 + 2 = {}"_s.format(3);``
 * Various minor improvements of library internals (no user-visible changes)
 
 1.8.1 (July 12, 2016)
diff --git a/include/pybind11/attr.h b/include/pybind11/attr.h
index 7925b8e..9acb3e3 100644
--- a/include/pybind11/attr.h
+++ b/include/pybind11/attr.h
@@ -14,35 +14,6 @@
 
 NAMESPACE_BEGIN(pybind11)
 
-template <typename T> struct arg_t;
-
-/// Annotation for keyword arguments
-struct arg {
-    constexpr explicit arg(const char *name) : name(name) { }
-
-    template <typename T>
-    constexpr arg_t<T> operator=(const T &value) const { return {name, value}; }
-    template <typename T, size_t N>
-    constexpr arg_t<const T *> operator=(T const (&value)[N]) const {
-        return operator=((const T *) value);
-    }
-
-    const char *name;
-};
-
-/// Annotation for keyword arguments with default values
-template <typename T> struct arg_t : public arg {
-    constexpr arg_t(const char *name, const T &value, const char *descr = nullptr)
-        : arg(name), value(value), descr(descr) { }
-    T value;
-    const char *descr;
-};
-
-inline namespace literals {
-/// String literal version of arg
-constexpr arg operator"" _a(const char *name, size_t) { return arg(name); }
-}
-
 /// Annotation for methods
 struct is_method { handle class_; is_method(const handle &c) : class_(c) { } };
 
@@ -238,21 +209,14 @@
 };
 
 /// Process a keyword argument attribute (*with* a default value)
-template <typename T>
-struct process_attribute<arg_t<T>> : process_attribute_default<arg_t<T>> {
-    static void init(const arg_t<T> &a, function_record *r) {
+template <> struct process_attribute<arg_v> : process_attribute_default<arg_v> {
+    static void init(const arg_v &a, function_record *r) {
         if (r->class_ && r->args.empty())
             r->args.emplace_back("self", nullptr, handle());
 
-        /* Convert keyword value into a Python object */
-        object o = object(detail::type_caster<typename detail::intrinsic_type<T>::type>::cast(
-                a.value, return_value_policy::automatic, handle()), false);
-
-        if (!o) {
+        if (!a.value) {
 #if !defined(NDEBUG)
-            std::string descr(typeid(T).name());
-            detail::clean_type_id(descr);
-            descr = "'" + std::string(a.name) + ": " + descr + "'";
+            auto descr = "'" + std::string(a.name) + ": " + a.type + "'";
             if (r->class_) {
                 if (r->name)
                     descr += " in method '" + (std::string) r->class_.str() + "." + (std::string) r->name + "'";
@@ -269,7 +233,7 @@
                           "Compile in debug mode for more information.");
 #endif
         }
-        r->args.emplace_back(a.name, a.descr, o.release());
+        r->args.emplace_back(a.name, a.descr, a.value.inc_ref());
     }
 };
 
@@ -301,9 +265,6 @@
     static void postcall(handle args, handle ret) { keep_alive_impl(Nurse, Patient, args, ret); }
 };
 
-/// Ignore that a variable is unused in compiler warnings
-inline void ignore_unused(const int *) { }
-
 /// Recursively iterate over variadic template arguments
 template <typename... Args> struct process_attributes {
     static void init(const Args&... args, function_record *r) {
@@ -324,11 +285,6 @@
     }
 };
 
-/// Compile-time integer sum
-constexpr size_t constexpr_sum() { return 0; }
-template <typename T, typename... Ts>
-constexpr size_t constexpr_sum(T n, Ts... ns) { return n + constexpr_sum(ns...); }
-
 /// Check the number of named arguments at compile time
 template <typename... Extra,
           size_t named = constexpr_sum(std::is_base_of<arg, Extra>::value...),
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index cbb0ae5..c8c8f77 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -57,6 +57,7 @@
                 } catch (const index_error &e)           { PyErr_SetString(PyExc_IndexError,    e.what()); return;
                 } catch (const key_error &e)             { PyErr_SetString(PyExc_KeyError,      e.what()); return;
                 } catch (const value_error &e)           { PyErr_SetString(PyExc_ValueError,    e.what()); return;
+                } catch (const type_error &e)            { PyErr_SetString(PyExc_TypeError,     e.what()); return;
                 } catch (const stop_iteration &e)        { PyErr_SetString(PyExc_StopIteration, e.what()); return;
                 } catch (const std::bad_alloc &e)        { PyErr_SetString(PyExc_MemoryError,   e.what()); return;
                 } catch (const std::domain_error &e)     { PyErr_SetString(PyExc_ValueError,    e.what()); return;
@@ -251,8 +252,8 @@
 /* Determine suitable casting operator */
 template <typename T>
 using cast_op_type = typename std::conditional<std::is_pointer<typename std::remove_reference<T>::type>::value,
-    typename std::add_pointer<typename intrinsic_type<T>::type>::type,
-    typename std::add_lvalue_reference<typename intrinsic_type<T>::type>::type>::type;
+    typename std::add_pointer<intrinsic_t<T>>::type,
+    typename std::add_lvalue_reference<intrinsic_t<T>>::type>::type;
 
 /// Generic type caster for objects stored on the heap
 template <typename type> class type_caster_base : public type_caster_generic {
@@ -308,6 +309,7 @@
 };
 
 template <typename type, typename SFINAE = void> class type_caster : public type_caster_base<type> { };
+template <typename type> using make_caster = type_caster<intrinsic_t<type>>;
 
 template <typename type> class type_caster<std::reference_wrapper<type>> : public type_caster_base<type> {
 public:
@@ -610,8 +612,8 @@
     }
 
     static handle cast(const type &src, return_value_policy policy, handle parent) {
-        object o1 = object(type_caster<typename intrinsic_type<T1>::type>::cast(src.first, policy, parent), false);
-        object o2 = object(type_caster<typename intrinsic_type<T2>::type>::cast(src.second, policy, parent), false);
+        object o1 = object(make_caster<T1>::cast(src.first, policy, parent), false);
+        object o2 = object(make_caster<T2>::cast(src.second, policy, parent), false);
         if (!o1 || !o2)
             return handle();
         tuple result(2);
@@ -622,24 +624,24 @@
 
     static PYBIND11_DESCR name() {
         return type_descr(
-            _("Tuple[") + type_caster<typename intrinsic_type<T1>::type>::name() +
-            _(", ") + type_caster<typename intrinsic_type<T2>::type>::name() + _("]"));
+            _("Tuple[") + make_caster<T1>::name() + _(", ") + make_caster<T2>::name() + _("]")
+        );
     }
 
     template <typename T> using cast_op_type = type;
 
     operator type() {
-        return type(first .operator typename type_caster<typename intrinsic_type<T1>::type>::template cast_op_type<T1>(),
-                    second.operator typename type_caster<typename intrinsic_type<T2>::type>::template cast_op_type<T2>());
+        return type(first.operator typename make_caster<T1>::template cast_op_type<T1>(),
+                    second.operator typename make_caster<T2>::template cast_op_type<T2>());
     }
 protected:
-    type_caster<typename intrinsic_type<T1>::type> first;
-    type_caster<typename intrinsic_type<T2>::type> second;
+    make_caster<T1> first;
+    make_caster<T2> second;
 };
 
 template <typename... Tuple> class type_caster<std::tuple<Tuple...>> {
     typedef std::tuple<Tuple...> type;
-    typedef std::tuple<typename intrinsic_type<Tuple>::type...> itype;
+    typedef std::tuple<intrinsic_t<Tuple>...> itype;
     typedef std::tuple<args> args_type;
     typedef std::tuple<args, kwargs> args_kwargs_type;
 public:
@@ -679,7 +681,7 @@
     }
 
     static PYBIND11_DESCR element_names() {
-        return detail::concat(type_caster<typename intrinsic_type<Tuple>::type>::name()...);
+        return detail::concat(make_caster<Tuple>::name()...);
     }
 
     static PYBIND11_DESCR name() {
@@ -704,12 +706,12 @@
 protected:
     template <typename ReturnValue, typename Func, size_t ... Index> ReturnValue call(Func &&f, index_sequence<Index...>) {
         return f(std::get<Index>(value)
-            .operator typename type_caster<typename intrinsic_type<Tuple>::type>::template cast_op_type<Tuple>()...);
+            .operator typename make_caster<Tuple>::template cast_op_type<Tuple>()...);
     }
 
     template <size_t ... Index> type cast(index_sequence<Index...>) {
         return type(std::get<Index>(value)
-            .operator typename type_caster<typename intrinsic_type<Tuple>::type>::template cast_op_type<Tuple>()...);
+            .operator typename make_caster<Tuple>::template cast_op_type<Tuple>()...);
     }
 
     template <size_t ... Indices> bool load(handle src, bool convert, index_sequence<Indices...>) {
@@ -726,7 +728,7 @@
     /* Implementation: Convert a C++ tuple into a Python tuple */
     template <size_t ... Indices> static handle cast(const type &src, return_value_policy policy, handle parent, index_sequence<Indices...>) {
         std::array<object, size> entries {{
-            object(type_caster<typename intrinsic_type<Tuple>::type>::cast(std::get<Indices>(src), policy, parent), false)...
+            object(make_caster<Tuple>::cast(std::get<Indices>(src), policy, parent), false)...
         }};
         for (const auto &entry: entries)
             if (!entry)
@@ -739,7 +741,7 @@
     }
 
 protected:
-    std::tuple<type_caster<typename intrinsic_type<Tuple>::type>...> value;
+    std::tuple<make_caster<Tuple>...> value;
 };
 
 /// Type caster for holder types like std::shared_ptr, etc.
@@ -846,7 +848,7 @@
 NAMESPACE_END(detail)
 
 template <typename T> T cast(const handle &handle) {
-    typedef detail::type_caster<typename detail::intrinsic_type<T>::type> type_caster;
+    using type_caster = detail::make_caster<T>;
     type_caster conv;
     if (!conv.load(handle, true)) {
 #if defined(NDEBUG)
@@ -866,7 +868,7 @@
         policy = std::is_pointer<T>::value ? return_value_policy::take_ownership : return_value_policy::copy;
     else if (policy == return_value_policy::automatic_reference)
         policy = std::is_pointer<T>::value ? return_value_policy::reference : return_value_policy::copy;
-    return object(detail::type_caster<typename detail::intrinsic_type<T>::type>::cast(value, policy, parent), false);
+    return object(detail::make_caster<T>::cast(value, policy, parent), false);
 }
 
 template <typename T> T handle::cast() const { return pybind11::cast<T>(*this); }
@@ -927,7 +929,7 @@
           typename... Args> tuple make_tuple(Args&&... args_) {
     const size_t size = sizeof...(Args);
     std::array<object, size> args {
-        { object(detail::type_caster<typename detail::intrinsic_type<Args>::type>::cast(
+        { object(detail::make_caster<Args>::cast(
             std::forward<Args>(args_), policy, nullptr), false)... }
     };
     for (auto &arg_value : args) {
@@ -947,13 +949,220 @@
     return result;
 }
 
-template <return_value_policy policy,
-          typename... Args> object handle::operator()(Args&&... args) const {
-    tuple args_tuple = pybind11::make_tuple<policy>(std::forward<Args>(args)...);
-    object result(PyObject_CallObject(m_ptr, args_tuple.ptr()), false);
-    if (!result)
-        throw error_already_set();
-    return result;
+/// Annotation for keyword arguments
+struct arg {
+    constexpr explicit arg(const char *name) : name(name) { }
+    template <typename T> arg_v operator=(T &&value) const;
+
+    const char *name;
+};
+
+/// Annotation for keyword arguments with values
+struct arg_v : arg {
+    template <typename T>
+    arg_v(const char *name, T &&x, const char *descr = nullptr)
+        : arg(name),
+          value(detail::make_caster<T>::cast(x, return_value_policy::automatic, handle()), false),
+          descr(descr)
+#if !defined(NDEBUG)
+        , type(type_id<T>())
+#endif
+    { }
+
+    object value;
+    const char *descr;
+#if !defined(NDEBUG)
+    std::string type;
+#endif
+};
+
+template <typename T>
+arg_v arg::operator=(T &&value) const { return {name, std::forward<T>(value)}; }
+
+/// Alias for backward compatibility -- to be removed in version 2.0
+template <typename /*unused*/> using arg_t = arg_v;
+
+inline namespace literals {
+/// String literal version of arg
+constexpr arg operator"" _a(const char *name, size_t) { return arg(name); }
+}
+
+NAMESPACE_BEGIN(detail)
+NAMESPACE_BEGIN(constexpr_impl)
+/// Implementation details for constexpr functions
+constexpr int first(int i) { return i; }
+template <typename T, typename... Ts>
+constexpr int first(int i, T v, Ts... vs) { return v ? i : first(i + 1, vs...); }
+
+constexpr int last(int /*i*/, int result) { return result; }
+template <typename T, typename... Ts>
+constexpr int last(int i, int result, T v, Ts... vs) { return last(i + 1, v ? i : result, vs...); }
+NAMESPACE_END(constexpr_impl)
+
+/// Return the index of the first type in Ts which satisfies Predicate<T>
+template <template<typename> class Predicate, typename... Ts>
+constexpr int constexpr_first() { return constexpr_impl::first(0, Predicate<Ts>::value...); }
+
+/// Return the index of the last type in Ts which satisfies Predicate<T>
+template <template<typename> class Predicate, typename... Ts>
+constexpr int constexpr_last() { return constexpr_impl::last(0, -1, Predicate<Ts>::value...); }
+
+/// Helper class which collects only positional arguments for a Python function call.
+/// A fancier version below can collect any argument, but this one is optimal for simple calls.
+template <return_value_policy policy>
+class simple_collector {
+public:
+    template <typename... Ts>
+    simple_collector(Ts &&...values)
+        : m_args(pybind11::make_tuple<policy>(std::forward<Ts>(values)...)) { }
+
+    const tuple &args() const & { return m_args; }
+    dict kwargs() const { return {}; }
+
+    tuple args() && { return std::move(m_args); }
+
+    /// Call a Python function and pass the collected arguments
+    object call(PyObject *ptr) const {
+        auto result = object(PyObject_CallObject(ptr, m_args.ptr()), false);
+        if (!result)
+            throw error_already_set();
+        return result;
+    }
+
+private:
+    tuple m_args;
+};
+
+/// Helper class which collects positional, keyword, * and ** arguments for a Python function call
+template <return_value_policy policy>
+class unpacking_collector {
+public:
+    template <typename... Ts>
+    unpacking_collector(Ts &&...values) {
+        // Tuples aren't (easily) resizable so a list is needed for collection,
+        // but the actual function call strictly requires a tuple.
+        auto args_list = list();
+        int _[] = { 0, (process(args_list, std::forward<Ts>(values)), 0)... };
+        ignore_unused(_);
+
+        m_args = object(PyList_AsTuple(args_list.ptr()), false);
+    }
+
+    const tuple &args() const & { return m_args; }
+    const dict &kwargs() const & { return m_kwargs; }
+
+    tuple args() && { return std::move(m_args); }
+    dict kwargs() && { return std::move(m_kwargs); }
+
+    /// Call a Python function and pass the collected arguments
+    object call(PyObject *ptr) const {
+        auto result = object(PyObject_Call(ptr, m_args.ptr(), m_kwargs.ptr()), false);
+        if (!result)
+            throw error_already_set();
+        return result;
+    }
+
+private:
+    template <typename T>
+    void process(list &args_list, T &&x) {
+        auto o = object(detail::make_caster<T>::cast(std::forward<T>(x), policy, nullptr), false);
+        if (!o) {
+#if defined(NDEBUG)
+            argument_cast_error();
+#else
+            argument_cast_error(std::to_string(args_list.size()), type_id<T>());
+#endif
+        }
+        args_list.append(o);
+    }
+
+    void process(list &args_list, detail::args_proxy ap) {
+        for (const auto &a : ap) {
+            args_list.append(a.cast<object>());
+        }
+    }
+
+    void process(list &/*args_list*/, arg_v a) {
+        if (m_kwargs[a.name]) {
+#if defined(NDEBUG)
+            multiple_values_error();
+#else
+            multiple_values_error(a.name);
+#endif
+        }
+        if (!a.value) {
+#if defined(NDEBUG)
+            argument_cast_error();
+#else
+            argument_cast_error(a.name, a.type);
+#endif
+        }
+        m_kwargs[a.name] = a.value;
+    }
+
+    void process(list &/*args_list*/, detail::kwargs_proxy kp) {
+        for (const auto &k : dict(kp, true)) {
+            if (m_kwargs[k.first]) {
+#if defined(NDEBUG)
+                multiple_values_error();
+#else
+                multiple_values_error(k.first.str());
+#endif
+            }
+            m_kwargs[k.first] = k.second;
+        }
+    }
+
+    [[noreturn]] static void multiple_values_error() {
+        throw type_error("Got multiple values for keyword argument "
+                         "(compile in debug mode for details)");
+    }
+
+    [[noreturn]] static void multiple_values_error(std::string name) {
+        throw type_error("Got multiple values for keyword argument '" + name + "'");
+    }
+
+    [[noreturn]] static void argument_cast_error() {
+        throw cast_error("Unable to convert call argument to Python object "
+                         "(compile in debug mode for details)");
+    }
+
+    [[noreturn]] static void argument_cast_error(std::string name, std::string type) {
+        throw cast_error("Unable to convert call argument '" + name
+                         + "' of type '" + type + "' to Python object");
+    }
+
+private:
+    tuple m_args;
+    dict m_kwargs;
+};
+
+/// Collect only positional arguments for a Python function call
+template <return_value_policy policy, typename... Args,
+          typename = enable_if_t<all_of_t<is_positional, Args...>::value>>
+simple_collector<policy> collect_arguments(Args &&...args) {
+    return {std::forward<Args>(args)...};
+}
+
+/// Collect all arguments, including keywords and unpacking (only instantiated when needed)
+template <return_value_policy policy, typename... Args,
+          typename = enable_if_t<!all_of_t<is_positional, Args...>::value>>
+unpacking_collector<policy> collect_arguments(Args &&...args) {
+    // Following argument order rules for generalized unpacking according to PEP 448
+    static_assert(
+        constexpr_last<is_positional, Args...>() < constexpr_first<is_keyword_or_ds, Args...>()
+        && constexpr_last<is_s_unpacking, Args...>() < constexpr_first<is_ds_unpacking, Args...>(),
+        "Invalid function call: positional args must precede keywords and ** unpacking; "
+        "* unpacking must precede ** unpacking"
+    );
+    return {std::forward<Args>(args)...};
+}
+
+NAMESPACE_END(detail)
+
+template <return_value_policy policy, typename... Args>
+object handle::operator()(Args &&...args) const {
+    return detail::collect_arguments<policy>(std::forward<Args>(args)...).call(m_ptr);
 }
 
 template <return_value_policy policy,
@@ -961,20 +1170,6 @@
     return operator()<policy>(std::forward<Args>(args)...);
 }
 
-inline object handle::operator()(detail::args_proxy args) const {
-    object result(PyObject_CallObject(m_ptr, args.ptr()), false);
-    if (!result)
-        throw error_already_set();
-    return result;
-}
-
-inline object handle::operator()(detail::args_proxy args, detail::kwargs_proxy kwargs) const {
-    object result(PyObject_Call(m_ptr, args.ptr(), kwargs.ptr()), false);
-    if (!result)
-        throw error_already_set();
-    return result;
-}
-
 #define PYBIND11_MAKE_OPAQUE(Type) \
     namespace pybind11 { namespace detail { \
         template<> class type_caster<Type> : public type_caster_base<Type> { }; \
diff --git a/include/pybind11/common.h b/include/pybind11/common.h
index 762a1e0..3d5aeb2 100644
--- a/include/pybind11/common.h
+++ b/include/pybind11/common.h
@@ -326,10 +326,45 @@
 template <typename T> struct intrinsic_type<T&&>                  { typedef typename intrinsic_type<T>::type type; };
 template <typename T, size_t N> struct intrinsic_type<const T[N]> { typedef typename intrinsic_type<T>::type type; };
 template <typename T, size_t N> struct intrinsic_type<T[N]>       { typedef typename intrinsic_type<T>::type type; };
+template <typename T> using intrinsic_t = typename intrinsic_type<T>::type;
 
 /// Helper type to replace 'void' in some expressions
 struct void_type { };
 
+/// from __cpp_future__ import (convenient aliases from C++14/17)
+template <bool B> using bool_constant = std::integral_constant<bool, B>;
+template <class T> using negation = bool_constant<!T::value>;
+template <bool B, typename T = void> using enable_if_t = typename std::enable_if<B, T>::type;
+template <bool B, typename T, typename F> using conditional_t = typename std::conditional<B, T, F>::type;
+
+/// Compile-time integer sum
+constexpr size_t constexpr_sum() { return 0; }
+template <typename T, typename... Ts>
+constexpr size_t constexpr_sum(T n, Ts... ns) { return size_t{n} + constexpr_sum(ns...); }
+
+/// Return true if all/any Ts satisfy Predicate<T>
+#if !defined(_MSC_VER)
+template <template<typename> class Predicate, typename... Ts>
+using all_of_t = bool_constant<(constexpr_sum(Predicate<Ts>::value...) == sizeof...(Ts))>;
+template <template<typename> class Predicate, typename... Ts>
+using any_of_t = bool_constant<(constexpr_sum(Predicate<Ts>::value...) > 0)>;
+#else
+// MSVC workaround (2015 Update 3 has issues with some member type aliases and constexpr)
+template <template<typename> class P, typename...> struct all_of_t : std::true_type { };
+template <template<typename> class P, typename T, typename... Ts>
+struct all_of_t<P, T, Ts...> : conditional_t<P<T>::value, all_of_t<P, Ts...>, std::false_type> { };
+template <template<typename> class P, typename...> struct any_of_t : std::false_type { };
+template <template<typename> class P, typename T, typename... Ts>
+struct any_of_t<P, T, Ts...> : conditional_t<P<T>::value, std::true_type, any_of_t<P, Ts...>> { };
+#endif
+
+/// Defer the evaluation of type T until types Us are instantiated
+template <typename T, typename... /*Us*/> struct deferred_type { using type = T; };
+template <typename T, typename... Us> using deferred_t = typename deferred_type<T, Us...>::type;
+
+/// Ignore that a variable is unused in compiler warnings
+inline void ignore_unused(const int *) { }
+
 NAMESPACE_END(detail)
 
 #define PYBIND11_RUNTIME_EXCEPTION(name) \
@@ -345,6 +380,7 @@
 PYBIND11_RUNTIME_EXCEPTION(index_error)
 PYBIND11_RUNTIME_EXCEPTION(key_error)
 PYBIND11_RUNTIME_EXCEPTION(value_error)
+PYBIND11_RUNTIME_EXCEPTION(type_error)
 PYBIND11_RUNTIME_EXCEPTION(cast_error) /// Thrown when pybind11::cast or handle::call fail due to a type casting error
 PYBIND11_RUNTIME_EXCEPTION(reference_cast_error) /// Used internally
 
diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h
index 7bdf913..2104671 100644
--- a/include/pybind11/pybind11.h
+++ b/include/pybind11/pybind11.h
@@ -1233,6 +1233,33 @@
     }
 };
 
+NAMESPACE_BEGIN(detail)
+PYBIND11_NOINLINE inline void print(tuple args, dict kwargs) {
+    auto strings = tuple(args.size());
+    for (size_t i = 0; i < args.size(); ++i) {
+        strings[i] = args[i].cast<object>().str();
+    }
+    auto sep = kwargs["sep"] ? kwargs["sep"] : cast(" ");
+    auto line = sep.attr("join").cast<object>()(strings);
+
+    auto file = kwargs["file"] ? kwargs["file"].cast<object>()
+                               : module::import("sys").attr("stdout");
+    auto write = file.attr("write").cast<object>();
+    write(line);
+    write(kwargs["end"] ? kwargs["end"] : cast("\n"));
+
+    if (kwargs["flush"] && kwargs["flush"].cast<bool>()) {
+        file.attr("flush").cast<object>()();
+    }
+}
+NAMESPACE_END(detail)
+
+template <return_value_policy policy = return_value_policy::automatic_reference, typename... Args>
+void print(Args &&...args) {
+    auto c = detail::collect_arguments<policy>(std::forward<Args>(args)...);
+    detail::print(c.args(), c.kwargs());
+}
+
 #if defined(WITH_THREAD)
 
 /* The functions below essentially reproduce the PyGILState_* API using a RAII
diff --git a/include/pybind11/pytypes.h b/include/pybind11/pytypes.h
index d25bc38..51f1629 100644
--- a/include/pybind11/pytypes.h
+++ b/include/pybind11/pytypes.h
@@ -16,7 +16,8 @@
 NAMESPACE_BEGIN(pybind11)
 
 /* A few forward declarations */
-class object; class str; class object; class dict; class iterator;
+class object; class str; class iterator;
+struct arg; struct arg_v;
 namespace detail { class accessor; class args_proxy; class kwargs_proxy; }
 
 /// Holds a reference to a Python object (no reference counting)
@@ -47,8 +48,6 @@
     object call(Args&&... args) const;
     template <return_value_policy policy = return_value_policy::automatic_reference, typename ... Args>
     object operator()(Args&&... args) const;
-    inline object operator()(detail::args_proxy args) const;
-    inline object operator()(detail::args_proxy f_args, detail::kwargs_proxy kwargs) const;
     operator bool() const { return m_ptr != nullptr; }
     bool operator==(const handle &h) const { return m_ptr == h.m_ptr; }
     bool operator!=(const handle &h) const { return m_ptr != h.m_ptr; }
@@ -249,6 +248,23 @@
     kwargs_proxy operator*() const { return kwargs_proxy(*this); }
 };
 
+/// Python argument categories (using PEP 448 terms)
+template <typename T> using is_keyword = std::is_base_of<arg, T>;
+template <typename T> using is_s_unpacking = std::is_same<args_proxy, T>; // * unpacking
+template <typename T> using is_ds_unpacking = std::is_same<kwargs_proxy, T>; // ** unpacking
+template <typename T> using is_positional = bool_constant<
+    !is_keyword<T>::value && !is_s_unpacking<T>::value && !is_ds_unpacking<T>::value
+>;
+template <typename T> using is_keyword_or_ds = bool_constant<
+    is_keyword<T>::value || is_ds_unpacking<T>::value
+>;
+
+// Call argument collector forward declarations
+template <return_value_policy policy = return_value_policy::automatic_reference>
+class simple_collector;
+template <return_value_policy policy = return_value_policy::automatic_reference>
+class unpacking_collector;
+
 NAMESPACE_END(detail)
 
 #define PYBIND11_OBJECT_CVT(Name, Parent, CheckFun, CvtStmt) \
@@ -373,8 +389,18 @@
             pybind11_fail("Unable to extract string contents! (invalid type)");
         return std::string(buffer, (size_t) length);
     }
+
+    template <typename... Args>
+    str format(Args &&...args) const {
+        return attr("format").cast<object>()(std::forward<Args>(args)...);
+    }
 };
 
+inline namespace literals {
+/// String literal version of str
+inline str operator"" _s(const char *s, size_t size) { return {s, size}; }
+}
+
 inline pybind11::str handle::str() const {
     PyObject *strValue = PyObject_Str(m_ptr);
 #if PY_MAJOR_VERSION < 3
@@ -567,6 +593,12 @@
     dict() : object(PyDict_New(), false) {
         if (!m_ptr) pybind11_fail("Could not allocate dict object!");
     }
+    template <typename... Args,
+              typename = detail::enable_if_t<detail::all_of_t<detail::is_keyword_or_ds, Args...>::value>,
+              // MSVC workaround: it can't compile an out-of-line definition, so defer the collector
+              typename collector = detail::deferred_t<detail::unpacking_collector<>, Args...>>
+    dict(Args &&...args) : dict(collector(std::forward<Args>(args)...).kwargs()) { }
+
     size_t size() const { return (size_t) PyDict_Size(m_ptr); }
     detail::dict_iterator begin() const { return (++detail::dict_iterator(*this, 0)); }
     detail::dict_iterator end() const { return detail::dict_iterator(); }
diff --git a/include/pybind11/stl.h b/include/pybind11/stl.h
index 2c47841..4390efa 100644
--- a/include/pybind11/stl.h
+++ b/include/pybind11/stl.h
@@ -26,8 +26,8 @@
 NAMESPACE_BEGIN(detail)
 
 template <typename Type, typename Key> struct set_caster {
-    typedef Type type;
-    typedef type_caster<typename intrinsic_type<Key>::type> key_conv;
+    using type = Type;
+    using key_conv = make_caster<Key>;
 
     bool load(handle src, bool convert) {
         pybind11::set s(src, true);
@@ -57,9 +57,9 @@
 };
 
 template <typename Type, typename Key, typename Value> struct map_caster {
-    typedef Type type;
-    typedef type_caster<typename intrinsic_type<Key>::type>   key_conv;
-    typedef type_caster<typename intrinsic_type<Value>::type> value_conv;
+    using type = Type;
+    using key_conv   = make_caster<Key>;
+    using value_conv = make_caster<Value>;
 
     bool load(handle src, bool convert) {
         dict d(src, true);
@@ -93,8 +93,8 @@
 };
 
 template <typename Type, typename Value> struct list_caster {
-    typedef Type type;
-    typedef type_caster<typename intrinsic_type<Value>::type> value_conv;
+    using type = Type;
+    using value_conv = make_caster<Value>;
 
     bool load(handle src, bool convert) {
         list l(src, true);
@@ -138,8 +138,8 @@
  : list_caster<std::list<Type, Alloc>, Type> { };
 
 template <typename Type, size_t Size> struct type_caster<std::array<Type, Size>> {
-    typedef std::array<Type, Size> array_type;
-    typedef type_caster<typename intrinsic_type<Type>::type> value_conv;
+    using array_type = std::array<Type, Size>;
+    using value_conv = make_caster<Type>;
 
     bool load(handle src, bool convert) {
         list l(src, true);
diff --git a/tests/conftest.py b/tests/conftest.py
index 8ba0f48..eb6fd02 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -68,18 +68,22 @@
     def __init__(self, capfd):
         self.capfd = capfd
         self.out = ""
+        self.err = ""
 
-    def _flush_stdout(self):
+    def _flush(self):
+        """Workaround for issues on Windows: to be removed after tests get py::print"""
         sys.stdout.flush()
-        os.fsync(sys.stdout.fileno())  # make sure C++ output is also read
-        return self.capfd.readouterr()[0]
+        os.fsync(sys.stdout.fileno())  # make sure C++ output is also read
+        sys.stderr.flush()
+        os.fsync(sys.stderr.fileno())  # same flush + fsync treatment for stderr
+        return self.capfd.readouterr()  # (out, err) pair; __exit__ unpacks both
 
     def __enter__(self):
-        self._flush_stdout()
+        self._flush()  # result ignored; presumably drains output captured before the block
         return self
 
     def __exit__(self, *_):
-        self.out = self._flush_stdout()
+        self.out, self.err = self._flush()  # keep both streams; err is exposed via `stderr`
 
     def __eq__(self, other):
         a = Output(self.out)
@@ -100,6 +104,10 @@
     def unordered(self):
         return Unordered(self.out)
 
+    @property
+    def stderr(self):  # captured stderr wrapped in Output, as __eq__ does for self.out
+        return Output(self.err)
+
 
 @pytest.fixture
 def capture(capfd):
diff --git a/tests/pybind11_tests.h b/tests/pybind11_tests.h
index 8af3154..cf3cb36 100644
--- a/tests/pybind11_tests.h
+++ b/tests/pybind11_tests.h
@@ -8,6 +8,7 @@
 using std::endl;
 
 namespace py = pybind11;
+using namespace pybind11::literals;
 
 class test_initializer {
 public:
diff --git a/tests/test_callbacks.cpp b/tests/test_callbacks.cpp
index 8e0a6cc..31d4e39 100644
--- a/tests/test_callbacks.cpp
+++ b/tests/test_callbacks.cpp
@@ -71,6 +71,9 @@
     }
 };
 
+/// Deliberately unbound empty type: used as a call argument to trigger a conversion error
+struct Unregistered {};
+
 test_initializer callbacks([](py::module &m) {
     m.def("test_callback1", &test_callback1);
     m.def("test_callback2", &test_callback2);
@@ -78,8 +81,56 @@
     m.def("test_callback4", &test_callback4);
     m.def("test_callback5", &test_callback5);
 
-    /* Test cleanup of lambda closure */
+    // Test keyword args and generalized unpacking (Python callables invoked from C++)
+    m.def("test_tuple_unpacking", [](py::function f) {
+        auto t1 = py::make_tuple(2, 3);
+        auto t2 = py::make_tuple(5, 6);
+        return f("positional", 1, *t1, 4, *t2); // plain positionals interleaved with two * unpackings
+    });
 
+    m.def("test_dict_unpacking", [](py::function f) {
+        auto d1 = py::dict("key"_a="value", "a"_a=1);
+        auto d2 = py::dict();
+        auto d3 = py::dict("b"_a=2);
+        return f("positional", 1, **d1, **d2, **d3); // d2 is empty and adds no keywords
+    });
+
+    m.def("test_keyword_args", [](py::function f) {
+        return f("x"_a=10, "y"_a=20); // keywords only, no positional arguments
+    });
+
+    m.def("test_unpacking_and_keywords1", [](py::function f) {
+        auto args = py::make_tuple(2);
+        auto kwargs = py::dict("d"_a=4);
+        return f(1, *args, "c"_a=3, **kwargs); // positional, * unpacking, keyword and ** unpacking in one call
+    });
+
+    m.def("test_unpacking_and_keywords2", [](py::function f) {
+        auto kwargs1 = py::dict("a"_a=1);
+        auto kwargs2 = py::dict("c"_a=3, "d"_a=4);
+        return f("positional", *py::make_tuple(1), 2, *py::make_tuple(3, 4), 5,
+                 "key"_a="value", **kwargs1, "b"_a=2, **kwargs2, "e"_a=5); // keywords and ** unpackings interleaved
+    });
+
+    m.def("test_unpacking_error1", [](py::function f) {
+        auto kwargs = py::dict("x"_a=3);
+        return f("x"_a=1, "y"_a=2, **kwargs); // duplicate ** after keyword: "x" is supplied twice
+    });
+
+    m.def("test_unpacking_error2", [](py::function f) {
+        auto kwargs = py::dict("x"_a=3);
+        return f(**kwargs, "x"_a=1); // duplicate keyword after **: "x" is supplied twice
+    });
+
+    m.def("test_arg_conversion_error1", [](py::function f) {
+        f(234, Unregistered(), "kw"_a=567); // Unregistered passed as a positional argument
+    });
+
+    m.def("test_arg_conversion_error2", [](py::function f) {
+        f(234, "expected_name"_a=Unregistered(), "kw"_a=567); // Unregistered passed as a keyword argument
+    });
+
+    /* Test cleanup of lambda closure */
     m.def("test_cleanup", []() -> std::function<void(void)> {
         Payload p;
 
diff --git a/tests/test_callbacks.py b/tests/test_callbacks.py
index d6e72f3..8f867d4 100644
--- a/tests/test_callbacks.py
+++ b/tests/test_callbacks.py
@@ -27,6 +27,41 @@
     assert f(number=43) == 44
 
 
+def test_keyword_args_and_generalized_unpacking():
+    from pybind11_tests import (test_tuple_unpacking, test_dict_unpacking, test_keyword_args,
+                                test_unpacking_and_keywords1, test_unpacking_and_keywords2,
+                                test_unpacking_error1, test_unpacking_error2,
+                                test_arg_conversion_error1, test_arg_conversion_error2)
+
+    def f(*args, **kwargs):  # echoes its arguments so the asserts can see what C++ passed
+        return args, kwargs
+
+    assert test_tuple_unpacking(f) == (("positional", 1, 2, 3, 4, 5, 6), {})
+    assert test_dict_unpacking(f) == (("positional", 1), {"key": "value", "a": 1, "b": 2})
+    assert test_keyword_args(f) == ((), {"x": 10, "y": 20})
+    assert test_unpacking_and_keywords1(f) == ((1, 2), {"c": 3, "d": 4})
+    assert test_unpacking_and_keywords2(f) == (
+        ("positional", 1, 2, 3, 4, 5),
+        {"key": "value", "a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
+    )
+
+    with pytest.raises(TypeError) as excinfo:  # "x" given as keyword, then again via **kwargs
+        test_unpacking_error1(f)
+    assert "Got multiple values for keyword argument" in str(excinfo.value)
+
+    with pytest.raises(TypeError) as excinfo:  # "x" given via **kwargs, then again as keyword
+        test_unpacking_error2(f)
+    assert "Got multiple values for keyword argument" in str(excinfo.value)
+
+    with pytest.raises(RuntimeError) as excinfo:  # positional Unregistered() argument
+        test_arg_conversion_error1(f)
+    assert "Unable to convert call argument" in str(excinfo.value)
+
+    with pytest.raises(RuntimeError) as excinfo:  # keyword Unregistered() argument
+        test_arg_conversion_error2(f)
+    assert "Unable to convert call argument" in str(excinfo.value)
+
+
 def test_lambda_closure_cleanup():
     from pybind11_tests import test_cleanup, payload_cstats
 
diff --git a/tests/test_kwargs_and_defaults.cpp b/tests/test_kwargs_and_defaults.cpp
index bd24498..24fc0cd 100644
--- a/tests/test_kwargs_and_defaults.cpp
+++ b/tests/test_kwargs_and_defaults.cpp
@@ -20,13 +20,6 @@
     return ret;
 }
 
-py::object call_kw_func(py::function f) {
-    py::tuple args = py::make_tuple(1234);
-    py::dict kwargs;
-    kwargs["y"] = py::cast(5678);
-    return f(*args, **kwargs);
-}
-
 py::tuple args_function(py::args args) {
     return args;
 }
@@ -49,14 +42,11 @@
     std::vector<int> list;
     list.push_back(13);
     list.push_back(17);
-
     m.def("kw_func4", &kw_func4, py::arg("myList") = list);
-    m.def("call_kw_func", &call_kw_func);
 
     m.def("args_function", &args_function);
     m.def("args_kwargs_function", &args_kwargs_function);
 
-    using namespace py::literals;
     m.def("kw_func_udl", &kw_func, "x"_a, "y"_a=300);
     m.def("kw_func_udl_z", &kw_func, "x"_a, "y"_a=0);
 
diff --git a/tests/test_kwargs_and_defaults.py b/tests/test_kwargs_and_defaults.py
index 14d9c5a..0e1ea80 100644
--- a/tests/test_kwargs_and_defaults.py
+++ b/tests/test_kwargs_and_defaults.py
@@ -1,7 +1,6 @@
 import pytest
-from pybind11_tests import (kw_func0, kw_func1, kw_func2, kw_func3, kw_func4, call_kw_func,
-                            args_function, args_kwargs_function, kw_func_udl, kw_func_udl_z,
-                            KWClass)
+from pybind11_tests import (kw_func0, kw_func1, kw_func2, kw_func3, kw_func4, args_function,
+                            args_kwargs_function, kw_func_udl, kw_func_udl_z, KWClass)
 
 
 def test_function_signatures(doc):
@@ -49,8 +48,6 @@
 
 
 def test_arg_and_kwargs():
-    assert call_kw_func(kw_func2) == "x=1234, y=5678"
-
     args = 'arg1_value', 'arg2_value', 3
     assert args_function(*args) == args
 
diff --git a/tests/test_python_types.cpp b/tests/test_python_types.cpp
index 8ec7e26..e527c0a 100644
--- a/tests/test_python_types.cpp
+++ b/tests/test_python_types.cpp
@@ -197,4 +197,32 @@
         .def_readwrite_static("value", &ExamplePythonTypes::value, "Static value member")
         .def_readonly_static("value2", &ExamplePythonTypes::value2, "Static value member (readonly)")
         ;
+
+    m.def("test_print_function", []() {
+        py::print("Hello, World!");
+        py::print(1, 2.0, "three", true, std::string("-- multiple args"));
+        auto args = py::make_tuple("and", "a", "custom", "separator");
+        py::print("*args", *args, "sep"_a="-"); // unpacked tuple items are joined with the custom separator
+        py::print("no new line here", "end"_a=" -- "); // "end" replaces the newline, so the next print continues this line
+        py::print("next print");
+
+        auto py_stderr = py::module::import("sys").attr("stderr").cast<py::object>();
+        py::print("this goes to stderr", "file"_a=py_stderr); // redirected through the "file" keyword
+
+        py::print("flush", "flush"_a=true);
+
+        py::print("{a} + {b} = {c}"_s.format("a"_a="py::print", "b"_a="str.format", "c"_a="this"));
+    });
+
+    m.def("test_str_format", []() {
+        auto s1 = "{} + {} = {}"_s.format(1, 2, 3); // positional placeholders
+        auto s2 = "{a} + {b} = {c}"_s.format("a"_a=1, "b"_a=2, "c"_a=3); // named placeholders
+        return py::make_tuple(s1, s2);
+    });
+
+    m.def("test_dict_keyword_constructor", []() {
+        auto d1 = py::dict("x"_a=1, "y"_a=2);
+        auto d2 = py::dict("z"_a=3, **d1); // one explicit keyword plus ** unpacking of d1
+        return d2;
+    });
 });
diff --git a/tests/test_python_types.py b/tests/test_python_types.py
index 3738d41..087a9a2 100644
--- a/tests/test_python_types.py
+++ b/tests/test_python_types.py
@@ -218,3 +218,33 @@
     assert ExamplePythonTypes.__module__ == "pybind11_tests"
     assert ExamplePythonTypes.get_set.__name__ == "get_set"
     assert ExamplePythonTypes.get_set.__module__ == "pybind11_tests"
+
+
+def test_print(capture):
+    from pybind11_tests import test_print_function
+
+    with capture:
+        test_print_function()  # prints several lines to stdout and one line to stderr
+    assert capture == """
+        Hello, World!
+        1 2.0 three True -- multiple args
+        *args-and-a-custom-separator
+        no new line here -- next print
+        flush
+        py::print + str.format = this
+    """
+    assert capture.stderr == "this goes to stderr"  # the line printed with "file"_a=py_stderr
+
+
+def test_str_api():
+    from pybind11_tests import test_str_format
+
+    s1, s2 = test_str_format()  # s1: positional placeholders, s2: named placeholders
+    assert s1 == "1 + 2 = 3"
+    assert s1 == s2  # both formatting styles must give the same text
+
+
+def test_dict_api():
+    from pybind11_tests import test_dict_keyword_constructor
+
+    assert test_dict_keyword_constructor() == {"x": 1, "y": 2, "z": 3}  # "z" direct, "x"/"y" via **d1