Fix Python 3 `bytes` conversion to std::string/char*

The Unicode support added in 2.1 (PR #624) inadvertently broke accepting
`bytes` as std::string/char* arguments.  This restores it with a
separate path that does a plain conversion (i.e. completely bypassing
all the encoding/decoding code), but only for single-byte string types.
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 5bc4605..fc2612b 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -693,9 +693,9 @@
             return false;
         } else if (!PyUnicode_Check(load_src.ptr())) {
 #if PY_MAJOR_VERSION >= 3
-            return false;
-            // The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
+            return load_bytes(load_src);
 #else
+            // Python 2 only: under Python 3 this is a guaranteed failure if PyUnicode_Check fails
             if (!PYBIND11_BYTES_CHECK(load_src.ptr()))
                 return false;
             temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
@@ -740,6 +740,28 @@
         return PyUnicode_Decode(buffer, nbytes, UTF_N == 8 ? "utf-8" : UTF_N == 16 ? "utf-16" : "utf-32", nullptr);
 #endif
     }
+
+#if PY_MAJOR_VERSION >= 3
+    // Python 3 only: a `bytes` object given for a std::string or char* argument is taken
+    // verbatim, with no encoding/decoding step.  Other C++ character sizes always reject it.
+    // Python 2, which converts a str to unicode instead, never takes this path.
+    template <typename C = CharT>
+    bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) {
+        // Anything other than a raw bytes object is rejected here.
+        if (!PYBIND11_BYTES_CHECK(src.ptr()))
+            return false;
+
+        const char *data = PYBIND11_BYTES_AS_STRING(src.ptr());
+        if (!data)
+            return false;
+
+        // Build from pointer + explicit length rather than as a NUL-terminated C string.
+        value = StringType(data, (size_t) PYBIND11_BYTES_SIZE(src.ptr()));
+        return true;
+    }
+    template <typename C = CharT>
+    bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) { return false; }
+#endif
 };
 
 // Type caster for C-style strings.  We basically use a std::string type caster, but also add the
diff --git a/tests/test_python_types.cpp b/tests/test_python_types.cpp
index 6f20809..18aa87a 100644
--- a/tests/test_python_types.cpp
+++ b/tests/test_python_types.cpp
@@ -473,6 +473,9 @@
     m.def("ord_char32", [](char32_t c) -> uint32_t { return c; });
     m.def("ord_wchar", [](wchar_t c) -> int { return c; });
 
+    m.def("strlen", [](char *s) { return strlen(s); });
+    m.def("string_length", [](std::string s) { return s.length(); });
+
     m.def("return_none_string", []() -> std::string * { return nullptr; });
     m.def("return_none_char",   []() -> const char *  { return nullptr; });
     m.def("return_none_bool",   []() -> bool *        { return nullptr; });
diff --git a/tests/test_python_types.py b/tests/test_python_types.py
index cf8c147..9849bc8 100644
--- a/tests/test_python_types.py
+++ b/tests/test_python_types.py
@@ -511,6 +511,20 @@
     assert str(excinfo.value) == toolong_message
 
 
+def test_bytes_to_string():
+    """Tests the ability to pass bytes to C++ string-accepting functions.  Note that this is
+    one-way: the only way to return bytes to Python is via the pybind11::bytes class."""
+    # Issue #816
+    from pybind11_tests import strlen, string_length
+
+    # Use b"..." literals so that a genuine `bytes` object is passed on Python 3 (a `str`
+    # argument would not exercise the bytes path); on Python 2 they are plain `str`.
+    assert strlen(b"hi") == 2
+    assert string_length(b"world") == 5
+    assert string_length(b"a\x00b") == 3
+    assert strlen(b"a\x00b") == 1  # C-string limitation
+
+
 def test_builtins_cast_return_none():
     """Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""
     import pybind11_tests as m