Fix passing in utf8 encoded strings with python 2
Passing utf8 encoded strings from python to a C++ function taking a
std::string was broken. The previous version was trying to call
'PyUnicode_FromObject' on this data, which failed to convert the string
to unicode with the default ascii codec. Also, this incurs an unnecessary
conversion to unicode for data that is immediately converted back to
utf8.
Fix by treating Python 2 strings the same as Python 3 bytes objects, and just
copying over the data if possible.
diff --git a/include/pybind11/cast.h b/include/pybind11/cast.h
index 3934971..9e7b4dd 100644
--- a/include/pybind11/cast.h
+++ b/include/pybind11/cast.h
@@ -734,9 +734,14 @@
#if PY_MAJOR_VERSION >= 3
return load_bytes(load_src);
#else
+ if (sizeof(CharT) == 1) {
+ return load_bytes(load_src);
+ }
+
// The below is a guaranteed failure in Python 3 when PyUnicode_Check returns false
if (!PYBIND11_BYTES_CHECK(load_src.ptr()))
return false;
+
temp = reinterpret_steal<object>(PyUnicode_FromObject(load_src.ptr()));
if (!temp) { PyErr_Clear(); return false; }
load_src = temp;
@@ -780,9 +785,8 @@
#endif
}
-#if PY_MAJOR_VERSION >= 3
- // In Python 3, when loading into a std::string or char*, accept a bytes object as-is (i.e.
- // without any encoding/decoding attempt). For other C++ char sizes this is a no-op. Python 2,
+ // When loading into a std::string or char*, accept a bytes object as-is (i.e.
+ // without any encoding/decoding attempt). For other C++ char sizes this is a no-op.
// which supports loading a unicode from a str, doesn't take this path.
template <typename C = CharT>
bool load_bytes(enable_if_t<sizeof(C) == 1, handle> src) {
@@ -798,9 +802,9 @@
return false;
}
+
template <typename C = CharT>
bool load_bytes(enable_if_t<sizeof(C) != 1, handle>) { return false; }
-#endif
};
// Type caster for C-style strings. We basically use a std::string type caster, but also add the
diff --git a/tests/test_python_types.py b/tests/test_python_types.py
index 5e2761c..08bb3ab 100644
--- a/tests/test_python_types.py
+++ b/tests/test_python_types.py
@@ -554,6 +554,9 @@
assert string_length(byte("a\x00b")) == 3
assert strlen(byte("a\x00b")) == 1 # C-string limitation
+ # passing in a utf8 encoded string should work
+ assert string_length(u'💩'.encode("utf8")) == 4
+
def test_builtins_cast_return_none():
"""Casters produced with PYBIND11_TYPE_CASTER() should convert nullptr to None"""