blob: d3d128cf890211ef342bb4e6d309ebdfb4291ecd [file] [log] [blame]
David Tolnay7db73692019-10-20 14:51:12 -04001use std::borrow::Cow;
2use std::fmt::{self, Debug, Display};
3use std::slice;
4use std::str::{self, Utf8Error};
5
6extern "C" {
David Tolnay8c730492020-03-13 01:29:06 -07007 #[link_name = "cxxbridge02$cxx_string$data"]
David Tolnay7db73692019-10-20 14:51:12 -04008 fn string_data(_: &CxxString) -> *const u8;
David Tolnay8c730492020-03-13 01:29:06 -07009 #[link_name = "cxxbridge02$cxx_string$length"]
David Tolnay7db73692019-10-20 14:51:12 -040010 fn string_length(_: &CxxString) -> usize;
11}
12
/// Rust-side view of a C++ `std::string`.
///
/// # Invariants
///
/// Rust code never holds a `CxxString` by value; this API together with the
/// static analysis of the cxx::bridge macro upholds that. A C++ string needs
/// its move constructor when relocated and may hold internal pointers, which
/// is incompatible with how Rust moves values. A `CxxString` is therefore only
/// ever observed behind a reference or smart pointer, such as `&CxxString` or
/// `UniquePtr<CxxString>`.
#[repr(C)]
pub struct CxxString {
    // Private zero-length field: code outside this module cannot write a
    // struct literal for this type.
    _private: [u8; 0],
}
27
28impl CxxString {
29 /// Returns the length of the string in bytes.
30 ///
31 /// Matches the behavior of C++ [std::string::size][size].
32 ///
33 /// [size]: https://en.cppreference.com/w/cpp/string/basic_string/size
34 pub fn len(&self) -> usize {
35 unsafe { string_length(self) }
36 }
37
38 /// Returns true if `self` has a length of zero bytes.
39 pub fn is_empty(&self) -> bool {
40 self.len() == 0
41 }
42
43 /// Returns a byte slice of this string's contents.
44 pub fn as_bytes(&self) -> &[u8] {
45 let data = self.as_ptr();
46 let len = self.len();
47 unsafe { slice::from_raw_parts(data, len) }
48 }
49
50 /// Produces a pointer to the first character of the string.
51 ///
52 /// Matches the behavior of C++ [std::string::data][data].
53 ///
54 /// Note that the return type may look like `const char *` but is not a
55 /// `const char *` in the typical C sense, as C++ strings may contain
56 /// internal null bytes. As such, the returned pointer only makes sense as a
57 /// string in combination with the length returned by [`len()`](#len).
58 ///
59 /// [data]: https://en.cppreference.com/w/cpp/string/basic_string/data
60 pub fn as_ptr(&self) -> *const u8 {
61 unsafe { string_data(self) }
62 }
63
64 /// Validates that the C++ string contains UTF-8 data and produces a view of
65 /// it as a Rust &amp;str, otherwise an error.
66 pub fn to_str(&self) -> Result<&str, Utf8Error> {
67 str::from_utf8(self.as_bytes())
68 }
69
70 /// If the contents of the C++ string are valid UTF-8, this function returns
71 /// a view as a Cow::Borrowed &amp;str. Otherwise replaces any invalid UTF-8
72 /// sequences with the U+FFFD [replacement character] and returns a
73 /// Cow::Owned String.
74 ///
75 /// [replacement character]: https://doc.rust-lang.org/std/char/constant.REPLACEMENT_CHARACTER.html
76 pub fn to_string_lossy(&self) -> Cow<str> {
77 String::from_utf8_lossy(self.as_bytes())
78 }
79}
80
81impl Display for CxxString {
82 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayd930a792020-03-25 12:24:40 -070083 Display::fmt(self.to_string_lossy().as_ref(), f)
David Tolnay7db73692019-10-20 14:51:12 -040084 }
85}
86
87impl Debug for CxxString {
88 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
David Tolnayd930a792020-03-25 12:24:40 -070089 Debug::fmt(self.to_string_lossy().as_ref(), f)
David Tolnay7db73692019-10-20 14:51:12 -040090 }
91}
David Tolnay42ebfa22020-03-25 12:26:22 -070092
93impl PartialEq for CxxString {
94 fn eq(&self, other: &CxxString) -> bool {
95 self.as_bytes() == other.as_bytes()
96 }
97}
98
99impl PartialEq<CxxString> for str {
100 fn eq(&self, other: &CxxString) -> bool {
101 self.as_bytes() == other.as_bytes()
102 }
103}
104
105impl PartialEq<str> for CxxString {
106 fn eq(&self, other: &str) -> bool {
107 self.as_bytes() == other.as_bytes()
108 }
109}