| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 1 | use std::borrow::Cow; |
| 2 | use std::fmt::{self, Debug, Display}; |
| 3 | use std::slice; |
| 4 | use std::str::{self, Utf8Error}; |
| 5 | |
| 6 | extern "C" { |
| David Tolnay | 8c73049 | 2020-03-13 01:29:06 -0700 | [diff] [blame] | 7 | #[link_name = "cxxbridge02$cxx_string$data"] |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 8 | fn string_data(_: &CxxString) -> *const u8; |
| David Tolnay | 8c73049 | 2020-03-13 01:29:06 -0700 | [diff] [blame] | 9 | #[link_name = "cxxbridge02$cxx_string$length"] |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 10 | fn string_length(_: &CxxString) -> usize; |
| 11 | } |
| 12 | |
/// Binding to C++ `std::string`.
///
/// # Invariants
///
/// As an invariant of this API and the static analysis of the cxx::bridge
/// macro, in Rust code we can never obtain a `CxxString` by value. C++'s string
/// requires a move constructor and may hold internal pointers, which is not
/// compatible with Rust's move behavior. Instead in Rust code we will only ever
/// look at a `CxxString` through a reference or smart pointer, as in
/// `&CxxString` or `UniquePtr<CxxString>`.
#[repr(C)]
pub struct CxxString {
    // Zero-length, private placeholder: the Rust-side type stores no bytes of
    // its own, and code outside this module cannot name the field to build a
    // value.
    _private: [u8; 0],
}
| 27 | |
| 28 | impl CxxString { |
| 29 | /// Returns the length of the string in bytes. |
| 30 | /// |
| 31 | /// Matches the behavior of C++ [std::string::size][size]. |
| 32 | /// |
| 33 | /// [size]: https://en.cppreference.com/w/cpp/string/basic_string/size |
| 34 | pub fn len(&self) -> usize { |
| 35 | unsafe { string_length(self) } |
| 36 | } |
| 37 | |
| 38 | /// Returns true if `self` has a length of zero bytes. |
| David Tolnay | d7b8a6e | 2020-04-24 16:22:55 -0700 | [diff] [blame] | 39 | /// |
| 40 | /// Matches the behavior of C++ [std::string::empty][empty]. |
| 41 | /// |
| 42 | /// [empty]: https://en.cppreference.com/w/cpp/string/basic_string/empty |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 43 | pub fn is_empty(&self) -> bool { |
| 44 | self.len() == 0 |
| 45 | } |
| 46 | |
| 47 | /// Returns a byte slice of this string's contents. |
| 48 | pub fn as_bytes(&self) -> &[u8] { |
| 49 | let data = self.as_ptr(); |
| 50 | let len = self.len(); |
| 51 | unsafe { slice::from_raw_parts(data, len) } |
| 52 | } |
| 53 | |
| 54 | /// Produces a pointer to the first character of the string. |
| 55 | /// |
| 56 | /// Matches the behavior of C++ [std::string::data][data]. |
| 57 | /// |
| 58 | /// Note that the return type may look like `const char *` but is not a |
| 59 | /// `const char *` in the typical C sense, as C++ strings may contain |
| 60 | /// internal null bytes. As such, the returned pointer only makes sense as a |
| David Tolnay | 3cd990f | 2020-04-24 16:24:26 -0700 | [diff] [blame^] | 61 | /// string in combination with the length returned by [`len()`][len]. |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 62 | /// |
| 63 | /// [data]: https://en.cppreference.com/w/cpp/string/basic_string/data |
| David Tolnay | 3cd990f | 2020-04-24 16:24:26 -0700 | [diff] [blame^] | 64 | /// [len]: #method.len |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 65 | pub fn as_ptr(&self) -> *const u8 { |
| 66 | unsafe { string_data(self) } |
| 67 | } |
| 68 | |
| 69 | /// Validates that the C++ string contains UTF-8 data and produces a view of |
| 70 | /// it as a Rust &str, otherwise an error. |
| 71 | pub fn to_str(&self) -> Result<&str, Utf8Error> { |
| 72 | str::from_utf8(self.as_bytes()) |
| 73 | } |
| 74 | |
| 75 | /// If the contents of the C++ string are valid UTF-8, this function returns |
| 76 | /// a view as a Cow::Borrowed &str. Otherwise replaces any invalid UTF-8 |
| 77 | /// sequences with the U+FFFD [replacement character] and returns a |
| 78 | /// Cow::Owned String. |
| 79 | /// |
| 80 | /// [replacement character]: https://doc.rust-lang.org/std/char/constant.REPLACEMENT_CHARACTER.html |
| 81 | pub fn to_string_lossy(&self) -> Cow<str> { |
| 82 | String::from_utf8_lossy(self.as_bytes()) |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | impl Display for CxxString { |
| 87 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| David Tolnay | d930a79 | 2020-03-25 12:24:40 -0700 | [diff] [blame] | 88 | Display::fmt(self.to_string_lossy().as_ref(), f) |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 89 | } |
| 90 | } |
| 91 | |
| 92 | impl Debug for CxxString { |
| 93 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| David Tolnay | d930a79 | 2020-03-25 12:24:40 -0700 | [diff] [blame] | 94 | Debug::fmt(self.to_string_lossy().as_ref(), f) |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 95 | } |
| 96 | } |
| David Tolnay | 42ebfa2 | 2020-03-25 12:26:22 -0700 | [diff] [blame] | 97 | |
| 98 | impl PartialEq for CxxString { |
| 99 | fn eq(&self, other: &CxxString) -> bool { |
| 100 | self.as_bytes() == other.as_bytes() |
| 101 | } |
| 102 | } |
| 103 | |
| 104 | impl PartialEq<CxxString> for str { |
| 105 | fn eq(&self, other: &CxxString) -> bool { |
| 106 | self.as_bytes() == other.as_bytes() |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | impl PartialEq<str> for CxxString { |
| 111 | fn eq(&self, other: &str) -> bool { |
| 112 | self.as_bytes() == other.as_bytes() |
| 113 | } |
| 114 | } |