| David Tolnay | 3384c14 | 2020-09-14 00:26:47 -0400 | [diff] [blame] | 1 | use alloc::borrow::Cow; |
| David Tolnay | c5a52f9 | 2020-09-14 00:43:29 -0400 | [diff] [blame] | 2 | use alloc::string::String; |
| David Tolnay | 3384c14 | 2020-09-14 00:26:47 -0400 | [diff] [blame] | 3 | use core::fmt::{self, Debug, Display}; |
| 4 | use core::slice; |
| 5 | use core::str::{self, Utf8Error}; |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 6 | |
| 7 | extern "C" { |
| David Tolnay | 8f16ae7 | 2020-10-08 18:21:13 -0700 | [diff] [blame] | 8 | #[link_name = "cxxbridge05$cxx_string$data"] |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 9 | fn string_data(_: &CxxString) -> *const u8; |
| David Tolnay | 8f16ae7 | 2020-10-08 18:21:13 -0700 | [diff] [blame] | 10 | #[link_name = "cxxbridge05$cxx_string$length"] |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 11 | fn string_length(_: &CxxString) -> usize; |
| 12 | } |
| 13 | |
/// Binding to C++ `std::string`.
///
/// # Invariants
///
/// Rust code can never obtain a `CxxString` by value. This is an invariant
/// upheld jointly by this API and by the static analysis performed by the
/// cxx::bridge macro. The reason is that C++'s string requires a move
/// constructor and may hold internal pointers, neither of which is compatible
/// with Rust's move behavior. Consequently Rust code only ever looks at a
/// CxxString through a reference or smart pointer, as in `&CxxString` or
/// `UniquePtr<CxxString>`.
#[repr(C)]
pub struct CxxString {
    // Zero-length private field: the type exposes no Rust-visible contents
    // and cannot be constructed by code outside this module.
    _private: [u8; 0],
}
| 28 | |
| 29 | impl CxxString { |
| 30 | /// Returns the length of the string in bytes. |
| 31 | /// |
| 32 | /// Matches the behavior of C++ [std::string::size][size]. |
| 33 | /// |
| 34 | /// [size]: https://en.cppreference.com/w/cpp/string/basic_string/size |
| 35 | pub fn len(&self) -> usize { |
| 36 | unsafe { string_length(self) } |
| 37 | } |
| 38 | |
| 39 | /// Returns true if `self` has a length of zero bytes. |
| David Tolnay | d7b8a6e | 2020-04-24 16:22:55 -0700 | [diff] [blame] | 40 | /// |
| 41 | /// Matches the behavior of C++ [std::string::empty][empty]. |
| 42 | /// |
| 43 | /// [empty]: https://en.cppreference.com/w/cpp/string/basic_string/empty |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 44 | pub fn is_empty(&self) -> bool { |
| 45 | self.len() == 0 |
| 46 | } |
| 47 | |
| 48 | /// Returns a byte slice of this string's contents. |
| 49 | pub fn as_bytes(&self) -> &[u8] { |
| 50 | let data = self.as_ptr(); |
| 51 | let len = self.len(); |
| 52 | unsafe { slice::from_raw_parts(data, len) } |
| 53 | } |
| 54 | |
| 55 | /// Produces a pointer to the first character of the string. |
| 56 | /// |
| 57 | /// Matches the behavior of C++ [std::string::data][data]. |
| 58 | /// |
| 59 | /// Note that the return type may look like `const char *` but is not a |
| 60 | /// `const char *` in the typical C sense, as C++ strings may contain |
| 61 | /// internal null bytes. As such, the returned pointer only makes sense as a |
| David Tolnay | 3cd990f | 2020-04-24 16:24:26 -0700 | [diff] [blame] | 62 | /// string in combination with the length returned by [`len()`][len]. |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 63 | /// |
| 64 | /// [data]: https://en.cppreference.com/w/cpp/string/basic_string/data |
| David Tolnay | 3cd990f | 2020-04-24 16:24:26 -0700 | [diff] [blame] | 65 | /// [len]: #method.len |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 66 | pub fn as_ptr(&self) -> *const u8 { |
| 67 | unsafe { string_data(self) } |
| 68 | } |
| 69 | |
| 70 | /// Validates that the C++ string contains UTF-8 data and produces a view of |
| 71 | /// it as a Rust &str, otherwise an error. |
| 72 | pub fn to_str(&self) -> Result<&str, Utf8Error> { |
| 73 | str::from_utf8(self.as_bytes()) |
| 74 | } |
| 75 | |
| 76 | /// If the contents of the C++ string are valid UTF-8, this function returns |
| 77 | /// a view as a Cow::Borrowed &str. Otherwise replaces any invalid UTF-8 |
| 78 | /// sequences with the U+FFFD [replacement character] and returns a |
| 79 | /// Cow::Owned String. |
| 80 | /// |
| 81 | /// [replacement character]: https://doc.rust-lang.org/std/char/constant.REPLACEMENT_CHARACTER.html |
| 82 | pub fn to_string_lossy(&self) -> Cow<str> { |
| 83 | String::from_utf8_lossy(self.as_bytes()) |
| 84 | } |
| 85 | } |
| 86 | |
| 87 | impl Display for CxxString { |
| 88 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| David Tolnay | d930a79 | 2020-03-25 12:24:40 -0700 | [diff] [blame] | 89 | Display::fmt(self.to_string_lossy().as_ref(), f) |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 90 | } |
| 91 | } |
| 92 | |
| 93 | impl Debug for CxxString { |
| 94 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| David Tolnay | d930a79 | 2020-03-25 12:24:40 -0700 | [diff] [blame] | 95 | Debug::fmt(self.to_string_lossy().as_ref(), f) |
| David Tolnay | 7db7369 | 2019-10-20 14:51:12 -0400 | [diff] [blame] | 96 | } |
| 97 | } |
| David Tolnay | 42ebfa2 | 2020-03-25 12:26:22 -0700 | [diff] [blame] | 98 | |
| 99 | impl PartialEq for CxxString { |
| 100 | fn eq(&self, other: &CxxString) -> bool { |
| 101 | self.as_bytes() == other.as_bytes() |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | impl PartialEq<CxxString> for str { |
| 106 | fn eq(&self, other: &CxxString) -> bool { |
| 107 | self.as_bytes() == other.as_bytes() |
| 108 | } |
| 109 | } |
| 110 | |
| 111 | impl PartialEq<str> for CxxString { |
| 112 | fn eq(&self, other: &str) -> bool { |
| 113 | self.as_bytes() == other.as_bytes() |
| 114 | } |
| 115 | } |