More UTF string conversion wrappers
Added new string conversion wrappers that convert between `std::string` (of UTF-8 bytes) and `std::wstring`, which are particularly useful for Win32 interop. Also fixed a missing string conversion for `getenv` on Win32, using these new wrappers.
The motivation behind this is to provide the support functions required for LLDB to work properly on Windows with non-ASCII data; however, the functions are not LLDB-specific.
Patch by cameron314
Differential Revision: http://reviews.llvm.org/D17549
llvm-svn: 263247
diff --git a/llvm/unittests/Support/ConvertUTFTest.cpp b/llvm/unittests/Support/ConvertUTFTest.cpp
index d436fc0..61ed252 100644
--- a/llvm/unittests/Support/ConvertUTFTest.cpp
+++ b/llvm/unittests/Support/ConvertUTFTest.cpp
@@ -59,7 +59,7 @@
TEST(ConvertUTFTest, Empty) {
std::string Result;
- bool Success = convertUTF16ToUTF8String(None, Result);
+ bool Success = convertUTF16ToUTF8String(llvm::ArrayRef<char>(None), Result); // explicit ArrayRef<char>; presumably a bare None is ambiguous now that an ArrayRef<UTF16> argument form is also used in this file -- confirm
EXPECT_TRUE(Success);
EXPECT_TRUE(Result.empty());
}
@@ -80,6 +80,41 @@
EXPECT_FALSE(HasBOM);
}
+TEST(ConvertUTFTest, UTF16WrappersForConvertUTF16ToUTF8String) {
+ // Src is the look of disapproval (U+0CA0 '_' U+0CA0) as UTF-16LE bytes, preceded by an FF FE byte-order mark.
+ static const char Src[] = "\xff\xfe\xa0\x0c_\x00\xa0\x0c";
+ ArrayRef<UTF16> SrcRef = makeArrayRef((const UTF16 *)Src, 4); // 4 code units: BOM + 3 characters. NOTE(review): assumes Src is suitably aligned for UTF16 -- confirm
+ std::string Result;
+ bool Success = convertUTF16ToUTF8String(SrcRef, Result);
+ EXPECT_TRUE(Success);
+ std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); // UTF-8 form of the same three characters; no BOM is expected in the output
+ EXPECT_EQ(Expected, Result);
+}
+
+TEST(ConvertUTFTest, ConvertUTF8toWide) {
+ // Src is the look of disapproval (U+0CA0 '_' U+0CA0), encoded as UTF-8.
+ static const char Src[] = "\xe0\xb2\xa0_\xe0\xb2\xa0";
+ std::wstring Result;
+ bool Success = ConvertUTF8toWide((const char*)Src, Result); // first pass Src as a NUL-terminated const char *
+ EXPECT_TRUE(Success);
+ std::wstring Expected(L"\x0ca0_\x0ca0");
+ EXPECT_EQ(Expected, Result);
+ Result.clear();
+ Success = ConvertUTF8toWide(StringRef(Src, 7), Result); // then the same data as an explicit-length StringRef; 7 = bytes in Src excluding the terminating NUL
+ EXPECT_TRUE(Success);
+ EXPECT_EQ(Expected, Result);
+}
+
+TEST(ConvertUTFTest, convertWideToUTF8) {
+ // Src is the look of disapproval (U+0CA0 '_' U+0CA0) as a wide (wchar_t) string literal.
+ static const wchar_t Src[] = L"\x0ca0_\x0ca0";
+ std::string Result;
+ bool Success = convertWideToUTF8(Src, Result);
+ EXPECT_TRUE(Success);
+ std::string Expected("\xe0\xb2\xa0_\xe0\xb2\xa0"); // UTF-8 encoding of the same three characters
+ EXPECT_EQ(Expected, Result);
+}
+
struct ConvertUTFResultContainer {
ConversionResult ErrorCode;
std::vector<unsigned> UnicodeScalars;