[Support] Harden JSON against invalid UTF-8.

Parsing invalid UTF-8 input is now a parse error.
Creating JSON values from invalid UTF-8 now triggers an assertion, and
(in no-assert builds) substitutes the Unicode replacement character.
Strings retrieved from json::Value are always valid UTF-8.

llvm-svn: 336657
diff --git a/llvm/unittests/Support/JSONTest.cpp b/llvm/unittests/Support/JSONTest.cpp
index 07580aa..64a2bb9 100644
--- a/llvm/unittests/Support/JSONTest.cpp
+++ b/llvm/unittests/Support/JSONTest.cpp
@@ -27,6 +27,14 @@
   EXPECT_EQ(R"("foo")", s("foo"));
   EXPECT_EQ("[1,2,3]", s({1, 2, 3}));
   EXPECT_EQ(R"({"x":10,"y":20})", s(Object{{"x", 10}, {"y", 20}}));
+
+#ifdef NDEBUG
+  EXPECT_EQ(R"("��")", s("\xC0\x80"));
+  EXPECT_EQ(R"({"��":0})", s(Object{{"\xC0\x80", 0}}));
+#else
+  EXPECT_DEATH(s("\xC0\x80"), "Invalid UTF-8");
+  EXPECT_DEATH(s(Object{{"\xC0\x80", 0}}), "Invalid UTF-8");
+#endif
 }
 
 TEST(JSONTest, Constructors) {
@@ -181,6 +189,31 @@
   "valid": 1,
   invalid: 2
 })");
+  ExpectErr("Invalid UTF-8 sequence", "\"\xC0\x80\""); // Modified UTF-8 null
+}
+
+// Direct tests of isUTF8 and fixUTF8. Internal uses are also tested elsewhere.
+TEST(JSONTest, UTF8) {
+  for (const char *Valid : { // Well-formed: 1-, 2- and 4-byte sequences.
+           "this is ASCII text",
+           "thïs tëxt häs BMP chäräctërs",
+           "𐌶𐌰L𐌾𐍈 C𐍈𐌼𐌴𐍃",
+       }) {
+    EXPECT_TRUE(isUTF8(Valid)) << Valid; // Accepted as valid.
+    EXPECT_EQ(fixUTF8(Valid), Valid); // Returned unchanged.
+  }
+  for (auto Invalid : std::vector<std::pair<const char *, const char *>>{
+           {"lone trailing \x81\x82 bytes", "lone trailing �� bytes"},
+           {"missing trailing \xD0 bytes", "missing trailing � bytes"},
+           {"truncated character \xD0", "truncated character �"},
+           {"not \xC1\x80 the \xE0\x9f\xBF shortest \xF0\x83\x83\x83 encoding",
+            "not �� the ��� shortest ���� encoding"},
+           {"too \xF9\x80\x80\x80\x80 long", "too ����� long"},
+           {"surrogate \xED\xA0\x80 invalid \xF4\x90\x80\x80",
+            "surrogate ��� invalid ����"}}) {
+    EXPECT_FALSE(isUTF8(Invalid.first)) << Invalid.first; // Input rejected.
+    EXPECT_EQ(fixUTF8(Invalid.first), Invalid.second); // U+FFFD per bad byte.
+  }
 }
 
 TEST(JSONTest, Inspection) {