Have Sema check for validity of CGString literal
instead of asserting in IRGen. Fixes radar 8390459.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@113253 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/Basic/DiagnosticSemaKinds.td b/include/clang/Basic/DiagnosticSemaKinds.td
index 3bb85fc..e3c9967 100644
--- a/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3091,6 +3091,9 @@
   "CFString literal is not a string constant">;
 def warn_cfstring_literal_contains_nul_character : Warning<
   "CFString literal contains NUL character">;
+def warn_cfstring_truncated : Warning<
+  "input conversion stopped due to an input byte that does not "
+  "belong to the input codeset UTF-8">;
 
 // Statements.
 def err_continue_not_in_loop : Error<
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index d125b37..6a527a2 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1498,15 +1498,6 @@
                                                &ToPtr, ToPtr + NumBytes,
                                                strictConversion);
 
-  // Check for conversion failure.
-  if (Result != conversionOK) {
-    // FIXME: Have Sema::CheckObjCString() validate the UTF-8 string and remove
-    // this duplicate code.
-    assert(Result == sourceIllegal && "UTF-8 to UTF-16 conversion failed");
-    StringLength = NumBytes;
-    return Map.GetOrCreateValue(String);
-  }
-
   // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
 
diff --git a/lib/Sema/SemaChecking.cpp b/lib/Sema/SemaChecking.cpp
index a0b4b98..7b0941e 100644
--- a/lib/Sema/SemaChecking.cpp
+++ b/lib/Sema/SemaChecking.cpp
@@ -32,6 +32,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/Basic/TargetInfo.h"
+#include "clang/Basic/ConvertUTF.h"
+
 #include <limits>
 using namespace clang;
 using namespace sema;
@@ -581,9 +583,6 @@
 
 /// CheckObjCString - Checks that the argument to the builtin
 /// CFString constructor is correct
-/// FIXME: GCC currently emits the following warning:
-/// "warning: input conversion stopped due to an input byte that does not
-///           belong to the input codeset UTF-8"
 /// Note: It might also make sense to do the UTF-16 conversion here (would
 /// simplify the backend).
 bool Sema::CheckObjCString(Expr *Arg) {
@@ -602,7 +601,21 @@
          diag::warn_cfstring_literal_contains_nul_character)
       << Arg->getSourceRange();
   }
-
+  if (Literal->containsNonAsciiOrNull()) {
+    llvm::StringRef String = Literal->getString();
+    unsigned NumBytes = String.size();
+    llvm::SmallVector<UTF16, 128> ToBuf(NumBytes);
+    const UTF8 *FromPtr = (UTF8 *)String.data();
+    UTF16 *ToPtr = &ToBuf[0];
+    
+    ConversionResult Result = ConvertUTF8toUTF16(&FromPtr, FromPtr + NumBytes,
+                                                 &ToPtr, ToPtr + NumBytes,
+                                                 strictConversion);
+    // Check for conversion failure.
+    if (Result != conversionOK)
+      Diag(Arg->getLocStart(),
+           diag::warn_cfstring_truncated) << Arg->getSourceRange();
+  }
   return false;
 }
 
diff --git a/test/CodeGen/illegal-UTF8.m b/test/CodeGen/illegal-UTF8.m
index 871e6e5..4762e80 100644
--- a/test/CodeGen/illegal-UTF8.m
+++ b/test/CodeGen/illegal-UTF8.m
@@ -2,7 +2,5 @@
 
 @class NSString;
 
-// FIXME: GCC emits the following warning:
-// CodeGen/illegal-UTF8.m:4: warning: input conversion stopped due to an input byte that does not belong to the input codeset UTF-8
 
-NSString *S = @"\xff\xff___WAIT___";
+NSString *S = @"\xff\xff___WAIT___"; // expected-warning {{input conversion stopped due to an input byte that does not belong to the input codeset UTF-8}}
diff --git a/test/Sema/builtins.c b/test/Sema/builtins.c
index 787630c..21a1f72 100644
--- a/test/Sema/builtins.c
+++ b/test/Sema/builtins.c
@@ -26,7 +26,7 @@
 #define CFSTR __builtin___CFStringMakeConstantString
 void test7() {
   const void *X;
-  X = CFSTR("\242");
+  X = CFSTR("\242"); // expected-warning {{input conversion stopped}}
   X = CFSTR("\0"); // expected-warning {{ CFString literal contains NUL character }}
   X = CFSTR(242); // expected-error {{ CFString literal is not a string constant }} expected-warning {{incompatible integer to pointer conversion}}
   X = CFSTR("foo", "bar"); // expected-error {{too many arguments to function call}}