When checking the encoding of an 8-bit string literal, don't just check the first codepoint! Also, don't reject empty raw string literals for spurious "encoding" issues. Also, don't rely on undefined behavior in ConvertUTF.c.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152344 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 49d517467c3dcd2c67e8a6c740ba5160e37625f7 [log] [tgz]
author: Richard Smith <richard-llvm@metafoo.co.uk> Thu Mar 08 21:59:28 2012 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> Thu Mar 08 21:59:28 2012 +0000
tree: a41241684cb33d87947bea6ae6a0c7caf07a8933
parent: 188158db29f50443b6e412f2a40c800b2669c957 [diff] [blame]
diff --git a/lib/Lex/LiteralSupport.cpp b/lib/Lex/LiteralSupport.cpp
index 901d96d..e0a5ba3 100644
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp

@@ -333,7 +333,7 @@
 ///         decimal-constant integer-suffix
 ///         octal-constant integer-suffix
 ///         hexadecimal-constant integer-suffix
-///       user-defiend-integer-literal: [C++11 lex.ext]
+///       user-defined-integer-literal: [C++11 lex.ext]
 ///         decimal-literal ud-suffix
 ///         octal-literal ud-suffix
 ///         hexadecimal-literal ud-suffix
@@ -1167,17 +1167,14 @@
         ++ThisTokBuf;
       ++ThisTokBuf; // skip '('
 
-      // remove same number of characters from the end
-      if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
-        ThisTokEnd -= (ThisTokBuf - Prefix);
+      // Remove same number of characters from the end
+      ThisTokEnd -= ThisTokBuf - Prefix;
+      assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
 
       // Copy the string over
-      if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf)))
-      {
+      if (CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)))
         if (DiagnoseBadString(StringToks[i]))
           hadError = true;
-      }
-
     } else {
       assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
       ++ThisTokBuf; // skip "
@@ -1204,11 +1201,9 @@
           } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
 
           // Copy the character span over.
-          if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart)))
-          {
+          if (CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart)))
             if (DiagnoseBadString(StringToks[i]))
               hadError = true;
-          }
           continue;
         }
         // Is this a Universal Character Name escape?
@@ -1292,8 +1287,8 @@
   ConversionResult result = conversionOK;
   // Copy the character span over.
   if (CharByteWidth == 1) {
-    if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()),
-                             reinterpret_cast<const UTF8*>(Fragment.end())))
+    if (!isLegalUTF8String(reinterpret_cast<const UTF8*>(Fragment.begin()),
+                           reinterpret_cast<const UTF8*>(Fragment.end())))
       result = sourceIllegal;
     memcpy(ResultPtr, Fragment.data(), Fragment.size());
     ResultPtr += Fragment.size();
commit	49d517467c3dcd2c67e8a6c740ba5160e37625f7	[log] [tgz]
author	Richard Smith <richard-llvm@metafoo.co.uk>	Thu Mar 08 21:59:28 2012 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	Thu Mar 08 21:59:28 2012 +0000
tree	a41241684cb33d87947bea6ae6a0c7caf07a8933
parent	188158db29f50443b6e412f2a40c800b2669c957 [diff] [blame]