User-defined literals: reject string and character UDLs in all places where the grammar requires a string-literal and not a user-defined-string-literal. The two constructs are still represented by the same TokenKind, in order to prevent a combinatorial explosion of different kinds of token. A flag on Token tracks whether a ud-suffix is present, in order to prevent clients from needing to look at the token's spelling. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152098 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 99831e4677a7e2e051af636221694d60ba31fcdb [log] [tgz]
author: Richard Smith <richard-llvm@metafoo.co.uk> Tue Mar 06 03:21:47 2012 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> Tue Mar 06 03:21:47 2012 +0000
tree: 4684bde38a7659db0c97673dcab98cffce576f9b
parent: c1b0f7fa9b755ab59129ae85187d0d4f91379995 [diff]
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 2b24d1c..a7fba80 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp

@@ -1582,6 +1582,7 @@
   unsigned Size;
   char C = getCharAndSize(CurPtr, Size);
   if (isIdentifierHead(C)) {
+    Result.setFlag(Token::HasUDSuffix);
     do {
       CurPtr = ConsumeChar(CurPtr, Size, Result);
       C = getCharAndSize(CurPtr, Size);

diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index 28043d1..e6851af 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp

@@ -617,6 +617,12 @@
     break;
       
   case tok::string_literal: {
+    if (LToken.hasUDSuffix()) {
+      Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl);
+      HadError = true;
+      goto retry;
+    }
+
     // Parse the string literal.
     LangOptions LangOpts;
     StringLiteralParser StringLiteral(&LToken, 1, SourceMgr, LangOpts, *Target);

diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 1e8f590..7345ef2 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp

@@ -822,8 +822,10 @@
     ; // ok
   else if (StrTok.isNot(tok::string_literal)) {
     Diag(StrTok, diag::err_pp_line_invalid_filename);
-    DiscardUntilEndOfDirective();
-    return;
+    return DiscardUntilEndOfDirective();
+  } else if (StrTok.hasUDSuffix()) {
+    Diag(StrTok, diag::err_invalid_string_udl);
+    return DiscardUntilEndOfDirective();
   } else {
     // Parse and validate the string, converting it into a unique ID.
     StringLiteralParser Literal(&StrTok, 1, *this);
@@ -957,6 +959,9 @@
   else if (StrTok.isNot(tok::string_literal)) {
     Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
     return DiscardUntilEndOfDirective();
+  } else if (StrTok.hasUDSuffix()) {
+    Diag(StrTok, diag::err_invalid_string_udl);
+    return DiscardUntilEndOfDirective();
   } else {
     // Parse and validate the string, converting it into a unique ID.
     StringLiteralParser Literal(&StrTok, 1, *this);
@@ -1047,6 +1052,11 @@
     return;
   }
 
+  if (StrTok.hasUDSuffix()) {
+    Diag(StrTok, diag::err_invalid_string_udl);
+    return DiscardUntilEndOfDirective();
+  }
+
   // Verify that there is nothing after the string, other than EOD.
   CheckEndOfDirective("ident");
 

diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index c4ab143..8d8fe31 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp

@@ -251,6 +251,10 @@
   case tok::wide_char_constant: {   // L'x'
   case tok::utf16_char_constant:    // u'x'
   case tok::utf32_char_constant:    // U'x'
+    // Complain about, and drop, any ud-suffix.
+    if (PeekTok.hasUDSuffix())
+      PP.Diag(PeekTok, diag::err_pp_invalid_char_udl);
+
     SmallString<32> CharBuffer;
     bool CharInvalid = false;
     StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);

diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 99f2b23..777e0db 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp

@@ -825,6 +825,16 @@
     return false;
   }
 
+  // Get ')'.
+  PP.LexNonComment(Tok);
+
+  // Ensure we have a trailing ).
+  if (Tok.isNot(tok::r_paren)) {
+    PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName();
+    PP.Diag(LParenLoc, diag::note_matching) << "(";
+    return false;
+  }
+
   bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
   // If GetIncludeFilenameSpelling set the start ptr to null, there was an
   // error.
@@ -836,20 +846,8 @@
   const FileEntry *File =
       PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL);
 
-  // Get the result value.  Result = true means the file exists.
-  bool Result = File != 0;
-
-  // Get ')'.
-  PP.LexNonComment(Tok);
-
-  // Ensure we have a trailing ).
-  if (Tok.isNot(tok::r_paren)) {
-    PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName();
-    PP.Diag(LParenLoc, diag::note_matching) << "(";
-    return false;
-  }
-
-  return Result;
+  // Get the result value.  A result of true means the file exists.
+  return File != 0;
 }
 
 /// EvaluateHasInclude - Process a '__has_include("path")' expression.
@@ -1091,6 +1089,9 @@
         // from macro expansion.
         SmallVector<Token, 4> StrToks;
         while (Tok.is(tok::string_literal)) {
+          // Complain about, and drop, any ud-suffix.
+          if (Tok.hasUDSuffix())
+            Diag(Tok, diag::err_invalid_string_udl);
           StrToks.push_back(Tok);
           LexUnexpandedToken(Tok);
         }

diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index 046a4d0..404feaa 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp

@@ -133,6 +133,20 @@
   Lex(Tok);
   if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
+    // Skip this token, and the ')', if present.
+    if (Tok.isNot(tok::r_paren))
+      Lex(Tok);
+    if (Tok.is(tok::r_paren))
+      Lex(Tok);
+    return;
+  }
+
+  if (Tok.hasUDSuffix()) {
+    Diag(Tok, diag::err_invalid_string_udl);
+    // Skip this token, and the ')', if present.
+    Lex(Tok);
+    if (Tok.is(tok::r_paren))
+      Lex(Tok);
     return;
   }
 
@@ -442,6 +456,8 @@
     // "foo " "bar" "Baz"
     SmallVector<Token, 4> StrToks;
     while (Tok.is(tok::string_literal)) {
+      if (Tok.hasUDSuffix())
+        Diag(Tok, diag::err_invalid_string_udl);
       StrToks.push_back(Tok);
       Lex(Tok);
     }
@@ -518,6 +534,8 @@
   // "foo " "bar" "Baz"
   SmallVector<Token, 4> StrToks;
   while (Tok.is(tok::string_literal)) {
+    if (Tok.hasUDSuffix())
+      Diag(Tok, diag::err_invalid_string_udl);
     StrToks.push_back(Tok);
     Lex(Tok);
   }
@@ -577,6 +595,11 @@
     return 0;
   }
 
+  if (Tok.hasUDSuffix()) {
+    Diag(Tok, diag::err_invalid_string_udl);
+    return 0;
+  }
+
   // Remember the macro string.
   std::string StrVal = getSpelling(Tok);
 

diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 335d864..ca7e55d 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp

@@ -85,6 +85,19 @@
   TokenInfo[tok::hash            ] |= aci_custom_firstchar;
   TokenInfo[tok::arrow           ] |= aci_custom_firstchar;
 
+  // These tokens have custom code in C++11 mode.
+  if (PP.getLangOptions().CPlusPlus0x) {
+    TokenInfo[tok::string_literal      ] |= aci_custom;
+    TokenInfo[tok::wide_string_literal ] |= aci_custom;
+    TokenInfo[tok::utf8_string_literal ] |= aci_custom;
+    TokenInfo[tok::utf16_string_literal] |= aci_custom;
+    TokenInfo[tok::utf32_string_literal] |= aci_custom;
+    TokenInfo[tok::char_constant       ] |= aci_custom;
+    TokenInfo[tok::wide_char_constant  ] |= aci_custom;
+    TokenInfo[tok::utf16_char_constant ] |= aci_custom;
+    TokenInfo[tok::utf32_char_constant ] |= aci_custom;
+  }
+
   // These tokens change behavior if followed by an '='.
   TokenInfo[tok::amp         ] |= aci_avoid_equal;           // &=
   TokenInfo[tok::plus        ] |= aci_avoid_equal;           // +=
@@ -183,6 +196,28 @@
   case tok::raw_identifier:
     llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
 
+  case tok::string_literal:
+  case tok::wide_string_literal:
+  case tok::utf8_string_literal:
+  case tok::utf16_string_literal:
+  case tok::utf32_string_literal:
+  case tok::char_constant:
+  case tok::wide_char_constant:
+  case tok::utf16_char_constant:
+  case tok::utf32_char_constant:
+    if (!PP.getLangOptions().CPlusPlus0x)
+      return false;
+
+    // In C++11, a string or character literal followed by an identifier is a
+    // single token.
+    if (Tok.getIdentifierInfo())
+      return true;
+
+    // A ud-suffix is an identifier. If the previous token ends with one, treat
+    // it as an identifier.
+    if (!PrevTok.hasUDSuffix())
+      return false;
+    // FALL THROUGH.
   case tok::identifier:   // id+id or id+number or id+L"foo".
     // id+'.'... will not append.
     if (Tok.is(tok::numeric_constant))
@@ -201,9 +236,11 @@
     // Otherwise, this is a narrow character or string.  If the *identifier*
     // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
     return IsIdentifierStringPrefix(PrevTok);
+
   case tok::numeric_constant:
     return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
-    FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
+           FirstChar == '+' || FirstChar == '-' || FirstChar == '.' ||
+           (PP.getLangOptions().CPlusPlus0x && FirstChar == '_');
   case tok::period:          // ..., .*, .1234
     return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
     isdigit(FirstChar) ||
commit	99831e4677a7e2e051af636221694d60ba31fcdb	[log] [tgz]
author	Richard Smith <richard-llvm@metafoo.co.uk>	Tue Mar 06 03:21:47 2012 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	Tue Mar 06 03:21:47 2012 +0000
tree	4684bde38a7659db0c97673dcab98cffce576f9b
parent	c1b0f7fa9b755ab59129ae85187d0d4f91379995 [diff]