User-defined literals: reject string and character UDLs in all places where the grammar requires a string-literal and not a user-defined-string-literal. The two constructs are still represented by the same TokenKind, in order to prevent a combinatorial explosion of different kinds of token. A flag on Token tracks whether a ud-suffix is present, in order to prevent clients from needing to look at the token's spelling. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152098 91177308-0d34-0410-b5e6-96231b3b80d8

commit: 99831e4677a7e2e051af636221694d60ba31fcdb [log] [tgz]
author: Richard Smith <richard-llvm@metafoo.co.uk> Tue Mar 06 03:21:47 2012 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> Tue Mar 06 03:21:47 2012 +0000
tree: 4684bde38a7659db0c97673dcab98cffce576f9b
parent: c1b0f7fa9b755ab59129ae85187d0d4f91379995 [diff] [blame]
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 335d864..ca7e55d 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp

@@ -85,6 +85,19 @@
   TokenInfo[tok::hash            ] |= aci_custom_firstchar;
   TokenInfo[tok::arrow           ] |= aci_custom_firstchar;
 
+  // These tokens have custom code in C++11 mode.
+  if (PP.getLangOptions().CPlusPlus0x) {
+    TokenInfo[tok::string_literal      ] |= aci_custom;
+    TokenInfo[tok::wide_string_literal ] |= aci_custom;
+    TokenInfo[tok::utf8_string_literal ] |= aci_custom;
+    TokenInfo[tok::utf16_string_literal] |= aci_custom;
+    TokenInfo[tok::utf32_string_literal] |= aci_custom;
+    TokenInfo[tok::char_constant       ] |= aci_custom;
+    TokenInfo[tok::wide_char_constant  ] |= aci_custom;
+    TokenInfo[tok::utf16_char_constant ] |= aci_custom;
+    TokenInfo[tok::utf32_char_constant ] |= aci_custom;
+  }
+
   // These tokens change behavior if followed by an '='.
   TokenInfo[tok::amp         ] |= aci_avoid_equal;           // &=
   TokenInfo[tok::plus        ] |= aci_avoid_equal;           // +=
@@ -183,6 +196,28 @@
   case tok::raw_identifier:
     llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
 
+  case tok::string_literal:
+  case tok::wide_string_literal:
+  case tok::utf8_string_literal:
+  case tok::utf16_string_literal:
+  case tok::utf32_string_literal:
+  case tok::char_constant:
+  case tok::wide_char_constant:
+  case tok::utf16_char_constant:
+  case tok::utf32_char_constant:
+    if (!PP.getLangOptions().CPlusPlus0x)
+      return false;
+
+    // In C++11, a string or character literal followed by an identifier is a
+    // single token.
+    if (Tok.getIdentifierInfo())
+      return true;
+
+    // A ud-suffix is an identifier. If the previous token ends with one, treat
+    // it as an identifier.
+    if (!PrevTok.hasUDSuffix())
+      return false;
+    // FALL THROUGH.
   case tok::identifier:   // id+id or id+number or id+L"foo".
     // id+'.'... will not append.
     if (Tok.is(tok::numeric_constant))
@@ -201,9 +236,11 @@
     // Otherwise, this is a narrow character or string.  If the *identifier*
     // is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
     return IsIdentifierStringPrefix(PrevTok);
+
   case tok::numeric_constant:
     return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
-    FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
+           FirstChar == '+' || FirstChar == '-' || FirstChar == '.' ||
+           (PP.getLangOptions().CPlusPlus0x && FirstChar == '_');
   case tok::period:          // ..., .*, .1234
     return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
     isdigit(FirstChar) ||
commit	99831e4677a7e2e051af636221694d60ba31fcdb	[log] [tgz]
author	Richard Smith <richard-llvm@metafoo.co.uk>	Tue Mar 06 03:21:47 2012 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	Tue Mar 06 03:21:47 2012 +0000
tree	4684bde38a7659db0c97673dcab98cffce576f9b
parent	c1b0f7fa9b755ab59129ae85187d0d4f91379995 [diff] [blame]