User-defined literals: reject string and character UDLs in all places where the
grammar requires a string-literal and not a user-defined-string-literal. The
two constructs are still represented by the same TokenKind, in order to prevent
a combinatorial explosion of different kinds of token. A flag on Token tracks
whether a ud-suffix is present, in order to prevent clients from needing to look
at the token's spelling.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152098 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Lex/Lexer.cpp b/lib/Lex/Lexer.cpp
index 2b24d1c..a7fba80 100644
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@@ -1582,6 +1582,7 @@
unsigned Size;
char C = getCharAndSize(CurPtr, Size);
if (isIdentifierHead(C)) {
+ Result.setFlag(Token::HasUDSuffix);
do {
CurPtr = ConsumeChar(CurPtr, Size, Result);
C = getCharAndSize(CurPtr, Size);
diff --git a/lib/Lex/ModuleMap.cpp b/lib/Lex/ModuleMap.cpp
index 28043d1..e6851af 100644
--- a/lib/Lex/ModuleMap.cpp
+++ b/lib/Lex/ModuleMap.cpp
@@ -617,6 +617,12 @@
break;
case tok::string_literal: {
+ if (LToken.hasUDSuffix()) {
+ Diags.Report(LToken.getLocation(), diag::err_invalid_string_udl);
+ HadError = true;
+ goto retry;
+ }
+
// Parse the string literal.
LangOptions LangOpts;
StringLiteralParser StringLiteral(&LToken, 1, SourceMgr, LangOpts, *Target);
diff --git a/lib/Lex/PPDirectives.cpp b/lib/Lex/PPDirectives.cpp
index 1e8f590..7345ef2 100644
--- a/lib/Lex/PPDirectives.cpp
+++ b/lib/Lex/PPDirectives.cpp
@@ -822,8 +822,10 @@
; // ok
else if (StrTok.isNot(tok::string_literal)) {
Diag(StrTok, diag::err_pp_line_invalid_filename);
- DiscardUntilEndOfDirective();
- return;
+ return DiscardUntilEndOfDirective();
+ } else if (StrTok.hasUDSuffix()) {
+ Diag(StrTok, diag::err_invalid_string_udl);
+ return DiscardUntilEndOfDirective();
} else {
// Parse and validate the string, converting it into a unique ID.
StringLiteralParser Literal(&StrTok, 1, *this);
@@ -957,6 +959,9 @@
else if (StrTok.isNot(tok::string_literal)) {
Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
return DiscardUntilEndOfDirective();
+ } else if (StrTok.hasUDSuffix()) {
+ Diag(StrTok, diag::err_invalid_string_udl);
+ return DiscardUntilEndOfDirective();
} else {
// Parse and validate the string, converting it into a unique ID.
StringLiteralParser Literal(&StrTok, 1, *this);
@@ -1047,6 +1052,11 @@
return;
}
+ if (StrTok.hasUDSuffix()) {
+ Diag(StrTok, diag::err_invalid_string_udl);
+ return DiscardUntilEndOfDirective();
+ }
+
// Verify that there is nothing after the string, other than EOD.
CheckEndOfDirective("ident");
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index c4ab143..8d8fe31 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -251,6 +251,10 @@
case tok::wide_char_constant: { // L'x'
case tok::utf16_char_constant: // u'x'
case tok::utf32_char_constant: // U'x'
+ // Complain about, and drop, any ud-suffix.
+ if (PeekTok.hasUDSuffix())
+ PP.Diag(PeekTok, diag::err_pp_invalid_char_udl);
+
SmallString<32> CharBuffer;
bool CharInvalid = false;
StringRef ThisTok = PP.getSpelling(PeekTok, CharBuffer, &CharInvalid);
diff --git a/lib/Lex/PPMacroExpansion.cpp b/lib/Lex/PPMacroExpansion.cpp
index 99f2b23..777e0db 100644
--- a/lib/Lex/PPMacroExpansion.cpp
+++ b/lib/Lex/PPMacroExpansion.cpp
@@ -825,6 +825,16 @@
return false;
}
+ // Get ')'.
+ PP.LexNonComment(Tok);
+
+ // Ensure we have a trailing ).
+ if (Tok.isNot(tok::r_paren)) {
+ PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName();
+ PP.Diag(LParenLoc, diag::note_matching) << "(";
+ return false;
+ }
+
bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
// If GetIncludeFilenameSpelling set the start ptr to null, there was an
// error.
@@ -836,20 +846,8 @@
const FileEntry *File =
PP.LookupFile(Filename, isAngled, LookupFrom, CurDir, NULL, NULL, NULL);
- // Get the result value. Result = true means the file exists.
- bool Result = File != 0;
-
- // Get ')'.
- PP.LexNonComment(Tok);
-
- // Ensure we have a trailing ).
- if (Tok.isNot(tok::r_paren)) {
- PP.Diag(Tok.getLocation(), diag::err_pp_missing_rparen) << II->getName();
- PP.Diag(LParenLoc, diag::note_matching) << "(";
- return false;
- }
-
- return Result;
+ // Get the result value. A result of true means the file exists.
+ return File != 0;
}
/// EvaluateHasInclude - Process a '__has_include("path")' expression.
@@ -1091,6 +1089,9 @@
// from macro expansion.
SmallVector<Token, 4> StrToks;
while (Tok.is(tok::string_literal)) {
+ // Complain about, and drop, any ud-suffix.
+ if (Tok.hasUDSuffix())
+ Diag(Tok, diag::err_invalid_string_udl);
StrToks.push_back(Tok);
LexUnexpandedToken(Tok);
}
diff --git a/lib/Lex/Pragma.cpp b/lib/Lex/Pragma.cpp
index 046a4d0..404feaa 100644
--- a/lib/Lex/Pragma.cpp
+++ b/lib/Lex/Pragma.cpp
@@ -133,6 +133,20 @@
Lex(Tok);
if (Tok.isNot(tok::string_literal) && Tok.isNot(tok::wide_string_literal)) {
Diag(PragmaLoc, diag::err__Pragma_malformed);
+ // Skip this token, and the ')', if present.
+ if (Tok.isNot(tok::r_paren))
+ Lex(Tok);
+ if (Tok.is(tok::r_paren))
+ Lex(Tok);
+ return;
+ }
+
+ if (Tok.hasUDSuffix()) {
+ Diag(Tok, diag::err_invalid_string_udl);
+ // Skip this token, and the ')', if present.
+ Lex(Tok);
+ if (Tok.is(tok::r_paren))
+ Lex(Tok);
return;
}
@@ -442,6 +456,8 @@
// "foo " "bar" "Baz"
SmallVector<Token, 4> StrToks;
while (Tok.is(tok::string_literal)) {
+ if (Tok.hasUDSuffix())
+ Diag(Tok, diag::err_invalid_string_udl);
StrToks.push_back(Tok);
Lex(Tok);
}
@@ -518,6 +534,8 @@
// "foo " "bar" "Baz"
SmallVector<Token, 4> StrToks;
while (Tok.is(tok::string_literal)) {
+ if (Tok.hasUDSuffix())
+ Diag(Tok, diag::err_invalid_string_udl);
StrToks.push_back(Tok);
Lex(Tok);
}
@@ -577,6 +595,11 @@
return 0;
}
+ if (Tok.hasUDSuffix()) {
+ Diag(Tok, diag::err_invalid_string_udl);
+ return 0;
+ }
+
// Remember the macro string.
std::string StrVal = getSpelling(Tok);
diff --git a/lib/Lex/TokenConcatenation.cpp b/lib/Lex/TokenConcatenation.cpp
index 335d864..ca7e55d 100644
--- a/lib/Lex/TokenConcatenation.cpp
+++ b/lib/Lex/TokenConcatenation.cpp
@@ -85,6 +85,19 @@
TokenInfo[tok::hash ] |= aci_custom_firstchar;
TokenInfo[tok::arrow ] |= aci_custom_firstchar;
+ // These tokens have custom code in C++11 mode.
+ if (PP.getLangOptions().CPlusPlus0x) {
+ TokenInfo[tok::string_literal ] |= aci_custom;
+ TokenInfo[tok::wide_string_literal ] |= aci_custom;
+ TokenInfo[tok::utf8_string_literal ] |= aci_custom;
+ TokenInfo[tok::utf16_string_literal] |= aci_custom;
+ TokenInfo[tok::utf32_string_literal] |= aci_custom;
+ TokenInfo[tok::char_constant ] |= aci_custom;
+ TokenInfo[tok::wide_char_constant ] |= aci_custom;
+ TokenInfo[tok::utf16_char_constant ] |= aci_custom;
+ TokenInfo[tok::utf32_char_constant ] |= aci_custom;
+ }
+
// These tokens change behavior if followed by an '='.
TokenInfo[tok::amp ] |= aci_avoid_equal; // &=
TokenInfo[tok::plus ] |= aci_avoid_equal; // +=
@@ -183,6 +196,28 @@
case tok::raw_identifier:
llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
+ case tok::string_literal:
+ case tok::wide_string_literal:
+ case tok::utf8_string_literal:
+ case tok::utf16_string_literal:
+ case tok::utf32_string_literal:
+ case tok::char_constant:
+ case tok::wide_char_constant:
+ case tok::utf16_char_constant:
+ case tok::utf32_char_constant:
+ if (!PP.getLangOptions().CPlusPlus0x)
+ return false;
+
+ // In C++11, a string or character literal followed by an identifier is a
+ // single token.
+ if (Tok.getIdentifierInfo())
+ return true;
+
+ // A ud-suffix is an identifier. If the previous token ends with one, treat
+ // it as an identifier.
+ if (!PrevTok.hasUDSuffix())
+ return false;
+ // FALL THROUGH.
case tok::identifier: // id+id or id+number or id+L"foo".
// id+'.'... will not append.
if (Tok.is(tok::numeric_constant))
@@ -201,9 +236,11 @@
// Otherwise, this is a narrow character or string. If the *identifier*
// is a literal 'L', 'u8', 'u' or 'U', avoid pasting L "foo" -> L"foo".
return IsIdentifierStringPrefix(PrevTok);
+
case tok::numeric_constant:
return isalnum(FirstChar) || Tok.is(tok::numeric_constant) ||
- FirstChar == '+' || FirstChar == '-' || FirstChar == '.';
+ FirstChar == '+' || FirstChar == '-' || FirstChar == '.' ||
+ (PP.getLangOptions().CPlusPlus0x && FirstChar == '_');
case tok::period: // ..., .*, .1234
return (FirstChar == '.' && PrevPrevTok.is(tok::period)) ||
isdigit(FirstChar) ||