Remove use of lookahead from _Pragma handling and from all other internal lexing steps in the preprocessor. It is not safe to use the preprocessor's token lookahead except when operating on the final sequence of tokens that would be produced by phase 4 of translation. Using lookahead at any earlier point corrupts the token lookahead cache used by the parser. (See added testcase for an example.) Lookahead should instead be viewed as a layer on top of the normal lexer. Added assertions to catch any further incorrect uses of lookahead within lexing actions. llvm-svn: 358230

commit: 75f9681874252ac96db002a80df4202ec2cdd2f8 [log] [tgz]
author: Richard Smith <richard-llvm@metafoo.co.uk> Thu Apr 11 21:18:22 2019 +0000
committer: Richard Smith <richard-llvm@metafoo.co.uk> Thu Apr 11 21:18:22 2019 +0000
tree: f5f585fac34b89251395afacd92b5f94bd338f71
parent: 6ffa1511eaedfae67ea4c66c2264b46eaccbbd7d [diff] [blame]
diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp
index 286b863..1c87f3e 100644
--- a/clang/lib/Lex/Pragma.cpp
+++ b/clang/lib/Lex/Pragma.cpp

@@ -144,84 +144,72 @@
     DiscardUntilEndOfDirective();
 }
 
-namespace {
-
-/// Helper class for \see Preprocessor::Handle_Pragma.
-class LexingFor_PragmaRAII {
-  Preprocessor &PP;
-  bool InMacroArgPreExpansion;
-  bool Failed = false;
-  Token &OutTok;
-  Token PragmaTok;
-
-public:
-  LexingFor_PragmaRAII(Preprocessor &PP, bool InMacroArgPreExpansion,
-                       Token &Tok)
-      : PP(PP), InMacroArgPreExpansion(InMacroArgPreExpansion), OutTok(Tok) {
-    if (InMacroArgPreExpansion) {
-      PragmaTok = OutTok;
-      PP.EnableBacktrackAtThisPos();
-    }
-  }
-
-  ~LexingFor_PragmaRAII() {
-    if (InMacroArgPreExpansion) {
-      // When committing/backtracking the cached pragma tokens in a macro
-      // argument pre-expansion we want to ensure that either the tokens which
-      // have been committed will be removed from the cache or that the tokens
-      // over which we just backtracked won't remain in the cache after they're
-      // consumed and that the caching will stop after consuming them.
-      // Otherwise the caching will interfere with the way macro expansion
-      // works, because we will continue to cache tokens after consuming the
-      // backtracked tokens, which shouldn't happen when we're dealing with
-      // macro argument pre-expansion.
-      auto CachedTokenRange = PP.LastCachedTokenRange();
-      if (Failed) {
-        PP.CommitBacktrackedTokens();
-      } else {
-        PP.Backtrack();
-        OutTok = PragmaTok;
-      }
-      PP.EraseCachedTokens(CachedTokenRange);
-    }
-  }
-
-  void failed() {
-    Failed = true;
-  }
-};
-
-} // namespace
-
 /// Handle_Pragma - Read a _Pragma directive, slice it up, process it, then
 /// return the first token after the directive.  The _Pragma token has just
 /// been read into 'Tok'.
 void Preprocessor::Handle_Pragma(Token &Tok) {
-  // This works differently if we are pre-expanding a macro argument.
-  // In that case we don't actually "activate" the pragma now, we only lex it
-  // until we are sure it is lexically correct and then we backtrack so that
-  // we activate the pragma whenever we encounter the tokens again in the token
-  // stream. This ensures that we will activate it in the correct location
-  // or that we will ignore it if it never enters the token stream, e.g:
+  // C11 6.10.3.4/3:
+  //   all pragma unary operator expressions within [a completely
+  //   macro-replaced preprocessing token sequence] are [...] processed [after
+  //   rescanning is complete]
   //
-  //     #define EMPTY(x)
-  //     #define INACTIVE(x) EMPTY(x)
-  //     INACTIVE(_Pragma("clang diagnostic ignored \"-Wconversion\""))
+  // This means that we execute _Pragma operators in two cases:
+  //
+  //  1) on token sequences that would otherwise be produced as the output of
+  //     phase 4 of preprocessing, and
+  //  2) on token sequences formed as the macro-replaced token sequence of a
+  //     macro argument
+  //
+  // Case #2 appears to be a wording bug: only _Pragmas that would survive to
+  // the end of phase 4 should actually be executed. Discussion on the WG14
+  // mailing list suggests that a _Pragma operator is notionally checked early,
+  // but only pragmas that survive to the end of phase 4 should be executed.
+  //
+  // In Case #2, we check the syntax now, but then put the tokens back into the
+  // token stream for later consumption.
 
-  LexingFor_PragmaRAII _PragmaLexing(*this, InMacroArgPreExpansion, Tok);
+  struct TokenCollector {
+    Preprocessor &Self;
+    bool Collect;
+    SmallVector<Token, 3> Tokens;
+    Token &Tok;
+
+    void lex() {
+      if (Collect)
+        Tokens.push_back(Tok);
+      Self.Lex(Tok);
+    }
+
+    void revert() {
+      assert(Collect && "did not collect tokens");
+      assert(!Tokens.empty() && "collected unexpected number of tokens");
+
+      // Push the ( "string" ) tokens into the token stream.
+      auto Toks = llvm::make_unique<Token[]>(Tokens.size());
+      std::copy(Tokens.begin() + 1, Tokens.end(), Toks.get());
+      Toks[Tokens.size() - 1] = Tok;
+      Self.EnterTokenStream(std::move(Toks), Tokens.size(),
+                            /*DisableMacroExpansion*/ true);
+
+      // ... and return the _Pragma token unchanged.
+      Tok = *Tokens.begin();
+    }
+  };
+
+  TokenCollector Toks = {*this, InMacroArgPreExpansion, {}, Tok};
 
   // Remember the pragma token location.
   SourceLocation PragmaLoc = Tok.getLocation();
 
   // Read the '('.
-  Lex(Tok);
+  Toks.lex();
   if (Tok.isNot(tok::l_paren)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
   }
 
   // Read the '"..."'.
-  Lex(Tok);
+  Toks.lex();
   if (!tok::isStringLiteral(Tok.getKind())) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
     // Skip bad tokens, and the ')', if present.
@@ -233,7 +221,7 @@
       Lex(Tok);
     if (Tok.is(tok::r_paren))
       Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
   }
 
   if (Tok.hasUDSuffix()) {
@@ -242,21 +230,24 @@
     Lex(Tok);
     if (Tok.is(tok::r_paren))
       Lex(Tok);
-    return _PragmaLexing.failed();
+    return;
   }
 
   // Remember the string.
   Token StrTok = Tok;
 
   // Read the ')'.
-  Lex(Tok);
+  Toks.lex();
   if (Tok.isNot(tok::r_paren)) {
     Diag(PragmaLoc, diag::err__Pragma_malformed);
-    return _PragmaLexing.failed();
+    return;
   }
 
-  if (InMacroArgPreExpansion)
+  // If we're expanding a macro argument, put the tokens back.
+  if (InMacroArgPreExpansion) {
+    Toks.revert();
     return;
+  }
 
   SourceLocation RParenLoc = Tok.getLocation();
   std::string StrVal = getSpelling(StrTok);
commit	75f9681874252ac96db002a80df4202ec2cdd2f8	[log] [tgz]
author	Richard Smith <richard-llvm@metafoo.co.uk>	Thu Apr 11 21:18:22 2019 +0000
committer	Richard Smith <richard-llvm@metafoo.co.uk>	Thu Apr 11 21:18:22 2019 +0000
tree	f5f585fac34b89251395afacd92b5f94bd338f71
parent	6ffa1511eaedfae67ea4c66c2264b46eaccbbd7d [diff] [blame]