[clang-scan-deps] Add an optimization that skips excluded conditional preprocessor blocks

This commit adds an optimization to clang-scan-deps and clang's preprocessor that skips excluded preprocessor
blocks by bumping the lexer pointer and not lexing the tokens until reaching the appropriate #else/#endif directive.
The skip positions and lexer offsets are computed when the file is minimized, directly from the minimized tokens.

On an 18-core iMac Pro with macOS Catalina Beta I got a 10-15% speed-up from this optimization when running clang-scan-deps on
the compilation database for a recent LLVM and Clang (3511 files).

Differential Revision: https://reviews.llvm.org/D67127

llvm-svn: 371656
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
index d9a98dc..a4ac950 100644
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -370,6 +370,37 @@
   return DiscardUntilEndOfDirective().getEnd();
 }
 
+Optional<unsigned> Preprocessor::getSkippedRangeForExcludedConditionalBlock(
+    SourceLocation HashLoc) {
+  if (!ExcludedConditionalDirectiveSkipMappings)
+    return None; // No skipped-range mappings are set.
+  if (!HashLoc.isFileID())
+    return None; // HashLoc is not a file location.
+
+  std::pair<FileID, unsigned> HashFileOffset =
+      SourceMgr.getDecomposedLoc(HashLoc);
+  const llvm::MemoryBuffer *Buf = SourceMgr.getBuffer(HashFileOffset.first);
+  auto It = ExcludedConditionalDirectiveSkipMappings->find(Buf);
+  if (It == ExcludedConditionalDirectiveSkipMappings->end())
+    return None; // No mapping is registered for this buffer.
+
+  const PreprocessorSkippedRangeMapping &SkippedRanges = *It->getSecond();
+  // The skipped ranges are keyed by the buffer offset of the directive's '#'.
+  auto MappingIt = SkippedRanges.find(HashFileOffset.second);
+  if (MappingIt == SkippedRanges.end())
+    return None; // This '#' does not start a mapped skipped range.
+
+  unsigned BytesToSkip = MappingIt->getSecond();
+  unsigned CurLexerBufferOffset = CurLexer->getCurrentBufferOffset();
+  assert(CurLexerBufferOffset >= HashFileOffset.second &&
+         "lexer is before the hash?");
+  // The mapped byte count is taken relative to the '#'. The lexer has already
+  // advanced past it, so return only the bytes that remain ahead of it.
+  unsigned LengthDiff = CurLexerBufferOffset - HashFileOffset.second;
+  assert(BytesToSkip >= LengthDiff && "lexer is after the skipped range?");
+  return BytesToSkip - LengthDiff;
+}
+
 /// SkipExcludedConditionalBlock - We just read a \#if or related directive and
 /// decided that the subsequent tokens are in the \#if'd out portion of the
 /// file.  Lex the rest of the file, until we see an \#endif.  If
@@ -396,6 +427,11 @@
   // disabling warnings, etc.
   CurPPLexer->LexingRawMode = true;
   Token Tok;
+  if (auto SkipLength =
+          getSkippedRangeForExcludedConditionalBlock(HashTokenLoc)) {
+    // Skip to the next '#endif' / '#else' / '#elif'.
+    CurLexer->skipOver(*SkipLength);
+  }
   while (true) {
     CurLexer->Lex(Tok);