[clang-format] make header guard identification stricter (with Lexer).

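Summary: Identify header guards more strictly by using the Lexer to look for an "#ifndef <identifier>" directive followed by a "#define <identifier>" directive after the leading comments, instead of treating the first line that matches DefineRegex as the header guard.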

Reviewers: djasper

Subscribers: klimek, cfe-commits

Differential Revision: http://reviews.llvm.org/D20959

llvm-svn: 271883
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index c1b3182..7ef9a30 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1436,6 +1436,49 @@
          llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
 }
 
+void skipComments(Lexer &Lex, Token &Tok) {
+  while (Tok.is(tok::comment))
+    if (Lex.LexFromRawLexer(Tok))
+      return;
+}
+
+// Check if the tokens starting at \p Tok are like "#<Name> <raw_identifier>".
+// If they are, \p Tok will be the token after this directive; otherwise, \p Tok
+// is left at the token where the match failed (possibly the given \p Tok).
+bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) {
+  bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+                 Tok.is(tok::raw_identifier) &&
+                 Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
+                 Tok.is(tok::raw_identifier);
+  if (Matched)
+    Lex.LexFromRawLexer(Tok);
+  return Matched;
+}
+
+unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
+                                               StringRef Code,
+                                               FormatStyle Style) {
+  std::unique_ptr<Environment> Env =
+      Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+  const SourceManager &SourceMgr = Env->getSourceManager();
+  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+            getFormattingLangOpts(Style));
+  Token Tok;
+  // Get the first token.
+  Lex.LexFromRawLexer(Tok);
+  skipComments(Lex, Tok);
+  unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
+  if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+    skipComments(Lex, Tok);
+    if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
+      return SourceMgr.getFileOffset(Tok.getLocation());
+  }
+  return AfterComments;
+}
+
+// FIXME: we also need to insert a '\n' at the end of the code if we have an
+// insertion with offset Code.size(), and there is no '\n' at the end of the
+// code.
 // FIXME: do not insert headers into conditional #include blocks, e.g. #includes
 // surrounded by compile condition "#if...".
 // FIXME: do not insert existing headers.
@@ -1469,20 +1512,6 @@
   StringRef FileName = Replaces.begin()->getFilePath();
   IncludeCategoryManager Categories(Style, FileName);
 
-  std::unique_ptr<Environment> Env =
-      Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
-  const SourceManager &SourceMgr = Env->getSourceManager();
-  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
-            getFormattingLangOpts(Style));
-  Token Tok;
-  // All new headers should be inserted after this offset.
-  int MinInsertOffset = Code.size();
-  while (!Lex.LexFromRawLexer(Tok)) {
-    if (Tok.isNot(tok::comment)) {
-      MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation());
-      break;
-    }
-  }
   // Record the offset of the end of the last include in each category.
   std::map<int, int> CategoryEndOffsets;
   // All possible priorities.
@@ -1491,26 +1520,25 @@
   for (const auto &Category : Style.IncludeCategories)
     Priorities.insert(Category.Priority);
   int FirstIncludeOffset = -1;
-  bool HeaderGuardFound = false;
+  // All new headers should be inserted after this offset.
+  unsigned MinInsertOffset =
+      getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
   StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
   SmallVector<StringRef, 32> Lines;
   TrimmedCode.split(Lines, '\n');
-  int Offset = MinInsertOffset;
+  unsigned Offset = MinInsertOffset;
+  unsigned NextLineOffset;
   for (auto Line : Lines) {
+    NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
     if (IncludeRegex.match(Line, &Matches)) {
       StringRef IncludeName = Matches[2];
       int Category = Categories.getIncludePriority(
           IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
-      CategoryEndOffsets[Category] = Offset + Line.size() + 1;
+      CategoryEndOffsets[Category] = NextLineOffset;
       if (FirstIncludeOffset < 0)
         FirstIncludeOffset = Offset;
     }
-    Offset += Line.size() + 1;
-    // FIXME: make header guard matching stricter, e.g. consider #ifndef.
-    if (!HeaderGuardFound && DefineRegex.match(Line)) {
-      HeaderGuardFound = true;
-      MinInsertOffset = Offset;
-    }
+    Offset = NextLineOffset;
   }
 
   // Populate CategoryEndOfssets: