[clang-format] insert new #includes into correct blocks when cleaning up Replacement with cleanupAroundReplacements().

Summary:
When a replacement's offset is set to UINT_MAX or -1U, it is treated as
a header insertion replacement by cleanupAroundReplacements(). The new #include
directive is then inserted into the correct block.

Reviewers: klimek, djasper

Subscribers: klimek, cfe-commits, bkramer

Differential Revision: http://reviews.llvm.org/D20734

llvm-svn: 271276
diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp
index 9fe22d3..33c3a7f 100644
--- a/clang/lib/Format/Format.cpp
+++ b/clang/lib/Format/Format.cpp
@@ -1262,6 +1262,65 @@
                                        result.size(), result));
 }
 
+namespace {
+
+// This class manages priorities of #include categories and calculates
+// priorities for headers.
+class IncludeCategoryManager {
+public:
+  IncludeCategoryManager(const FormatStyle &Style, StringRef FileName)
+      : Style(Style), FileName(FileName) {
+    FileStem = llvm::sys::path::stem(FileName);
+    for (const auto &Category : Style.IncludeCategories)
+      CategoryRegexs.emplace_back(Category.Regex);
+    IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
+                 FileName.endswith(".cpp") || FileName.endswith(".c++") ||
+                 FileName.endswith(".cxx") || FileName.endswith(".m") ||
+                 FileName.endswith(".mm");
+  }
+
+  // Returns the priority of the category which \p IncludeName belongs to.
+  // If \p CheckMainHeader is true and \p IncludeName is a main header, returns
+  // 0. Otherwise, returns the priority of the matching category or INT_MAX.
+  int getIncludePriority(StringRef IncludeName, bool CheckMainHeader) {
+    int Ret = INT_MAX;
+    for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
+      if (CategoryRegexs[i].match(IncludeName)) {
+        Ret = Style.IncludeCategories[i].Priority;
+        break;
+      }
+    if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
+      Ret = 0;
+    return Ret;
+  }
+
+private:
+  bool isMainHeader(StringRef IncludeName) const {
+    if (!IncludeName.startswith("\""))
+      return false;
+    StringRef HeaderStem =
+        llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
+    if (FileStem.startswith(HeaderStem)) {
+      llvm::Regex MainIncludeRegex(
+          (HeaderStem + Style.IncludeIsMainRegex).str());
+      if (MainIncludeRegex.match(FileStem))
+        return true;
+    }
+    return false;
+  }
+
+  const FormatStyle &Style;
+  bool IsMainFile;
+  StringRef FileName;
+  StringRef FileStem;
+  SmallVector<llvm::Regex, 4> CategoryRegexs;
+};
+
+const char IncludeRegexPattern[] =
+    R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
+
+} // anonymous namespace
+
 tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
                                       ArrayRef<tooling::Range> Ranges,
                                       StringRef FileName,
@@ -1269,8 +1328,7 @@
                                       unsigned *Cursor) {
   unsigned Prev = 0;
   unsigned SearchFrom = 0;
-  llvm::Regex IncludeRegex(
-      R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))");
+  llvm::Regex IncludeRegex(IncludeRegexPattern);
   SmallVector<StringRef, 4> Matches;
   SmallVector<IncludeDirective, 16> IncludesInBlock;
 
@@ -1281,19 +1339,9 @@
   //
   // FIXME: Do some sanity checking, e.g. edit distance of the base name, to fix
   // cases where the first #include is unlikely to be the main header.
-  bool IsSource = FileName.endswith(".c") || FileName.endswith(".cc") ||
-                  FileName.endswith(".cpp") || FileName.endswith(".c++") ||
-                  FileName.endswith(".cxx") || FileName.endswith(".m") ||
-                  FileName.endswith(".mm");
-  StringRef FileStem = llvm::sys::path::stem(FileName);
+  IncludeCategoryManager Categories(Style, FileName);
   bool FirstIncludeBlock = true;
   bool MainIncludeFound = false;
-
-  // Create pre-compiled regular expressions for the #include categories.
-  SmallVector<llvm::Regex, 4> CategoryRegexs;
-  for (const auto &Category : Style.IncludeCategories)
-    CategoryRegexs.emplace_back(Category.Regex);
-
   bool FormattingOff = false;
 
   for (;;) {
@@ -1310,26 +1358,11 @@
     if (!FormattingOff && !Line.endswith("\\")) {
       if (IncludeRegex.match(Line, &Matches)) {
         StringRef IncludeName = Matches[2];
-        int Category = INT_MAX;
-        for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i) {
-          if (CategoryRegexs[i].match(IncludeName)) {
-            Category = Style.IncludeCategories[i].Priority;
-            break;
-          }
-        }
-        if (IsSource && !MainIncludeFound && Category > 0 &&
-            FirstIncludeBlock && IncludeName.startswith("\"")) {
-          StringRef HeaderStem =
-              llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
-          if (FileStem.startswith(HeaderStem)) {
-            llvm::Regex MainIncludeRegex(
-                (HeaderStem + Style.IncludeIsMainRegex).str());
-            if (MainIncludeRegex.match(FileStem)) {
-              Category = 0;
-              MainIncludeFound = true;
-            }
-          }
-        }
+        int Category = Categories.getIncludePriority(
+            IncludeName,
+            /*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock);
+        if (Category == 0)
+          MainIncludeFound = true;
         IncludesInBlock.push_back({IncludeName, Line, Prev, Category});
       } else if (!IncludesInBlock.empty()) {
         sortCppIncludes(Style, IncludesInBlock, Ranges, FileName, Replaces,
@@ -1402,6 +1435,113 @@
   return processReplacements(Reformat, Code, SortedReplaces, Style);
 }
 
+namespace {
+
+inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
+  return Replace.getOffset() == UINT_MAX &&
+         llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
+}
+
+// FIXME: do not insert headers into conditional #include blocks, e.g. #includes
+// surrounded by compile condition "#if...".
+// FIXME: do not insert existing headers.
+// FIXME: insert empty lines between newly created blocks.
+tooling::Replacements
+fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
+                        const FormatStyle &Style) {
+  if (Style.Language != FormatStyle::LanguageKind::LK_Cpp)
+    return Replaces;
+
+  tooling::Replacements HeaderInsertions;
+  for (const auto &R : Replaces) {
+    if (isHeaderInsertion(R))
+      HeaderInsertions.insert(R);
+    else if (R.getOffset() == UINT_MAX)
+      llvm::errs() << "Insertions other than header #include insertion are "
+                      "not supported! "
+                   << R.getReplacementText() << "\n";
+  }
+  if (HeaderInsertions.empty())
+    return Replaces;
+  tooling::Replacements Result;
+  std::set_difference(Replaces.begin(), Replaces.end(),
+                      HeaderInsertions.begin(), HeaderInsertions.end(),
+                      std::inserter(Result, Result.begin()));
+
+  llvm::Regex IncludeRegex(IncludeRegexPattern);
+  llvm::Regex DefineRegex(R"(^[\t\ ]*#[\t\ ]*define[\t\ ]*[^\\]*$)");
+  SmallVector<StringRef, 4> Matches;
+
+  StringRef FileName = Replaces.begin()->getFilePath();
+  IncludeCategoryManager Categories(Style, FileName);
+
+  // Record the offset of the end of the last include in each category.
+  std::map<int, int> CategoryEndOffsets;
+  // All possible priorities.
+  // Add 0 for main header and INT_MAX for headers that are not in any category.
+  std::set<int> Priorities = {0, INT_MAX};
+  for (const auto &Category : Style.IncludeCategories)
+    Priorities.insert(Category.Priority);
+  int FirstIncludeOffset = -1;
+  int Offset = 0;
+  int AfterHeaderGuard = 0;
+  SmallVector<StringRef, 32> Lines;
+  Code.split(Lines, '\n');
+  for (auto Line : Lines) {
+    if (IncludeRegex.match(Line, &Matches)) {
+      StringRef IncludeName = Matches[2];
+      int Category = Categories.getIncludePriority(
+          IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
+      CategoryEndOffsets[Category] = Offset + Line.size() + 1;
+      if (FirstIncludeOffset < 0)
+        FirstIncludeOffset = Offset;
+    }
+    Offset += Line.size() + 1;
+    // FIXME: make header guard matching stricter, e.g. consider #ifndef.
+    if (AfterHeaderGuard == 0 && DefineRegex.match(Line))
+      AfterHeaderGuard = Offset;
+  }
+
+  // Populate CategoryEndOfssets:
+  // - Ensure that CategoryEndOffset[Highest] is always populated.
+  // - If CategoryEndOffset[Priority] isn't set, use the next higher value that
+  //   is set, up to CategoryEndOffset[Highest].
+  // FIXME: skip comment section in the beginning of the code if there is no
+  // existing #include and #define.
+  auto Highest = Priorities.begin();
+  if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
+    if (FirstIncludeOffset >= 0)
+      CategoryEndOffsets[*Highest] = FirstIncludeOffset;
+    else
+      CategoryEndOffsets[*Highest] = AfterHeaderGuard;
+  }
+  // By this point, CategoryEndOffset[Highest] is always set appropriately:
+  //  - to an appropriate location before/after existing #includes, or
+  //  - to right after the header guard, or
+  //  - to the beginning of the file.
+  for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
+    if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
+      CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
+
+  for (const auto &R : HeaderInsertions) {
+    auto IncludeDirective = R.getReplacementText();
+    bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
+    assert(Matched && "Header insertion replacement must have replacement text "
+                      "'#include ...'");
+    auto IncludeName = Matches[2];
+    int Category =
+        Categories.getIncludePriority(IncludeName, /*CheckMainHeader=*/true);
+    Offset = CategoryEndOffsets[Category];
+    std::string NewInclude = !IncludeDirective.endswith("\n")
+                                 ? (IncludeDirective + "\n").str()
+                                 : IncludeDirective.str();
+    Result.insert(tooling::Replacement(FileName, Offset, 0, NewInclude));
+  }
+  return Result;
+}
+
+} // anonymous namespace
+
 tooling::Replacements
 cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces,
                           const FormatStyle &Style) {
@@ -1412,7 +1552,10 @@
                     StringRef FileName) -> tooling::Replacements {
     return cleanup(Style, Code, Ranges, FileName);
   };
-  return processReplacements(Cleanup, Code, Replaces, Style);
+  // Make header insertion replacements insert new headers into correct blocks.
+  tooling::Replacements NewReplaces =
+      fixCppIncludeInsertions(Code, Replaces, Style);
+  return processReplacements(Cleanup, Code, NewReplaces, Style);
 }
 
 tooling::Replacements reformat(const FormatStyle &Style, SourceManager &SM,