Added Delta IR Reduction Tool

Summary: Tool parses input IR file, and runs the delta debugging algorithm to reduce the functions inside the input file.

Reviewers: alexshap, chandlerc

Subscribers: mgorny, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63672

> llvm-svn: 368071

llvm-svn: 368358
diff --git a/llvm/tools/llvm-reduce/deltas/Delta.cpp b/llvm/tools/llvm-reduce/deltas/Delta.cpp
new file mode 100644
index 0000000..cf8d884
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/Delta.cpp
@@ -0,0 +1,179 @@
+//===- Delta.cpp - Delta Debugging Algorithm Implementation ---------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation for the Delta Debugging Algorithm:
+// it splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
+// into chunks and tries to reduce the number of chunks that are interesting.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/ADT/STLExtras.h"
+
+/// Prints the textual IR of \p M to \p FD (the descriptor opened for
+/// \p Filepath) and keeps the file if no stream error occurred.
+/// \returns true on failure, false on success.
+static bool writeProgramToFile(StringRef Filepath, int FD, const Module &M) {
+  ToolOutputFile Output(Filepath, FD);
+  auto &Stream = Output.os();
+  M.print(Stream, /*AnnotationWriter=*/nullptr);
+  Stream.close();
+  if (Stream.has_error())
+    return true;
+  Output.keep(); return false;
+}
+
+/// Writes the textual IR of \p M to a new, uniquely named file in \p TmpDir
+/// and returns its path; on any failure it reports on errs() and exits(1).
+static SmallString<128> createTmpFile(Module *M, StringRef TmpDir) {
+  // Each '%' becomes a random hex digit. This file never deletes its temp
+  // files, so 3 digits (4096 names) can run out on long runs; use 8 instead.
+  SmallString<128> Model;
+  sys::path::append(Model, TmpDir, "tmp-%%%%%%%%.ll");
+  int UniqueFD;
+  SmallString<128> UniqueFilepath;
+  if (std::error_code EC =
+          sys::fs::createUniqueFile(Model, UniqueFD, UniqueFilepath)) {
+    errs() << "Error making unique filename: " << EC.message() << "!\n";
+    exit(1);
+  }
+  // The module is printed as textual IR (not bitcode); true means failure.
+  if (writeProgramToFile(UniqueFilepath, UniqueFD, *M)) {
+    errs() << "Error emitting IR to file '" << UniqueFilepath << "'!\n";
+    exit(1);
+  }
+  return UniqueFilepath;
+}
+
+/// Prints each chunk via Chunk::print() ("No Chunks" if the list is empty).
+/// Unless \p Oneline is set, every chunk goes on its own tab-indented line.
+static void printChunks(const std::vector<Chunk> &Chunks,
+                        bool Oneline = false) {
+  if (Chunks.empty()) {
+    outs() << "No Chunks";
+    return;
+  }
+  for (const Chunk &C : Chunks) {
+    if (!Oneline)
+      outs() << '\t';
+    C.print();
+    if (!Oneline)
+      outs() << '\n';
+  }
+}
+
+/// Returns the number of lines std::getline() can read from \p Filepath.
+/// A file that cannot be opened simply yields 0.
+static unsigned getLines(StringRef Filepath) {
+  std::ifstream Input(Filepath);
+  std::string Scratch;
+  unsigned Count = 0;
+
+  while (std::getline(Input, Scratch))
+    Count += 1;
+  return Count;
+}
+
+/// Halves every chunk spanning more than one target and keeps single-target
+/// chunks as they are. If anything was split, \p Chunks is replaced by the
+/// finer list and printed. \returns whether at least one chunk was split.
+static bool increaseGranularity(std::vector<Chunk> &Chunks) {
+  outs() << "Increasing granularity...";
+  std::vector<Chunk> Finer;
+  bool AnySplit = false;
+  for (const Chunk &Old : Chunks) {
+    if (Old.begin == Old.end) {
+      Finer.push_back(Old);
+      continue;
+    }
+    const unsigned Mid = (Old.begin + Old.end) / 2;
+    Finer.push_back({Old.begin, Mid});
+    Finer.push_back({Mid + 1, Old.end});
+    AnySplit = true;
+  }
+  if (!AnySplit)
+    return false;
+  Chunks = std::move(Finer);
+  outs() << "Success! New Chunks:\n";
+  printChunks(Chunks);
+  return true;
+}
+
+/// Generic delta-debugging driver. Starting from a single chunk covering
+/// targets [1, Targets], each sweep tests the program with one chunk left out
+/// and drops the chunks the test stays interesting without. Chunks are split
+/// in half whenever a sweep drops nothing, until no chunk can be split.
+void llvm::runDeltaPass(
+    TestRunner &Test, unsigned Targets,
+    std::function<void(const std::vector<Chunk> &, Module *)>
+        ExtractChunksFromModule) {
+  if (Targets == 0) {
+    outs() << "\nNothing to reduce\n";
+    return;
+  }
+  // The unmodified input has to be interesting for a reduction to make sense.
+  if (!Test.run(Test.getReducedFilepath())) {
+    outs() << "\nInput isn't interesting! Verify interesting-ness test\n";
+    return;
+  }
+  // Target indices are 1-based; begin with one chunk spanning all of them.
+  std::vector<Chunk> Chunks = {{1, Targets}};
+  if (!increaseGranularity(Chunks)) {
+    outs() << "\nAlready at minimum size. Cannot reduce anymore.\n";
+    return;
+  }
+  std::unique_ptr<Module> ReducedProgram;
+  std::set<Chunk> Dropped;
+  while (true) {
+    Dropped.clear();
+    // Sweep from the last chunk to the first, leaving one candidate out.
+    for (size_t Idx = Chunks.size(); Idx-- > 0;) {
+      const Chunk &Candidate = Chunks[Idx];
+      std::vector<Chunk> Kept;
+      for (const Chunk &C : Chunks)
+        if (C != Candidate && !Dropped.count(C))
+          Kept.push_back(C);
+      if (Kept.empty())
+        continue;
+
+      // Work on a clone so the program held by Test stays untouched, strip
+      // everything outside Kept, and dump the result to a temporary file.
+      std::unique_ptr<Module> Clone = CloneModule(*Test.getProgram());
+      ExtractChunksFromModule(Kept, Clone.get());
+      SmallString<128> CurrentFilepath =
+          createTmpFile(Clone.get(), Test.getTmpDir());
+
+      outs() << "Testing with: ";
+      printChunks(Kept, /*Oneline=*/true);
+      outs() << " | " << sys::path::filename(CurrentFilepath);
+
+      // A failing test means Candidate is required, so it stays.
+      if (!Test.run(CurrentFilepath)) {
+        outs() << "\n";
+        continue;
+      }
+
+      // Still interesting without Candidate: remember the smaller program.
+      Dropped.insert(Candidate);
+      Test.setReducedFilepath(CurrentFilepath);
+      ReducedProgram = std::move(Clone);
+      outs() << " **** SUCCESS | lines: " << getLines(CurrentFilepath) << "\n";
+    }
+    // Discard the chunks that turned out to be unnecessary in this sweep.
+    erase_if(Chunks,
+             [&Dropped](const Chunk &C) { return Dropped.count(C) != 0; });
+    // Refine only after a sweep that dropped nothing; stop once that fails.
+    if (Dropped.empty() && !increaseGranularity(Chunks))
+      break;
+  }
+
+  // Publish the reduced module only if at least one chunk was removed.
+  if (ReducedProgram)
+    Test.setProgram(std::move(ReducedProgram));
+  outs() << "Couldn't increase anymore.\n";
+}
\ No newline at end of file
diff --git a/llvm/tools/llvm-reduce/deltas/Delta.h b/llvm/tools/llvm-reduce/deltas/Delta.h
new file mode 100644
index 0000000..8597adb
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/Delta.h
@@ -0,0 +1,84 @@
+//===- Delta.h - Delta Debugging Algorithm Implementation -----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the interface of the Delta Debugging Algorithm, which
+// splits a given set of Targets (i.e. Functions, Instructions, BBs, etc.)
+// into chunks and tries to reduce the number of chunks that are interesting.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H
+#define LLVM_TOOLS_LLVMREDUCE_LLVMREDUCE_DELTA_H
+
+#include "../TestRunner.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include <fstream>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+struct Chunk {
+  unsigned begin;
+  unsigned end;
+  /// Returns true when \p Index lies within the closed range [begin, end].
+  bool contains(unsigned Index) const { return begin <= Index && Index <= end; }
+
+  /// Prints "[begin,end]" to outs(), or just "[begin]" when begin == end.
+  void print() const {
+    outs() << "[" << begin;
+    if (begin != end)
+      outs() << "," << end;
+    outs() << "]";
+  }
+
+  /// Inequality; the generic delta pass uses it to skip the chunk under test.
+  friend bool operator!=(const Chunk &C1, const Chunk &C2) {
+    return !(C1.begin == C2.begin && C1.end == C2.end);
+  }
+
+  /// Lexicographic order on (begin, end), needed for storage in std::set.
+  friend bool operator<(const Chunk &C1, const Chunk &C2) {
+    return C1.begin != C2.begin ? C1.begin < C2.begin : C1.end < C2.end;
+  }
+};
+
+namespace llvm {
+
+/// This function implements the Delta Debugging algorithm: it receives a
+/// number of Targets (e.g. Functions, Instructions, Basic Blocks, etc.) and
+/// splits them into chunks. The program is then tested repeatedly, each time
+/// leaving one chunk out; if the test still passes without that chunk, the
+/// chunk is uninteresting and is removed from consideration. Once no chunk
+/// can be removed, the remaining chunks are split in half and the process
+/// starts again, until the chunks cannot be split anymore.
+///
+/// This function is intended to be called by each specialized delta pass (e.g.
+/// ReduceFunctions) and receives three key parameters:
+/// * Test: The main TestRunner instance which is used to run the provided
+/// interesting-ness test, as well as to store and access the reduced Program.
+/// * Targets: The number of Targets that are going to be reduced by the
+/// algorithm, for example, the RemoveGlobalVars pass would send the number of
+/// initialized GVs. Targets are indexed starting from 1.
+/// * ExtractChunksFromModule: A function used to tailor the main program so it
+/// only contains Targets that are inside Chunks of the given iteration.
+/// Note: This function is implemented by each specialized Delta pass
+///
+/// Other implementations of the Delta Debugging algorithm can also be found in
+/// the CReduce, Delta, and Lithium projects.
+void runDeltaPass(TestRunner &Test, unsigned Targets,
+                  std::function<void(const std::vector<Chunk> &, Module *)>
+                      ExtractChunksFromModule);
+} // namespace llvm
+
+#endif
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp
new file mode 100644
index 0000000..30c2748
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.cpp
@@ -0,0 +1,80 @@
+//===- ReduceFunctions.cpp - Specialized Delta Pass -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a function which calls the Generic Delta pass in order
+// to reduce functions (and any instructions that call them) in the provided
+// Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ReduceFunctions.h"
+
+/// Removes every defined function (and the calls to it) that does not belong
+/// to one of the desired chunks. \p Program is modified in place.
+/// Defined functions are numbered from 1 in module order, and \p ChunksToKeep
+/// is walked front to back, so it is expected to be sorted by index.
+static void extractFunctionsFromModule(const std::vector<Chunk> &ChunksToKeep,
+                                       Module *Program) {
+  // Get functions inside desired chunks
+  std::set<Function *> FuncsToKeep;
+  unsigned I = 0, FunctionCount = 0;
+  for (auto &F : *Program)
+    if (!F.isDeclaration() && I < ChunksToKeep.size()) {
+      if (ChunksToKeep[I].contains(++FunctionCount))
+        FuncsToKeep.insert(&F);
+      if (FunctionCount == ChunksToKeep[I].end)
+        ++I;
+    }
+
+  // Collect the out-of-chunk functions together with their direct calls. The
+  // calls must be gathered before the functions are replaced with undef:
+  // afterwards they are indistinguishable from unrelated indirect calls,
+  // which have no called function either and must not be deleted.
+  std::vector<Function *> FuncsToRemove;
+  std::set<CallInst *> CallsToRemove;
+  for (auto &F : *Program)
+    if (!F.isDeclaration() && !FuncsToKeep.count(&F)) {
+      for (auto *U : F.users())
+        if (auto *Call = dyn_cast<CallInst>(U))
+          if (Call->getCalledFunction() == &F)
+            CallsToRemove.insert(Call);
+      F.replaceAllUsesWith(UndefValue::get(F.getType()));
+      FuncsToRemove.push_back(&F);
+    }
+
+  // Erase the calls first (some live inside functions that are erased next).
+  // A call's result might be stored / used somewhere else, so replace it.
+  for (auto *Call : CallsToRemove) {
+    Call->replaceAllUsesWith(UndefValue::get(Call->getType()));
+    Call->eraseFromParent();
+  }
+  for (auto *F : FuncsToRemove)
+    F->eraseFromParent();
+}
+
+/// Prints a numbered (1-based) index of every function in \p Program that has
+/// a definition and \returns how many such functions there are.
+static unsigned countDefinedFunctions(Module *Program) {
+  // TODO: Silence index with --quiet flag
+  outs() << "----------------------------\n"
+         << "Function Index Reference:\n";
+  unsigned Defined = 0;
+  for (const Function &Fn : *Program) {
+    if (!Fn.isDeclaration())
+      outs() << "\t" << ++Defined << ": " << Fn.getName() << "\n";
+  }
+  outs() << "----------------------------\n";
+  return Defined;
+}
+
+void llvm::reduceFunctionsDeltaPass(TestRunner &Test) { // Function delta pass
+  outs() << "*** Reducing Functions...\n";
+  const unsigned DefinedFuncs = countDefinedFunctions(Test.getProgram());
+  runDeltaPass(Test, DefinedFuncs, extractFunctionsFromModule);
+  outs() << "----------------------------\n";
+}
\ No newline at end of file
diff --git a/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h
new file mode 100644
index 0000000..7c2cd3f
--- /dev/null
+++ b/llvm/tools/llvm-reduce/deltas/ReduceFunctions.h
@@ -0,0 +1,20 @@
+//===- ReduceFunctions.h - Specialized Delta Pass -------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a function which calls the Generic Delta pass in order
+// to reduce functions (and any instructions that call them) in the provided
+// Module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Delta.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+namespace llvm {
+void reduceFunctionsDeltaPass(TestRunner &Test); // Function-reduction pass
+} // namespace llvm