Initial checkin of bugpoint


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@4789 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/tools/bugpoint/BugDriver.cpp b/tools/bugpoint/BugDriver.cpp
new file mode 100644
index 0000000..0343f63
--- /dev/null
+++ b/tools/bugpoint/BugDriver.cpp
@@ -0,0 +1,100 @@
+//===- BugDriver.cpp - Top-Level BugPoint class implementation ------------===//
+//
+// This class contains all of the shared state and information that is used by
+// the BugPoint tool to track down errors in optimizations.  This class is the
+// main driver class that invokes all sub-functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BugDriver.h"
+#include "llvm/Module.h"
+#include "llvm/Bytecode/Reader.h"
+#include "llvm/Assembly/Parser.h"
+#include "llvm/Transforms/Utils/Linker.h"
+#include "llvm/Pass.h"
+#include <memory>
+
+/// ParseInputFile - Try to load InputFilename, first as bytecode and then as
+/// assembly.  On failure a diagnostic is printed to cerr and null is returned.
+Module *BugDriver::ParseInputFile(const std::string &InputFilename) const {
+  Module *M = 0;
+  try {
+    M = ParseBytecodeFile(InputFilename);
+    if (M == 0)
+      M = ParseAssemblyFile(InputFilename);
+    if (M == 0)
+      std::cerr << ToolName << ": could not read input file '"
+                << InputFilename << "'!\n";
+  } catch (const ParseException &E) {
+    std::cerr << ToolName << ": " << E.getMessage() << "\n";
+    M = 0;
+  }
+  return M;
+}
+
+// addSources - Load every file named in Filenames (each may be assembly or
+// bytecode) and link them all into Program.  Returns true if any file fails to
+// load or link, and false once everything has been read.
+bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
+  assert(Program == 0 && "Cannot call addSources multiple times!");
+  assert(!Filenames.empty() && "Must specify at least on input filename!");
+
+  // The first file becomes the program that the others are linked into.
+  Program = ParseInputFile(Filenames[0]);
+  if (Program == 0) return true;
+  std::cout << "Read input file      : '" << Filenames[0] << "'\n";
+
+  const unsigned NumFiles = Filenames.size();
+  for (unsigned Idx = 1; Idx != NumFiles; ++Idx) {
+    const std::string &Name = Filenames[Idx];
+    std::auto_ptr<Module> Extra(ParseInputFile(Name));
+    if (!Extra.get()) return true;
+
+    std::cout << "Linking in input file: '" << Name << "'\n";
+    std::string ErrorMessage;
+    if (!LinkModules(Program, Extra.get(), &ErrorMessage))
+      continue;
+    std::cerr << ToolName << ": error linking in '" << Name << "': "
+              << ErrorMessage << "\n";
+    return true;
+  }
+
+  std::cout << "*** All input ok\n";
+  return false;
+}
+
+
+
+/// run - Top-level entry point, called once the command line arguments have
+/// been copied into the instance variables.
+///
+bool BugDriver::run() {
+  // First classify the problem: does the pass list crash the compiler, or does
+  // it produce illegal code?  We decide by running all of the passes on the
+  // input program, which should generate a bytecode file.  If one is generated
+  // the compiler did not crash, so we go on to diagnose a miscompilation.
+  //
+  std::cout << "Running selected passes on program to test for crash: ";
+  const bool Crashed = runPasses(PassesToRun);
+
+  // Hand off to the debugger that matches what we observed.
+  if (!Crashed)
+    return debugMiscompilation();
+  return debugCrash();
+}
+
+
+/// debugMiscompilation - This method is used when the passes selected are not
+/// crashing, but the generated output is semantically different from the
+/// input.  Not implemented yet: it only reports that and returns true.
+///
+bool BugDriver::debugMiscompilation() {
+  std::cout << "*** Debugging miscompilation!\n";
+  std::cerr << "Sorry, bugpoint cannot debug a miscompilation yet!\n";
+
+  // TODO: If no reference output was specified, run the program without
+  // optimizations to get a reference output.
+  //
+
+  return true;
+}
diff --git a/tools/bugpoint/BugDriver.h b/tools/bugpoint/BugDriver.h
new file mode 100644
index 0000000..f419249
--- /dev/null
+++ b/tools/bugpoint/BugDriver.h
@@ -0,0 +1,109 @@
+//===- BugDriver.h - Top-Level BugPoint class -------------------*- C++ -*-===//
+//
+// This class contains all of the shared state and information that is used by
+// the BugPoint tool to track down errors in optimizations.  This class is the
+// main driver class that invokes all sub-functionality.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BUGDRIVER_H
+#define BUGDRIVER_H
+
+#include <vector>
+#include <string>
+class PassInfo;
+class Module;
+class Function;
+
+class BugDriver {
+  const std::string ToolName;  // Name of bugpoint
+  Module *Program;             // The raw program, linked together
+  std::vector<const PassInfo*> PassesToRun;  // Filled in by addPasses
+public:
+  BugDriver(const char *toolname) : ToolName(toolname), Program(0) {}
+
+  // Set up methods... these methods are used to copy information about the
+  // command line arguments into instance variables of BugDriver.
+  //
+  bool addSources(const std::vector<std::string> &FileNames);
+  template<class It>
+  void addPasses(It I, It E) { PassesToRun.insert(PassesToRun.end(), I, E); }
+
+  /// run - The top level method that is invoked after all of the instance
+  /// variables are set up from command line arguments.
+  ///
+  bool run();
+
+  /// debugCrash - This method is called when some pass crashes on input.  It
+  /// attempts to prune down the testcase to something reasonable, and figure
+  /// out exactly which pass is crashing.
+  ///
+  bool debugCrash();
+
+  /// debugPassCrash - This method is called when the specified pass crashes on
+  /// Program as input.  It tries to reduce the testcase to something that still
+  /// crashes, but is smaller.
+  ///
+  bool debugPassCrash(const PassInfo *PI);
+
+  /// debugMiscompilation - This method is used when the passes selected are not
+  /// crashing, but the generated output is semantically different from the
+  /// input.
+  bool debugMiscompilation();
+
+private:
+  /// ParseInputFile - Given a bytecode or assembly input filename, parse and
+  /// return it, or return null if not possible.
+  ///
+  Module *ParseInputFile(const std::string &InputFilename) const;
+
+  /// removeFile - Delete the specified file
+  ///
+  void removeFile(const std::string &Filename) const;
+
+  /// writeProgramToFile - This writes the current "Program" to the named
+  /// bytecode file.  If an error occurs, true is returned.
+  ///
+  bool writeProgramToFile(const std::string &Filename) const;
+
+
+  /// EmitProgressBytecode - This function is used to output the current Program
+  /// to a file named "bugpoint-ID.bc".
+  ///
+  void EmitProgressBytecode(const PassInfo *Pass, const std::string &ID);
+  
+  /// runPasses - Run the specified passes on Program, outputting a bytecode
+  /// file and writing the filename into OutputFilename if successful.  If the
+  /// optimizations fail for some reason (optimizer crashes), return true,
+  /// otherwise return false.  If DeleteOutput is set to true, the bytecode is
+  /// deleted on success, and the filename string is undefined.  This prints to
+  /// cout a single line message indicating whether compilation was successful
+  /// or failed.
+  ///
+  bool runPasses(const std::vector<const PassInfo*> &PassesToRun,
+                 std::string &OutputFilename, bool DeleteOutput = false) const;
+
+  /// runPasses - Just like the method above, but this just returns true or
+  /// false indicating whether or not the optimizer crashed on the specified
+  /// input (true = crashed).
+  ///
+  bool runPasses(const std::vector<const PassInfo*> &PassesToRun,
+                 bool DeleteOutput = true) const {
+    std::string Filename;
+    return runPasses(PassesToRun, Filename, DeleteOutput);
+  }
+
+  /// runPass - Run only the specified pass on the program.
+  bool runPass(const PassInfo *P, bool DeleteOutput = true) const {
+    return runPasses(std::vector<const PassInfo*>(1, P), DeleteOutput);
+  }
+  
+  /// extractFunctionFromModule - This method is used to extract the specified
+  /// (non-external) function from the current program, slim down the module,
+  /// and then return it.  This does not modify Program at all, it modifies a
+  /// copy, which it returns.
+  Module *extractFunctionFromModule(Function *F) const;
+
+};
+
+#endif
diff --git a/tools/bugpoint/CrashDebugger.cpp b/tools/bugpoint/CrashDebugger.cpp
new file mode 100644
index 0000000..3c4fa2d
--- /dev/null
+++ b/tools/bugpoint/CrashDebugger.cpp
@@ -0,0 +1,120 @@
+//===- CrashDebugger.cpp - Debug compilation crashes ----------------------===//
+//
+// This file defines the bugpoint internals that narrow down compilation crashes
+//
+//===----------------------------------------------------------------------===//
+
+#include "BugDriver.h"
+#include "llvm/Module.h"
+#include "llvm/Bytecode/Writer.h"
+#include "llvm/Pass.h"
+#include <fstream>
+
+/// debugCrash - This method is called when some pass crashes on input.  It
+/// attempts to prune down the testcase to something reasonable, and figure
+/// out exactly which pass is crashing.  Returns true if that cannot be done.
+///
+bool BugDriver::debugCrash() {
+  std::cout << "\n*** Debugging optimizer crash!\n";
+
+  // Binary search for the crashing pass: its index is in [LastToPass,LastToCrash]
+  unsigned LastToPass = 0, LastToCrash = PassesToRun.size();
+  while (LastToPass != LastToCrash) {
+    unsigned Mid = (LastToCrash+LastToPass+1) / 2;
+    std::vector<const PassInfo*> P(PassesToRun.begin(),
+                                   PassesToRun.begin()+Mid);
+    std::cout << "Checking to see if the first " << Mid << " passes crash: ";
+
+    if (runPasses(P))
+      LastToCrash = Mid-1;
+    else
+      LastToPass = Mid;
+  }
+
+  // Make sure something crashed.  :)
+  if (LastToCrash >= PassesToRun.size()) {
+    std::cerr << "ERROR: No passes crashed!\n";
+    return true;
+  }
+
+  // Calculate which pass it is that crashes...
+  const PassInfo *CrashingPass = PassesToRun[LastToCrash];
+
+  std::cout << "\n*** Found crashing pass '-" << CrashingPass->getPassArgument()
+            << "': " << CrashingPass->getPassName() << "\n";
+
+  // Compile the program with just the passes that don't crash.
+  if (LastToPass != 0) {
+    // Don't bother doing this if the first pass crashes...
+    std::vector<const PassInfo*> P(PassesToRun.begin(),
+                                   PassesToRun.begin()+LastToPass);
+    std::string Filename;
+    std::cout << "Running passes that don't crash to get input for pass: ";
+    if (runPasses(P, Filename)) {
+      std::cerr << "ERROR: Running the first " << LastToPass
+                << " passes crashed this time!\n";
+      return true;
+    }
+
+    // Filename now names a valid bytecode file.  Reload it and delete the
+    // temporary file; only replace "Program" if the reload actually worked.
+    Module *Reloaded = ParseInputFile(Filename);
+    removeFile(Filename);
+    if (Reloaded == 0) return true;  // ParseInputFile already reported why
+    delete Program;
+    Program = Reloaded;
+  }
+
+  return debugPassCrash(CrashingPass);
+}
+
+/// CountFunctions - Count how many functions in module M are not external,
+/// i.e. how many have isExternal() returning false.
+static unsigned CountFunctions(Module *M) {
+  unsigned NumDefined = 0;
+  // Add one for each function that is not flagged external.
+  for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
+    NumDefined += FI->isExternal() ? 0 : 1;
+  return NumDefined;
+}
+
+/// debugPassCrash - This method is called when the specified pass crashes on
+/// Program as input.  It tries to reduce the testcase to something that still
+/// crashes, but is smaller.  It currently always returns false.
+///
+bool BugDriver::debugPassCrash(const PassInfo *Pass) {
+  EmitProgressBytecode(Pass, "passinput");
+
+  if (CountFunctions(Program) > 1) {
+    // Attempt to reduce the input program down to a single function that still
+    // crashes.
+    //
+    std::cout << "\n*** Attempting to reduce the testcase to one function\n";
+
+    for (Module::iterator I = Program->begin(), E = Program->end(); I != E; ++I)
+      if (!I->isExternal()) {
+        // Extract one function from the module...
+        Module *M = extractFunctionFromModule(I);
+
+        // Make the function the current program; M now holds the old module.
+        std::swap(Program, M);
+
+        // Find out if the pass still crashes on this function...
+        std::cout << "Checking function '" << I->getName() << "': ";
+        if (runPass(Pass)) {
+          // Yup, it does.  Emit the reduced testcase first: I still points
+          // into the old module, so its name must be read before we delete it.
+          EmitProgressBytecode(Pass, "reduced-"+I->getName());
+
+          delete M;
+          break;
+        }
+
+        // This pass didn't crash on this function, try the next one.
+        delete Program;
+        Program = M;
+      }
+  }
+
+  return false;
+}
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
new file mode 100644
index 0000000..76b2258
--- /dev/null
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -0,0 +1,34 @@
+//===- ExtractFunction.cpp - Extract a function from Program --------------===//
+//
+// This file implements a method that extracts a function from program, cleans
+// it up, and returns it as a new module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BugDriver.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+/// extractFunctionFromModule - Extract the specified (non-external) function
+/// from the current program, slim down the module that contains it, and
+/// return the result.  All of the work is done on a copy of Program, so
+/// Program itself is not modified.
+Module *BugDriver::extractFunctionFromModule(Function *F) const {
+  Module *Clone = CloneModule(Program);
+
+  // F belongs to Program; find its counterpart in the copy by looking it up
+  // with the same name and function type.
+  Function *ClonedF = Clone->getFunction(F->getName(), F->getFunctionType());
+
+  // Isolate that function in the copy, then clean up what is left over.
+  //
+  PassManager Cleanup;
+  Cleanup.add(createFunctionExtractionPass(ClonedF)); // Extract the function
+  Cleanup.add(createGlobalDCEPass());             // Delete unreachable globals
+  Cleanup.add(createFunctionResolvingPass());     // Delete prototypes
+  Cleanup.add(createDeadTypeEliminationPass());   // Remove dead types...
+  Cleanup.run(*Clone);
+  return Clone;
+}
diff --git a/tools/bugpoint/Makefile b/tools/bugpoint/Makefile
new file mode 100644
index 0000000..43f1c91
--- /dev/null
+++ b/tools/bugpoint/Makefile
@@ -0,0 +1,13 @@
+LEVEL = ../..
+
+TOOLNAME = bugpoint
+
+OPTLIBS  = instrument profpaths scalaropts ipo
+ANALIBS  = datastructure ipa target.a analysis
+
+USEDLIBS = ipo scalaropts $(ANALIBS) \
+           transformutils asmparser bcreader bcwriter vmcore support
+
+TOOLLINKOPTS = -ldl
+
+include $(LEVEL)/Makefile.common
diff --git a/tools/bugpoint/OptimizerDriver.cpp b/tools/bugpoint/OptimizerDriver.cpp
new file mode 100644
index 0000000..26a6ae7
--- /dev/null
+++ b/tools/bugpoint/OptimizerDriver.cpp
@@ -0,0 +1,150 @@
+//===- OptimizerDriver.cpp - Allow BugPoint to run passes safely ----------===//
+//
+// This file defines an interface that allows bugpoint to run various passes
+// without the threat of a buggy pass corrupting bugpoint (of course bugpoint
+// may have it's own bugs, but that's another story...).  It acheives this by
+// forking a copy of itself and having the child process do the optimizations.
+// If this client dies, we can always fork a new one.  :)
+//
+//===----------------------------------------------------------------------===//
+
+#include "BugDriver.h"
+#include "llvm/PassManager.h"
+#include "llvm/Analysis/Verifier.h"
+#include "llvm/Bytecode/WriteBytecodePass.h"
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fstream>
+
+/// removeFile - Unlink the named file.  The result of unlink() is ignored.
+void BugDriver::removeFile(const std::string &Filename) const {
+  const char *Path = Filename.c_str();
+  unlink(Path);
+}
+
+/// writeProgramToFile - This writes the current "Program" to the named bytecode
+/// file.  If the file cannot be opened or the write fails, true is returned.
+///
+bool BugDriver::writeProgramToFile(const std::string &Filename) const {
+  std::ofstream Out(Filename.c_str());
+  if (!Out.good()) return true;
+  WriteBytecodeToFile(Program, Out);
+  Out.flush();          // Push buffered data out so write errors show up now
+  return !Out.good();
+}
+
+
+/// EmitProgressBytecode - This function is used to output the current Program
+/// to a file named "bugpoint-ID.bc".
+///
+void BugDriver::EmitProgressBytecode(const PassInfo *Pass,
+                                     const std::string &ID) {
+  // Output the input to the current pass to a bytecode file, emit a message
+  // telling the user how to reproduce it: opt -foo blah.bc
+  //
+  std::string Filename = "bugpoint-" + ID + ".bc";
+  if (writeProgramToFile(Filename)) {
+    std::cerr <<  "Error opening file '" << Filename << "' for writing!\n";
+    return;
+  }
+
+  // Filename already carries the "bugpoint-" prefix and ".bc" suffix.
+  std::cout << "Emitted bytecode to '" << Filename << "'\n";
+  std::cout << "\n*** You can reproduce the problem with: ";
+
+  // Name the tool that matches the first pass-type flag that is set.
+  unsigned PassType = Pass->getPassType();
+  if (PassType & PassInfo::Analysis)
+    std::cout << "analyze";
+  else if (PassType & PassInfo::Optimization)
+    std::cout << "opt";
+  else if (PassType & PassInfo::LLC)
+    std::cout << "llc";
+  else std::cout << "bugpoint";
+  std::cout << " " << Filename << " -" << Pass->getPassArgument() << "\n";
+}
+
+
+/// RunChild - Run Passes, then a verifier and a bytecode writer, on Program.
+/// The bytecode goes to OutFilename; exits with status 1 if it can't be opened.
+static void RunChild(Module *Program,const std::vector<const PassInfo*> &Passes,
+                     const std::string &OutFilename) {
+  std::ofstream OutFile(OutFilename.c_str());
+  if (!OutFile.good()) {
+    std::cerr << "Error opening bytecode file: " << OutFilename << "\n";
+    exit(1);
+  }
+
+  PassManager PM;
+  for (unsigned Idx = 0, NumPasses = Passes.size(); Idx != NumPasses; ++Idx) {
+    const PassInfo *PI = Passes[Idx];
+    if (!PI->getNormalCtor()) {
+      std::cerr << "Cannot create pass yet: " << PI->getPassName() << "\n";
+      continue;
+    }
+    PM.add(PI->getNormalCtor()());
+  }
+
+  // Verify the module after the requested passes, then emit the bytecode.
+  PM.add(createVerifierPass());
+  PM.add(new WriteBytecodePass(&OutFile));
+  PM.run(*Program);
+}
+
+/// runPasses - Run the specified passes on Program, outputting a bytecode file
+/// and writing the filename into OutputFilename if successful.  If the
+/// optimizations fail for some reason (optimizer crashes), return true,
+/// otherwise return false.  If DeleteOutput is set to true, the bytecode is
+/// deleted on success, and the filename string is undefined.  The file is also
+/// deleted whenever the passes fail.  This prints to cout a single line
+/// message indicating whether compilation was successful or failed.
+///
+bool BugDriver::runPasses(const std::vector<const PassInfo*> &Passes,
+                          std::string &OutputFilename, bool DeleteOutput) const{
+  std::cout << std::flush;
+
+  // Agree on a temporary file name to use....
+  char FNBuffer[] = "bugpoint-output.bc-XXXXXX";
+  int TempFD;
+  if ((TempFD = mkstemp(FNBuffer)) == -1) {
+    std::cerr << ToolName << ": ERROR: Cannot create temporary"
+              << " file in the current directory!\n";
+    exit(1);
+  }
+  OutputFilename = FNBuffer;
+
+  // We don't need to hold the temp file descriptor... we will trust that no one
+  // will overwrite/delete the file while we are working on it...
+  close(TempFD);
+
+  pid_t child_pid;
+  switch (child_pid = fork()) {
+  case -1:    // Error occurred
+    std::cerr << ToolName << ": Error forking!\n";
+    exit(1);
+  case 0:     // Child process runs passes.
+    RunChild(Program, Passes, OutputFilename);
+    exit(0);  // If we finish successfully, return 0!
+  default:    // Parent continues...
+    break;
+  }
+
+  // Wait for the child process to get done.
+  int Status;
+  if (wait(&Status) != child_pid) {
+    std::cerr << "Error waiting for child process!\n";
+    exit(1);
+  }
+
+  // Remove the bytecode file if the caller asked us to, or if the child failed
+  // (its output is useless and would otherwise be left behind).  This may fail
+  // if the file was never created, but that's ok.
+  if (DeleteOutput || Status != 0)
+    removeFile(OutputFilename);
+
+  std::cout << (Status ? "Crashed!\n" : "Success!\n");
+
+  // Was the child successful?
+  return Status != 0;
+}
diff --git a/tools/bugpoint/TestPasses.cpp b/tools/bugpoint/TestPasses.cpp
new file mode 100644
index 0000000..56acdea
--- /dev/null
+++ b/tools/bugpoint/TestPasses.cpp
@@ -0,0 +1,54 @@
+//===- TestPasses.cpp - "buggy" passes used to test bugpoint --------------===//
+//
+// This file contains "buggy" passes that are used to test bugpoint, to check
+// that it is narrowing down testcases correctly.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/iOther.h"
+#include "llvm/Support/InstVisitor.h"
+#include "llvm/Constant.h"
+#include "llvm/BasicBlock.h"
+
+namespace {
+  /// CrashOnCalls - A deliberately broken pass for exercising bugpoint: it
+  /// calls abort() as soon as it sees a call instruction.
+  class CrashOnCalls : public BasicBlockPass {
+    // This pass never modifies anything, so it preserves every analysis.
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesAll();
+    }
+    bool runOnBasicBlock(BasicBlock &BB) {
+      BasicBlock::iterator Inst = BB.begin(), End = BB.end();
+      while (Inst != End) {
+        if (isa<CallInst>(*Inst)) abort();
+        ++Inst;
+      }
+      return false;
+    }
+  };
+
+  RegisterPass<CrashOnCalls> X("bugpoint-crashcalls",
+    "BugPoint Test Pass - Intentionally crash on CallInsts");
+}
+
+namespace {
+  /// DeleteCalls - This pass is used to test bugpoint.  It intentionally
+  /// deletes all call instructions, "misoptimizing" the program.
+  class DeleteCalls : public BasicBlockPass {
+    bool runOnBasicBlock(BasicBlock &BB) {
+      bool Changed = false;
+      for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; )
+        if (CallInst *CI = dyn_cast<CallInst>(&*I++)) {  // ++ before erase!
+          if (!CI->use_empty())
+            CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
+          CI->getParent()->getInstList().erase(CI);
+          Changed = true;
+        }
+      return Changed;   // Report the modification to the pass manager
+    }
+  };
+  RegisterPass<DeleteCalls> Y("bugpoint-deletecalls",
+    "BugPoint Test Pass - Intentionally 'misoptimize' CallInsts");
+}
diff --git a/tools/bugpoint/bugpoint.cpp b/tools/bugpoint/bugpoint.cpp
new file mode 100644
index 0000000..d58d0a6
--- /dev/null
+++ b/tools/bugpoint/bugpoint.cpp
@@ -0,0 +1,39 @@
+//===----------------------------------------------------------------------===//
+// LLVM BugPoint Utility 
+//
+// This program is an automated compiler debugger tool.  It is used to narrow
+// down miscompilations and crash problems to a specific pass in the compiler,
+// and the specific Module or Function input that is causing the problem.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BugDriver.h"
+#include "Support/CommandLine.h"
+#include "llvm/Support/PassNameParser.h"
+
+static cl::list<std::string>
+InputFilenames(cl::Positional, cl::OneOrMore,
+               cl::desc("<input llvm ll/bc files>"));
+
+// PassList uses PassNameParser as its option parser; the chosen passes are
+// handed to the BugDriver in main.
+//
+static cl::list<const PassInfo*, bool, PassNameParser>
+PassList(cl::desc("Passes available:"), cl::OneOrMore);
+
+//cl::list<std::string>
+//InputArgv(cl::ConsumeAfter, cl::desc("<program arguments>..."));
+
+//cl::opt<bool>
+//Verbose("v", cl::desc("Enable verbose output"));
+
+int main(int argc, char **argv) {
+  cl::ParseCommandLineOptions(argc, argv);
+
+  // Load and link the inputs, then hand the driver the requested passes.
+  BugDriver D(argv[0]);
+  const bool LoadFailed = D.addSources(InputFilenames);
+  if (LoadFailed) return 1;
+  D.addPasses(PassList.begin(), PassList.end());
+  return D.run();
+}