Initial checkin of new LLI with JIT compiler


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@5126 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/ExecutionEngine/JIT/Callback.cpp b/lib/ExecutionEngine/JIT/Callback.cpp
new file mode 100644
index 0000000..b843e10
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/Callback.cpp
@@ -0,0 +1,62 @@
+//===-- Callback.cpp - Trap handler for function resolution ---------------===//
+//
+// This file defines the SIGSEGV handler which is invoked when a reference to a
+// non-codegen'd function is found.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VM.h"
+#include "Support/Statistic.h"
+#include <signal.h>
+#include <ucontext.h>
+#include <iostream>
+
+static VM *TheVM = 0;  // The VM whose function references TrapHandler resolves
+// TrapHandler - SIGSEGV handler that resolves and patches an unresolved call.
+static void TrapHandler(int TN, siginfo_t *SI, ucontext_t *ucp) {
+  assert(TN == SIGSEGV && "Should be SIGSEGV!");
+
+#ifdef REG_EIP   /* this code does not compile on Sparc! */
+  if (SI->si_code != SEGV_MAPERR || SI->si_addr != 0 ||
+      ucp->uc_mcontext.gregs[REG_EIP] != 0) {  // Only a fault at EIP 0 is ours
+    std::cerr << "Bad SEGV encountered!\n";
+    abort();
+  }
+
+  // The call instruction should have pushed its return address on the stack...
+  unsigned RefAddr = *(unsigned*)ucp->uc_mcontext.gregs[REG_ESP];
+  RefAddr -= 4;  // Backtrack to the reference itself...
+
+  DEBUG(std::cerr << "In SEGV handler! Addr=0x" << std::hex << RefAddr
+                  << " ESP=0x" << ucp->uc_mcontext.gregs[REG_ESP] << std::dec
+                  << ": Resolving call to function: "
+                  << TheVM->getFunctionReferencedName((void*)RefAddr) << "\n");
+
+  // Sanity check to make sure this really is a call instruction...
+  assert(((unsigned char*)RefAddr)[-1] == 0xE8 && "Not a call instr!");
+  
+  unsigned NewVal = (unsigned)TheVM->resolveFunctionReference((void*)RefAddr);
+
+  // Rewrite the call target... so that we don't fault every time we execute
+  // the call.
+  *(unsigned*)RefAddr = NewVal-RefAddr-4;    // Relative to the operand's end
+
+  // Change the instruction pointer to be the real target of the call...
+  ucp->uc_mcontext.gregs[REG_EIP] = NewVal;
+
+#endif
+}
+
+// registerCallback - Route SIGSEGV to TrapHandler on behalf of this VM.
+void VM::registerCallback() {
+  struct sigaction Action;
+  Action.sa_flags = SA_SIGINFO|SA_NOCLDSTOP;   // Three-argument handler form
+  Action.sa_sigaction = (void (*)(int, siginfo_t*, void*))TrapHandler;
+  sigfillset(&Action.sa_mask);       // Hold off every signal during codegen
+
+  // Publish the VM before the handler can possibly fire, then install it.
+  TheVM = this;
+  sigaction(SIGSEGV, &Action, 0);
+}
+
+
diff --git a/lib/ExecutionEngine/JIT/GlobalVars.cpp b/lib/ExecutionEngine/JIT/GlobalVars.cpp
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/GlobalVars.cpp
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
new file mode 100644
index 0000000..b2b56a6
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -0,0 +1,53 @@
+//===-- JIT.cpp - LLVM Just in Time Compiler ------------------------------===//
+//
+// This file implements the top-level support for creating a Just-In-Time
+// compiler for the current architecture.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VM.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetMachineImpls.h"
+#include "llvm/Module.h"
+
+
+/// createJIT - Create and return a new JIT-compiling execution engine for the
+/// module M when the host is x86; on any other host this returns null.
+///
+ExecutionEngine *ExecutionEngine::createJIT(Module *M, unsigned Config) {
+  // FIXME: The choice should be made by which subdirectory gets linked in!
+#if !defined(i386) && !defined(__i386__) && !defined(__x86__)
+  return 0;
+#endif
+  // The X86 target is hard-wired for now; eventually this choice will be
+  // controllable from the command line.
+  TargetMachine *X86TM = allocateX86TargetMachine(Config);
+  assert(X86TM && "Could not allocate X86 target machine!");
+
+  // Wrap the module and its target in a new virtual machine object.
+  return new VM(M, X86TM);
+}
+// VM ctor - Set up target data, the code emitter, the passes and the handler.
+VM::VM(Module *M, TargetMachine *tm) : ExecutionEngine(M), TM(*tm) {
+  setTargetData(TM.getTargetData());
+  MCE = createEmitter(*this);  // Initialize MCE
+  setupPassManager();          // Uses *MCE, so MCE must already be set
+  registerCallback();          // Installs the SIGSEGV trap handler
+}
+// run - Find FnName in the module and invoke it as int(int, char**).
+int VM::run(const std::string &FnName, const std::vector<std::string> &Args) {
+  Function *Entry = getModule().getNamedFunction(FnName);
+  if (!Entry) {
+    std::cerr << "Could not find function '" << FnName <<"' in module!\n";
+    return 1;
+  }
+
+  // Obtain the native entry point for the function.
+  typedef int (*MainFnTy)(int, char**);
+  MainFnTy EntryPt = (MainFnTy)getPointerToFunction(Entry);
+  assert(EntryPt != 0 && "Null pointer to function?");
+
+  // Marshal the arguments into an argv array and make the call.
+  char **ArgVector = (char**)CreateArgv(Args);
+  return EntryPt(Args.size(), ArgVector);
+}
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
new file mode 100644
index 0000000..253a229
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -0,0 +1,107 @@
+//===-- JITEmitter.cpp - Write machine code to executable memory ----------===//
+//
+// This file defines a MachineCodeEmitter object that is used by Jello to write
+// machine code to memory and remember where relocatable values lie.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VM.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Function.h"
+#include "Support/Statistic.h"
+
+namespace {
+  Statistic<> NumBytes("jello", "Number of bytes of machine code compiled");
+
+  // Emitter - MachineCodeEmitter that writes code bytes directly into memory.
+  class Emitter : public MachineCodeEmitter {
+    VM &TheVM;
+    unsigned char *CurBlock;   // Base of the buffer for the current function
+    unsigned char *CurByte;    // Next byte to be written
+    // Operands referring to basic blocks, and where each block's code starts.
+    std::vector<std::pair<BasicBlock*, unsigned *> > BBRefs;
+    std::map<BasicBlock*, unsigned> BBLocations;
+  public:
+    Emitter(VM &vm) : TheVM(vm), CurBlock(0), CurByte(0) {}  // No wild cursors
+
+    virtual void startFunction(MachineFunction &F);
+    virtual void finishFunction(MachineFunction &F);
+    virtual void startBasicBlock(MachineBasicBlock &BB);
+    virtual void emitByte(unsigned char B);
+    virtual void emitPCRelativeDisp(Value *V);
+    virtual void emitGlobalAddress(GlobalValue *V);
+  };
+}
+// createEmitter - Return a new heap-allocated Emitter that emits code for V.
+MachineCodeEmitter *VM::createEmitter(VM &V) {
+  return new Emitter(V);
+}
+
+
+#define _POSIX_MAPPED_FILES
+#include <unistd.h>
+#include <sys/mman.h>
+// getMemory - Map 8192 bytes of anonymous read/write/execute memory for code.
+static void *getMemory() {  // NOTE: callers never test for MAP_FAILED
+  return mmap(0, 4096*2, PROT_READ|PROT_WRITE|PROT_EXEC,
+              MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);  // Anonymous maps take fd -1
+}
+
+// startFunction - Grab a fresh buffer for F and record it as F's address.
+void Emitter::startFunction(MachineFunction &F) {
+  unsigned char *Buffer = (unsigned char *)getMemory();
+  CurBlock = CurByte = Buffer;       // Emission begins at the buffer's base
+  TheVM.addGlobalMapping(F.getFunction(), Buffer);
+}
+// finishFunction - Patch pending block references and tally F's code size.
+void Emitter::finishFunction(MachineFunction &F) {
+  typedef std::vector<std::pair<BasicBlock*, unsigned *> >::iterator RefIter;
+  for (RefIter I = BBRefs.begin(), E = BBRefs.end(); I != E; ++I) {
+    unsigned *Fixup = I->second;     // The 4-byte operand to fill in
+    *Fixup = BBLocations[I->first] - (unsigned)Fixup - 4;
+  }
+  BBLocations.clear();
+  BBRefs.clear();
+
+  // Account for the bytes that were emitted for this function.
+  NumBytes += CurByte-CurBlock;
+  DEBUG(std::cerr << "Finished CodeGen of [" << std::hex << (unsigned)CurBlock
+                  << std::dec << "] Function: " << F.getFunction()->getName()
+                  << ": " << CurByte-CurBlock << " bytes of text\n");
+}
+// startBasicBlock - Record the current output address as the start of BB.
+void Emitter::startBasicBlock(MachineBasicBlock &BB) {
+  BBLocations[BB.getBasicBlock()] = (unsigned)CurByte;  // For finishFunction
+}
+
+void Emitter::emitByte(unsigned char B) {   // Append one byte to the buffer
+  *CurByte = B;        // Store at the cursor...
+  ++CurByte;           // ...then advance it
+}
+
+
+// emitPCRelativeDisp - Emit a 4-byte PC-relative operand that refers to V.
+//
+// Functions get a displacement aimed at address zero, and the operand is
+// registered with the VM so that the seg-fault it causes is resolved on demand.
+//
+// Basic block operands are left unwritten and queued in BBRefs; finishFunction
+// patches them once every block's location is known.
+void Emitter::emitPCRelativeDisp(Value *V) {
+  unsigned *Operand = (unsigned*)CurByte;     // The 4-byte slot being emitted
+  if (Function *Callee = dyn_cast<Function>(V)) {
+    // Remember which function this call site wants before poisoning it.
+    TheVM.addFunctionRef(CurByte, Callee);
+    *Operand = -(unsigned)CurByte-4;          // Displacement to address zero
+  } else {
+    // Defer: the target block may not have been emitted yet.
+    BBRefs.push_back(std::make_pair(cast<BasicBlock>(V), Operand));
+  }
+  CurByte += 4;
+}
+// emitGlobalAddress - Emit the address of the global V as an absolute operand.
+void Emitter::emitGlobalAddress(GlobalValue *V) {
+  *(void**)CurByte = TheVM.getPointerToGlobal(V);
+  CurByte += 4;  // NOTE(review): advances 4 bytes; assumes 4-byte pointers
+}
diff --git a/lib/ExecutionEngine/JIT/Makefile b/lib/ExecutionEngine/JIT/Makefile
new file mode 100644
index 0000000..bc3f709
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/Makefile
@@ -0,0 +1,4 @@
+LEVEL = ../../..
+LIBRARYNAME = lli-jit
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/ExecutionEngine/JIT/VM.cpp b/lib/ExecutionEngine/JIT/VM.cpp
new file mode 100644
index 0000000..f66d4d7
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/VM.cpp
@@ -0,0 +1,84 @@
+//===-- VM.cpp - LLVM Just in Time Compiler -------------------------------===//
+//
+// This tool implements a just-in-time compiler for LLVM, allowing direct
+// execution of LLVM bytecode in an efficient manner.
+//
+//===----------------------------------------------------------------------===//
+
+#include "VM.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/MachineCodeEmitter.h"
+#include "llvm/Function.h"
+#include <dlfcn.h>    // dlsym access
+
+// ~VM - Release the code emitter and the target machine held by this VM.
+VM::~VM() {
+  delete MCE;
+  delete &TM;   // TM is a reference to the object whose pointer the ctor took
+}
+
+/// setupPassManager - Populate the VM's PassManager with everything the target
+/// needs in order to turn LLVM code into executable bytes in memory.
+///
+void VM::setupPassManager() {
+  // First stage: lower LLVM code to the machine code intermediate form.
+  const bool NoJITSupport = TM.addPassesToJITCompile(PM);
+  if (NoJITSupport) {
+    std::cerr << "lli: target '" << TM.getName()
+              << "' doesn't support JIT compilation!\n";
+    abort();
+  }
+
+  // Second stage: emit that intermediate form as executable bytes in memory.
+  const bool NoEmitSupport = TM.addPassesToEmitMachineCode(PM, *MCE);
+  if (NoEmitSupport) {
+    std::cerr << "lli: target '" << TM.getName()
+              << "' doesn't support machine code emission!\n";
+    abort();
+  }
+}
+// resolveFunctionReference - Map a recorded call site to its callee's address.
+void *VM::resolveFunctionReference(void *RefAddr) {
+  Function *Callee = FunctionRefs[RefAddr];
+  assert(Callee && "Reference address not known!");
+
+  void *Target = getPointerToFunction(Callee);
+  assert(Target && "Pointer to function unknown!");
+
+  FunctionRefs.erase(RefAddr);   // The reference is resolved; forget it
+  return Target;
+}
+// getFunctionReferencedName - Name of the function recorded for RefAddr.
+const std::string &VM::getFunctionReferencedName(void *RefAddr) {
+  return FunctionRefs[RefAddr]->getName();  // Dereferences the entry unchecked
+}
+
+static void NoopFn() {}  // Substituted for externals that dlsym cannot find
+
+/// getPointerToFunction - Return the address of the code for F, compiling
+/// the module or resolving F in the process image first if necessary.
+///
+void *VM::getPointerToFunction(const Function *F) {
+  void *&Slot = GlobalAddress[F];
+  if (Slot != 0)
+    return Slot;                    // Address already known: nothing to do
+
+  if (!F->isExternal()) {
+    // JIT every function in the module; eventually this will happen on
+    // demand instead.  Running the passes populates the GlobalAddress table
+    // for all of the non-external functions, including this one.
+    PM.run(getModule());
+    assert(Slot &&
+           "Code generation didn't add function to GlobalAddress table!");
+    return Slot;
+  }
+
+  // An external function has to be found in the process image itself.
+  Slot = dlsym(0, F->getName().c_str());
+  if (Slot == 0) {
+    std::cerr << "WARNING: Cannot resolve fn '" << F->getName()
+              << "' using a dummy noop function instead!\n";
+    Slot = (void*)NoopFn;
+  }
+  return Slot;
+}