Support for exception throwing from JNI.

This change makes the exception-throwing JNI unit test realistic and
implements the missing exception-throwing pieces on X86. It also
corrects some issues on ARM, including methods whose arguments are
LJII (such as compareAndSwapInt).

Change-Id: I375f6efe2edeebb8007d7aa12c10b49742a8f119
diff --git a/src/assembler_arm.cc b/src/assembler_arm.cc
index c4dbbba..93d08b2 100644
--- a/src/assembler_arm.cc
+++ b/src/assembler_arm.cc
@@ -1810,28 +1810,27 @@
 
 void ArmAssembler::ExceptionPoll(ManagedRegister mscratch) {
   ArmManagedRegister scratch = mscratch.AsArm();
-  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath();
+  ArmExceptionSlowPath* slow = new ArmExceptionSlowPath(scratch);
   buffer_.EnqueueSlowPath(slow);
   LoadFromOffset(kLoadWord, scratch.AsCoreRegister(),
                  TR, Thread::ExceptionOffset().Int32Value());
   cmp(scratch.AsCoreRegister(), ShifterOperand(0));
   b(slow->Entry(), NE);
-  Bind(slow->Continuation());
 }
 
 void ArmExceptionSlowPath::Emit(Assembler* sasm) {
   ArmAssembler* sp_asm = down_cast<ArmAssembler*>(sasm);
 #define __ sp_asm->
   __ Bind(&entry_);
-  // Pass top of stack as argument
-  __ mov(R0, ShifterOperand(SP));
-  __ LoadFromOffset(kLoadWord, R12, TR,
-                         Thread::ExceptionEntryPointOffset().Int32Value());
-  // Note: assume that link register will be spilled/filled on method entry/exit
+
+  // Pass exception object as argument
+  // Don't care about preserving R0 as this call won't return
+  __ mov(R0, ShifterOperand(scratch_.AsCoreRegister()));
+  // Set up call to Thread::Current()->pDeliverException
+  __ LoadFromOffset(kLoadWord, R12, TR, OFFSETOF_MEMBER(Thread, pDeliverException));
   __ blx(R12);
-  // TODO: this call should never return as it should make a long jump to
-  // the appropriate catch block
-  __ b(&continuation_);
+  // Call never returns
+  __ bkpt(0);
 #undef __
 }
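
The slow path now ends in a call that never returns, which is why the
Bind(slow->Continuation()) above was removed. A minimal C++ model of the new
poll contract (names and types here are illustrative stand-ins, not the ART
API):

    #include <cstdio>
    #include <cstdlib>

    struct Thread {
      void* exception_;  // stand-in for Thread::exception_, loaded via TR on ARM
    };

    // Models pDeliverException: it long jumps to a handler, so it never returns.
    void DeliverException(void* exception) {
      std::printf("delivering exception %p\n", exception);
      std::exit(0);  // the real code long jumps to a catch block or the upcall
    }

    // Models ArmAssembler::ExceptionPoll together with its slow path.
    void ExceptionPoll(Thread* self) {
      if (self->exception_ != NULL) {        // cmp scratch, #0; bne slow->Entry()
        DeliverException(self->exception_);  // blx through pDeliverException
      }
      // No continuation label needed: the slow path cannot fall through.
    }

    int main() {
      Thread t = { NULL };
      ExceptionPoll(&t);  // fast path: no exception pending
      std::puts("fast path fell through");
      return 0;
    }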
 
diff --git a/src/assembler_arm.h b/src/assembler_arm.h
index e58eb92..1a722f7 100644
--- a/src/assembler_arm.h
+++ b/src/assembler_arm.h
@@ -609,8 +609,10 @@
 // Slowpath entered when Thread::Current()->_exception is non-null
 class ArmExceptionSlowPath : public SlowPath {
  public:
-  ArmExceptionSlowPath() {}
+  explicit ArmExceptionSlowPath(ArmManagedRegister scratch) : scratch_(scratch) {}
   virtual void Emit(Assembler *sp_asm);
+ private:
+  const ArmManagedRegister scratch_;
 };
 
 // Slowpath entered when Thread::Current()->_suspend_count is non-zero
diff --git a/src/assembler_x86.cc b/src/assembler_x86.cc
index 892bf76..e126d88 100644
--- a/src/assembler_x86.cc
+++ b/src/assembler_x86.cc
@@ -1658,8 +1658,10 @@
   // TODO: place reference map on call
 }
 
-void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister) {
-  UNIMPLEMENTED(FATAL);
+void X86Assembler::Call(FrameOffset base, Offset offset, ManagedRegister mscratch) {
+  Register scratch = mscratch.AsX86().AsCpuRegister();
+  movl(scratch, Address(ESP, base));
+  call(Address(scratch, offset));
 }
 
 void X86Assembler::Call(ThreadOffset offset, ManagedRegister mscratch) {
@@ -1713,7 +1715,6 @@
   buffer_.EnqueueSlowPath(slow);
   fs()->cmpl(Address::Absolute(Thread::ExceptionOffset()), Immediate(0));
   j(kNotEqual, slow->Entry());
-  Bind(slow->Continuation());
 }
 
 void X86ExceptionSlowPath::Emit(Assembler *sasm) {
@@ -1721,14 +1722,11 @@
 #define __ sp_asm->
   __ Bind(&entry_);
   // NB the return value is dead
-  // Pass top of stack as argument
-  __ pushl(ESP);
-  __ fs()->call(Address::Absolute(Thread::ExceptionEntryPointOffset()));
-  // TODO: this call should never return as it should make a long jump to
-  // the appropriate catch block
-  // Release argument
-  __ addl(ESP, Immediate(kPointerSize));
-  __ jmp(&continuation_);
+  // Pass exception as argument in EAX
+  __ fs()->movl(EAX, Address::Absolute(Thread::ExceptionOffset()));
+  __ fs()->call(Address::Absolute(OFFSETOF_MEMBER(Thread, pDeliverException)));
+  // This call never returns
+  __ int3();
 #undef __
 }
 
diff --git a/src/calling_convention.h b/src/calling_convention.h
index 4464609..a1d1b32 100644
--- a/src/calling_convention.h
+++ b/src/calling_convention.h
@@ -139,13 +139,13 @@
   virtual uint32_t CoreSpillMask() const = 0;
   virtual uint32_t FpSpillMask() const = 0;
 
-  // Returns true if the register will be clobbered by an outgoing
-  // argument value.
-  virtual bool IsOutArgRegister(ManagedRegister reg) = 0;
+  // Returns true if the method register will have been clobbered during
+  // argument setup
+  virtual bool IsMethodRegisterCrushedPreCall() = 0;
 
   // Iterator interface
   bool HasNext();
-  void Next();
+  virtual void Next();
   bool IsCurrentParamAReference();
   size_t CurrentParamSize();
   virtual bool IsCurrentParamInRegister() = 0;
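
Next() becoming virtual lets a convention realign its slot counter as it
advances. A simplified sketch of the override pattern (hypothetical classes;
the real base advance also accounts for argument sizes):

    #include <cassert>
    #include <cstddef>

    struct JniConv {
      size_t itr_slots;
      JniConv() : itr_slots(0) {}
      virtual ~JniConv() {}
      virtual void Next() { itr_slots++; }  // simplified base advance
    };

    struct ArmJniConv : JniConv {
      bool current_param_is_long;  // stand-in for IsParamALongOrDouble(...)
      ArmJniConv() : current_param_is_long(false) {}
      virtual void Next() {
        JniConv::Next();
        // AAPCS: a long/double must start on an even slot, so skip odd ones.
        if (current_param_is_long && (itr_slots & 1u) != 0) {
          itr_slots++;
        }
      }
    };

    int main() {
      ArmJniConv conv;
      conv.Next();  // slot 1
      conv.current_param_is_long = true;
      conv.Next();  // slot 2: already even, no realignment
      assert(conv.itr_slots == 2);
      conv.Next();  // slot 3 is odd, realigned to 4
      assert(conv.itr_slots == 4);
      return 0;
    }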
diff --git a/src/calling_convention_arm.cc b/src/calling_convention_arm.cc
index f8e8f3d..6e664ca 100644
--- a/src/calling_convention_arm.cc
+++ b/src/calling_convention_arm.cc
@@ -100,13 +100,54 @@
 // JNI calling convention
 
 ArmJniCallingConvention::ArmJniCallingConvention(Method* method) : JniCallingConvention(method) {
-  for (int i = R4; i < R12; i++) {
-    callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(static_cast<Register>(i)));
+  // Compute padding to ensure longs and doubles are not split in AAPCS
+  // TODO: in terms of outgoing argument size this may be overly generous
+  // due to padding appearing in the registers
+  size_t padding = 0;
+  size_t check = method->IsStatic() ? 1 : 0;
+  for (size_t i = 0; i < method->NumArgs(); i++) {
+    if (((i & 1) == check) && method->IsParamALongOrDouble(i)) {
+      padding += 4;
+    }
   }
-  // TODO: VFP
-  // for (SRegister i = S16; i <= S31; i++) {
-  //  callee_save_regs_.push_back(ArmManagedRegister::FromSRegister(i));
-  // }
+  padding_ = padding;
+  if (method->IsSynchronized()) {
+    // Preserve the callee saves that may be clobbered during monitor enter,
+    // where we copy R0 to R3 across the call
+    if (method->NumArgs() > 0) {
+      callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R4));
+      if (method->NumArgs() > 1) {
+        callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R5));
+        if (method->NumArgs() > 2) {
+          callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R6));
+          if (method->NumArgs() > 3) {
+            callee_save_regs_.push_back(ArmManagedRegister::FromCoreRegister(R7));
+          }
+        }
+      }
+    }
+  }
+}
+
+uint32_t ArmJniCallingConvention::CoreSpillMask() const {
+  // Compute spill mask to agree with callee saves initialized in the constructor
+  uint32_t result = 0;
+  Method* method = GetMethod();
+  if (method->IsSynchronized()) {
+    if (method->NumArgs() > 0) {
+      result |= 1 << R4;
+      if (method->NumArgs() > 1) {
+        result |= 1 << R5;
+        if (method->NumArgs() > 2) {
+          result |= 1 << R6;
+          if (method->NumArgs() > 3) {
+            result |= 1 << R7;
+          }
+        }
+      }
+    }
+  }
+  return result;
 }
 
 size_t ArmJniCallingConvention::FrameSize() {
@@ -120,16 +161,7 @@
 }
 
 size_t ArmJniCallingConvention::OutArgSize() {
-  const Method* method = GetMethod();
-  size_t padding;  // padding to ensure longs and doubles are not split in AAPCS
-  if (method->IsStatic()) {
-    padding = (method->NumArgs() > 1) && !method->IsParamALongOrDouble(0) &&
-              method->IsParamALongOrDouble(1) ? 4 : 0;
-  } else {
-    padding = (method->NumArgs() > 2) && !method->IsParamALongOrDouble(1) &&
-              method->IsParamALongOrDouble(2) ? 4 : 0;
-  }
-  return RoundUp(NumberOfOutgoingStackArgs() * kPointerSize + padding,
+  return RoundUp(NumberOfOutgoingStackArgs() * kPointerSize + padding_,
                  kStackAlignment);
 }
 
@@ -139,28 +171,27 @@
 }
 
 // Will reg be crushed by an outgoing argument?
-bool ArmJniCallingConvention::IsOutArgRegister(ManagedRegister mreg) {
-  Register reg = mreg.AsArm().AsCoreRegister();
-  return reg >= R0 && reg <= R3;
+bool ArmJniCallingConvention::IsMethodRegisterCrushedPreCall() {
+  return true;  // The method register R0 is always clobbered by the JNIEnv* argument
 }
 
-// JniCallingConvention ABI follows AAPCS
-//
-// In processing each parameter, we know that IsCurrentParamInRegister()
-// or IsCurrentParamOnStack() will be called first.
-// Both functions will ensure that we conform to AAPCS.
-//
-bool ArmJniCallingConvention::IsCurrentParamInRegister() {
-  // AAPCS processing
+// JniCallingConvention ABI follows AAPCS, where longs and doubles must start
+// at even register numbers and even stack slots
+void ArmJniCallingConvention::Next() {
+  JniCallingConvention::Next();
   Method* method = GetMethod();
-  int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni(method);
-  if ((itr_args_ >= 2) && method->IsParamALongOrDouble(arg_pos)) {
+  size_t arg_pos = itr_args_ - NumberOfExtraArgumentsForJni(method);
+  if ((itr_args_ >= 2) &&
+      (arg_pos < method->NumArgs()) &&
+      method->IsParamALongOrDouble(arg_pos)) {
     // itr_slots_ needs to be an even number, according to AAPCS.
     if ((itr_slots_ & 0x1u) != 0) {
       itr_slots_++;
     }
   }
+}
 
+bool ArmJniCallingConvention::IsCurrentParamInRegister() {
   return itr_slots_ < 4;
 }
 
@@ -187,7 +218,7 @@
 FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() {
   CHECK_GE(itr_slots_, 4u);
   return FrameOffset(displacement_.Int32Value() - OutArgSize()
-               + ((itr_slots_ - 4) * kPointerSize));
+                     + ((itr_slots_ - 4) * kPointerSize));
 }
 
 size_t ArmJniCallingConvention::NumberOfOutgoingStackArgs() {
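
A worked example of the padding computation for the LJII case named in the
commit message, compareAndSwapInt. This hand-rolls the constructor's loop over
an assumed parameter list instead of the Method API:

    #include <cassert>
    #include <cstddef>

    int main() {
      // Parameters of a non-static compareAndSwapInt(Object, long, int, int);
      // index 0 is the receiver, true marks a long or double.
      bool is_long_or_double[] = { false, false, true, false, false };
      size_t num_args = sizeof(is_long_or_double) / sizeof(is_long_or_double[0]);
      bool is_static = false;

      // Mirrors the constructor: JNIEnv* (plus jclass for static methods)
      // shifts every parameter, so parity against `check` decides the pad.
      size_t check = is_static ? 1 : 0;
      size_t padding = 0;
      for (size_t i = 0; i < num_args; i++) {
        if (((i & 1) == check) && is_long_or_double[i]) {
          padding += 4;
        }
      }
      // The long sits at parameter 2; with JNIEnv* in r0 it would start on an
      // odd slot, so one pad word restores 8-byte alignment.
      assert(padding == 4);
      return 0;
    }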
diff --git a/src/calling_convention_arm.h b/src/calling_convention_arm.h
index 8e5d4c2..4415254 100644
--- a/src/calling_convention_arm.h
+++ b/src/calling_convention_arm.h
@@ -35,19 +35,18 @@
   virtual ManagedRegister ReturnRegister();
   virtual ManagedRegister InterproceduralScratchRegister();
   // JNI calling convention
+  virtual void Next();  // Override default behavior for AAPCS
   virtual size_t FrameSize();
   virtual size_t ReturnPcOffset();
   virtual size_t OutArgSize();
   virtual const std::vector<ManagedRegister>& CalleeSaveRegisters() const {
     return callee_save_regs_;
   }
-  virtual uint32_t CoreSpillMask() const {
-    return 0x0FF0;  // R4 to R12
-  }
+  virtual uint32_t CoreSpillMask() const;
   virtual uint32_t FpSpillMask() const {
-    return 0;
+    return 0;  // Floats aren't spilled in JNI down call
   }
-  virtual bool IsOutArgRegister(ManagedRegister reg);
+  virtual bool IsMethodRegisterCrushedPreCall();
   virtual bool IsCurrentParamInRegister();
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
@@ -60,6 +59,9 @@
   // TODO: these values aren't unique and can be shared amongst instances
   std::vector<ManagedRegister> callee_save_regs_;
 
+  // Padding to ensure longs and doubles are not split in AAPCS
+  size_t padding_;
+
   DISALLOW_COPY_AND_ASSIGN(ArmJniCallingConvention);
 };
 
diff --git a/src/calling_convention_x86.cc b/src/calling_convention_x86.cc
index af464af..a8f5778 100644
--- a/src/calling_convention_x86.cc
+++ b/src/calling_convention_x86.cc
@@ -41,7 +41,7 @@
 // Managed runtime calling convention
 
 ManagedRegister X86ManagedRuntimeCallingConvention::MethodRegister() {
-  return X86ManagedRegister::FromCpuRegister(EDI);
+  return X86ManagedRegister::FromCpuRegister(EAX);
 }
 
 bool X86ManagedRuntimeCallingConvention::IsCurrentParamInRegister() {
@@ -86,8 +86,8 @@
   return FrameSize() - kPointerSize;
 }
 
-bool X86JniCallingConvention::IsOutArgRegister(ManagedRegister) {
-  return false;  // Everything is passed by stack
+bool X86JniCallingConvention::IsMethodRegisterCrushedPreCall() {
+  return GetMethod()->IsSynchronized();  // Monitor enter crushes the method register
 }
 
 bool X86JniCallingConvention::IsCurrentParamInRegister() {
diff --git a/src/calling_convention_x86.h b/src/calling_convention_x86.h
index 8230754..8c55472 100644
--- a/src/calling_convention_x86.h
+++ b/src/calling_convention_x86.h
@@ -49,7 +49,7 @@
   virtual uint32_t FpSpillMask() const {
     return 0;
   }
-  virtual bool IsOutArgRegister(ManagedRegister reg);
+  virtual bool IsMethodRegisterCrushedPreCall();
   virtual bool IsCurrentParamInRegister();
   virtual bool IsCurrentParamOnStack();
   virtual ManagedRegister CurrentParamRegister();
diff --git a/src/compiler/codegen/arm/Thumb2/Gen.cc b/src/compiler/codegen/arm/Thumb2/Gen.cc
index 652899c..e729b51 100644
--- a/src/compiler/codegen/arm/Thumb2/Gen.cc
+++ b/src/compiler/codegen/arm/Thumb2/Gen.cc
@@ -683,9 +683,9 @@
 void genThrow(CompilationUnit* cUnit, MIR* mir, RegLocation rlSrc)
 {
     loadWordDisp(cUnit, rSELF,
-                 OFFSETOF_MEMBER(Thread, pThrowException), rLR);
+                 OFFSETOF_MEMBER(Thread, pDeliverException), rLR);
     loadValueDirectFixed(cUnit, rlSrc, r0);  // Get exception object
-    callNoUnwindHelper(cUnit, rLR); // art_throw_exception(exception);
+    callNoUnwindHelper(cUnit, rLR);  // art_deliver_exception(exception);
 }
 
 static void genInstanceof(CompilationUnit* cUnit, MIR* mir, RegLocation rlDest,
diff --git a/src/context_arm.cc b/src/context_arm.cc
index 387b71d..85ada9d 100644
--- a/src/context_arm.cc
+++ b/src/context_arm.cc
@@ -8,9 +8,11 @@
 namespace arm {
 
 ArmContext::ArmContext() {
+#ifndef NDEBUG
   for (int i=0; i < 16; i++) {
-    gprs_[i] = 0xEBAD6070;
+    gprs_[i] = 0xEBAD6070 + i;
   }
+#endif
   memset(fprs_, 0, sizeof(fprs_));
 }
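
Seeding each slot with a distinct poison value (0xEBAD6070 + i) means a fault
through an unfilled register pinpoints which one leaked; for example:

    #include <stdint.h>
    #include <cstdio>

    int main() {
      // Pretend the process faulted dereferencing this address.
      uintptr_t fault_addr = 0xEBAD6073;
      if (fault_addr >= 0xEBAD6070 && fault_addr < 0xEBAD6080) {
        // With gprs_[i] = 0xEBAD6070 + i, the low bits name the register.
        std::printf("context register r%u was never filled\n",
                    static_cast<unsigned>(fault_addr - 0xEBAD6070));
      }
      return 0;
    }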
 
diff --git a/src/context_x86.cc b/src/context_x86.cc
index 43de4ba..04976a5 100644
--- a/src/context_x86.cc
+++ b/src/context_x86.cc
@@ -2,21 +2,58 @@
 
 #include "context_x86.h"
 
-#include "logging.h"
+#include "object.h"
 
 namespace art {
 namespace x86 {
 
+X86Context::X86Context() {
+  for (int i = 0; i < 8; i++) {
+    gprs_[i] = 0xEBAD6070 + i;
+  }
+}
+
+void X86Context::FillCalleeSaves(const Frame& fr) {
+  Method* method = fr.GetMethod();
+  uint32_t core_spills = method->GetCoreSpillMask();
+  size_t spill_count = __builtin_popcount(core_spills);
+  CHECK_EQ(method->GetFpSpillMask(), 0u);
+  if (spill_count > 0) {
+    // Lowest numbered spill is furthest away; walk registers and fill the context
+    int j = 1;
+    for (int i = 0; i < 8; i++) {
+      if (((core_spills >> i) & 1) != 0) {
+        gprs_[i] = fr.LoadCalleeSave(spill_count - j);
+        j++;
+      }
+    }
+  }
+}
+
 void X86Context::DoLongJump() {
 #if defined(__i386__)
   // Load ESP and EIP
-  asm volatile ( "movl %%esp, %0\n"
-                 "jmp *%1"
-      : // output
-      : "m"(esp_), "r"(&eip_)  // input
+  gprs_[ESP] -= 4;  // push EIP for return
+  *reinterpret_cast<uintptr_t*>(gprs_[ESP]) = eip_;
+  asm volatile (
+      "pushl %4\n\t"
+      "pushl %0\n\t"
+      "pushl %1\n\t"
+      "pushl %2\n\t"
+      "pushl %3\n\t"
+      "pushl %4\n\t"
+      "pushl %5\n\t"
+      "pushl %6\n\t"
+      "pushl %7\n\t"
+      "popal\n\t"
+      "popl %%esp\n\t"
+      "ret\n\t"
+      :  // output
+      : "g"(gprs_[EAX]), "g"(gprs_[ECX]), "g"(gprs_[EDX]), "g"(gprs_[EBX]),
+        "g"(gprs_[ESP]), "g"(gprs_[EBP]), "g"(gprs_[ESI]), "g"(gprs_[EDI])
       :);  // clobber
 #else
-  UNIMPLEMENTED(FATAL);
+  UNIMPLEMENTED(FATAL);
 #endif
 }
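
DoLongJump now restores seven GPRs with popal, loads the target ESP, and rets
to the EIP stored just below the new stack top. The effect is the same
register-restore-and-branch that longjmp performs; a minimal analogy using the
libc primitives:

    #include <csetjmp>
    #include <cstdio>

    static std::jmp_buf ctx;

    void ThrowingCallee() {
      // Analogous to DoLongJump(): restore the saved registers and SP, then
      // branch to the saved PC. This call never returns to its caller.
      std::longjmp(ctx, 1);
    }

    int main() {
      if (setjmp(ctx) == 0) {  // analogous to filling the Context with SP/PC
        ThrowingCallee();
      } else {
        std::puts("resumed at the handler");  // where the long jump lands
      }
      return 0;
    }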
 
diff --git a/src/context_x86.h b/src/context_x86.h
index 0e31b25..10dcbb4 100644
--- a/src/context_x86.h
+++ b/src/context_x86.h
@@ -5,19 +5,21 @@
 
 #include "context.h"
 
+#include "constants_x86.h"
+
 namespace art {
 namespace x86 {
 
 class X86Context : public Context {
  public:
-  X86Context() : esp_(0), eip_(0) {}
+  X86Context();
   virtual ~X86Context() {}
 
   // No callee saves on X86
-  virtual void FillCalleeSaves(const Frame& fr) {}
+  virtual void FillCalleeSaves(const Frame& fr);
 
   virtual void SetSP(uintptr_t new_sp) {
-    esp_ = new_sp;
+    gprs_[ESP] = new_sp;
   }
 
   virtual void SetPC(uintptr_t new_pc) {
@@ -27,8 +29,7 @@
   virtual void DoLongJump();
 
  private:
-  // Currently just ESP and EIP are used
-  uintptr_t esp_;
+  uintptr_t gprs_[8];
   uintptr_t eip_;
 };
 }  // namespace x86
diff --git a/src/jni_compiler.cc b/src/jni_compiler.cc
index 2088f7d..9624246 100644
--- a/src/jni_compiler.cc
+++ b/src/jni_compiler.cc
@@ -273,7 +273,7 @@
   }
 
   // 9. Plant call to native code associated with method
-  if (!jni_conv->IsOutArgRegister(mr_conv->MethodRegister())) {
+  if (!jni_conv->IsMethodRegisterCrushedPreCall()) {
     // Method register shouldn't have been crushed by setting up outgoing
     // arguments
     __ Call(mr_conv->MethodRegister(), Method::NativeMethodOffset(),
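
The call-planting decision, sketched as standalone code (simplified; the
crushed branch presumably reloads the Method* from the frame, which is what
the new Call(FrameOffset, Offset, scratch) overload enables on X86):

    #include <cstdio>

    struct JniConv {
      bool method_reg_crushed;
      bool IsMethodRegisterCrushedPreCall() const { return method_reg_crushed; }
    };

    void PlantNativeCall(const JniConv& jni_conv) {
      if (!jni_conv.IsMethodRegisterCrushedPreCall()) {
        // Method register survived argument setup: call through it directly.
        std::puts("call [method_reg + native_method_offset]");
      } else {
        // Register was crushed (JNIEnv* in R0 on ARM, monitor enter on X86):
        // reload the Method* from the frame into a scratch register first.
        std::puts("mov scratch, [sp + disp]; call [scratch + native_method_offset]");
      }
    }

    int main() {
      JniConv x86_plain = { false };  // e.g. a non-synchronized X86 method
      JniConv arm_any = { true };     // e.g. any ARM method
      PlantNativeCall(x86_plain);
      PlantNativeCall(arm_any);
      return 0;
    }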
diff --git a/src/jni_compiler_test.cc b/src/jni_compiler_test.cc
index 9328e85..35a76c9 100644
--- a/src/jni_compiler_test.cc
+++ b/src/jni_compiler_test.cc
@@ -448,42 +448,34 @@
   EXPECT_EQ(1, gSuspendCounterHandler_calls);
 }
 
-int gExceptionHandler_calls;
-void ExceptionHandler(Method** frame) {
-  // Check we came here in the native state then transition to runnable to work
-  // on the Object*
-  EXPECT_EQ(Thread::kNative, Thread::Current()->GetState());
-  ScopedJniThreadState ts(Thread::Current()->GetJniEnv());
-
-  EXPECT_TRUE((*frame)->GetName()->Equals("throwException"));
-  gExceptionHandler_calls++;
-  Thread::Current()->ClearException();
-}
-
 void Java_MyClass_throwException(JNIEnv* env, jobject) {
   jclass c = env->FindClass("java/lang/RuntimeException");
   env->ThrowNew(c, "hello");
 }
 
 TEST_F(JniCompilerTest, ExceptionHandling) {
-  Thread::Current()->RegisterExceptionEntryPoint(&ExceptionHandler);
-  gExceptionHandler_calls = 0;
   gJava_MyClass_foo_calls = 0;
 
+  // Check that a single call of a JNI method works
   SetupForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClass_foo));
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
   EXPECT_EQ(1, gJava_MyClass_foo_calls);
-  EXPECT_EQ(0, gExceptionHandler_calls);
+  EXPECT_FALSE(Thread::Current()->IsExceptionPending());
 
+  // Get the class of the exception we expect to be thrown
+  Class* jlre = class_linker_->FindClass("Ljava/lang/RuntimeException;", class_loader_);
   SetupForTest(false, "throwException", "()V", reinterpret_cast<void*>(&Java_MyClass_throwException));
+  // Call Java_MyClass_throwException (a JNI method that throws an exception)
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
   EXPECT_EQ(1, gJava_MyClass_foo_calls);
-  EXPECT_EQ(1, gExceptionHandler_calls);
+  EXPECT_TRUE(Thread::Current()->IsExceptionPending());
+  EXPECT_TRUE(Thread::Current()->GetException()->InstanceOf(jlre));
+  Thread::Current()->ClearException();
 
+  // Check that a JNI method still works after the exception has been cleared
   SetupForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClass_foo));
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
   EXPECT_EQ(2, gJava_MyClass_foo_calls);
-  EXPECT_EQ(1, gExceptionHandler_calls);
 }
 
 jint Java_MyClass_nativeUpCall(JNIEnv* env, jobject thisObj, jint i) {
@@ -558,4 +550,20 @@
   env_->CallStaticVoidMethod(jklass_, jmethod_, jobj_, 1234, jklass_, 5678, 9876);
 }
 
+jboolean my_casi(JNIEnv* env, jobject unsafe, jobject obj, jlong offset, jint expected, jint newval) {
+  EXPECT_TRUE(env->IsSameObject(JniCompilerTest::jobj_, unsafe));
+  EXPECT_TRUE(env->IsSameObject(JniCompilerTest::jobj_, obj));
+  EXPECT_EQ(0x12345678ABCDEF88ll, offset);
+  EXPECT_EQ(static_cast<jint>(0xCAFEF00D), expected);
+  EXPECT_EQ(static_cast<jint>(0xEBADF00D), newval);
+  return JNI_TRUE;
+}
+
+TEST_F(JniCompilerTest, CompareAndSwapInt) {
+  SetupForTest(false, "compareAndSwapInt", "(Ljava/lang/Object;JII)Z",
+               reinterpret_cast<void*>(&my_casi));
+  jboolean result = env_->CallBooleanMethod(jobj_, jmethod_, jobj_, 0x12345678ABCDEF88ll, 0xCAFEF00D, 0xEBADF00D);
+  EXPECT_EQ(result, JNI_TRUE);
+}
+
 }  // namespace art
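
Under AAPCS the my_casi arguments should land as r0 = JNIEnv*, r1 = unsafe,
r2 = obj, r3 = pad, with offset/expected/newval on the stack. A hand-checked
slot count of that layout (a sketch, not generated code):

    #include <cassert>

    int main() {
      int slot = 0;
      slot += 1;             // JNIEnv*                -> r0
      slot += 1;             // jobject unsafe (this)  -> r1
      slot += 1;             // jobject obj            -> r2
      if (slot & 1) slot++;  // jlong must start even: r3 becomes the pad word
      assert(slot == 4);     // so the long begins in the first stack slot
      slot += 2;             // jlong offset
      slot += 1;             // jint expected
      slot += 1;             // jint newval
      assert(slot == 8);     // four outgoing stack words, plus the pad in r3
      return 0;
    }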
diff --git a/src/jni_internal_x86.cc b/src/jni_internal_x86.cc
index a096da0..7128c6e 100644
--- a/src/jni_internal_x86.cc
+++ b/src/jni_internal_x86.cc
@@ -28,16 +28,15 @@
   UniquePtr<X86Assembler> assembler(
       down_cast<X86Assembler*>(Assembler::Create(kX86)));
 #define __ assembler->
-  // Size of frame - spill of EDI + Method* + possible receiver + arg array
+  // Size of frame = return address + Method* + possible receiver + arg array
   size_t frame_size = (2 * kPointerSize) +
                       (method->IsStatic() ? 0 : kPointerSize) +
                       method->NumArgArrayBytes();
   size_t pad_size = RoundUp(frame_size, kStackAlignment) - frame_size;
 
-  __ pushl(EDI);                   // preserve EDI
-  __ movl(EDI, Address(ESP, 8));   // EDI = method
-  __ movl(EAX, Address(ESP, 12));  // EAX = receiver
-  __ movl(EDX, Address(ESP, 20));  // EDX = arg array
+  __ movl(EAX, Address(ESP, 4));   // EAX = method
+  __ movl(ECX, Address(ESP, 8));   // ECX = receiver
+  __ movl(EDX, Address(ESP, 16));  // EDX = arg array
 
   // Push padding
   if (pad_size != 0) {
@@ -49,35 +48,35 @@
     __ pushl(Address(EDX, off - kPointerSize));
   }
   if (!method->IsStatic()) {
-    __ pushl(EAX);
+    __ pushl(ECX);
   }
   // Push 0 as NULL Method* thereby terminating managed stack crawls
   __ pushl(Immediate(0));
-  __ call(Address(EDI, method->GetCodeOffset()));  // Call code off of method
 
-  // pop arguments and padding up to saved EDI
+  __ call(Address(EAX, method->GetCodeOffset()));  // Call code off of method
+
+  // pop arguments up to the return address
   __ addl(ESP, Immediate(frame_size + pad_size - kPointerSize));
   char ch = method->GetShorty()->CharAt(0);
   if (ch != 'V') {
     // Load the result JValue pointer.
-    __ movl(EDI, Address(ESP, 24));
+    __ movl(ECX, Address(ESP, 20));
     switch (ch) {
       case 'D':
-        __ fstpl(Address(EDI, 0));
+        __ fstpl(Address(ECX, 0));
         break;
       case 'F':
-        __ fstps(Address(EDI, 0));
+        __ fstps(Address(ECX, 0));
         break;
       case 'J':
-        __ movl(Address(EDI, 0), EAX);
-        __ movl(Address(EDI, 4), EDX);
+        __ movl(Address(ECX, 0), EAX);
+        __ movl(Address(ECX, 4), EDX);
         break;
       default:
-        __ movl(Address(EDI, 0), EAX);
+        __ movl(Address(ECX, 0), EAX);
         break;
     }
   }
-  __ popl(EDI);  // restore EDI
   __ ret();
   // TODO: store native_entry in the stub table
   ByteArray* code = ByteArray::Alloc(assembler->CodeSize());
diff --git a/src/object.cc b/src/object.cc
index 157bfc9..7f247c7 100644
--- a/src/object.cc
+++ b/src/object.cc
@@ -488,8 +488,8 @@
 uint32_t Method::ToDexPC(const uintptr_t pc) const {
   IntArray* mapping_table = GetMappingTable();
   if (mapping_table == NULL) {
-    DCHECK(pc == 0);
-    return DexFile::kDexNoIndex;   // Special no mapping/pc == -1 case
+    DCHECK(IsNative());
+    return DexFile::kDexNoIndex;   // Special no mapping case
   }
   size_t mapping_table_length = mapping_table->GetLength();
   uint32_t sought_offset = pc - reinterpret_cast<uintptr_t>(GetCode());
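
For reference, the lookup this guard protects, assuming the mapping table is a
flat array of (native offset, dex PC) pairs; the pair layout is an assumption
for illustration, not a quote of the real format:

    #include <stdint.h>
    #include <cassert>
    #include <vector>

    static const uint32_t kDexNoIndex = 0xFFFFFFFF;

    uint32_t ToDexPC(const std::vector<uint32_t>& table, uint32_t sought_offset) {
      if (table.empty()) {
        return kDexNoIndex;  // native methods have no mapping table
      }
      for (size_t i = 0; i + 1 < table.size(); i += 2) {
        if (table[i] == sought_offset) {
          return table[i + 1];
        }
      }
      return kDexNoIndex;
    }

    int main() {
      uint32_t data[] = { 0x10, 5, 0x24, 9 };  // {native offset, dex pc} pairs
      std::vector<uint32_t> table(data, data + 4);
      std::vector<uint32_t> empty;
      assert(ToDexPC(table, 0x24) == 9);
      assert(ToDexPC(empty, 0x24) == kDexNoIndex);
      return 0;
    }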
diff --git a/src/runtime_support.S b/src/runtime_support.S
index 458102d..8b904b8 100644
--- a/src/runtime_support.S
+++ b/src/runtime_support.S
@@ -2,19 +2,19 @@
 
     .balign 4
 
-    .global art_throw_exception
-    .extern artThrowExceptionHelper
+    .global art_deliver_exception
+    .extern artDeliverExceptionHelper
     /*
-     * Called by managed code, saves all registers (forms basis of long jump context).
+     * Called by managed code, saves most registers (forms basis of long jump context).
      * artThrowExceptionHelper will place a mock Method* at the bottom of the thread.
      * r0 holds Throwable
      */
-art_throw_exception:
+art_deliver_exception:
     stmdb  sp!, {r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, lr}
-    sub sp, #16                @ 4 words of space, bottom word will hold Method*
+    sub sp, #16                  @ 4 words of space, bottom word will hold Method*
     mov r1, r9
     mov r2, sp
-    b artThrowExceptionHelper  @ artThrowExceptionHelper(Throwable*, SP)
+    b artDeliverExceptionHelper  @ artDeliverExceptionHelper(Throwable*, SP)
 
     .global art_invoke_interface_trampoline
     .extern artFindInterfaceMethodInCache
@@ -122,3 +122,33 @@
     bx      lr
 
 #endif
+
+#if defined(__i386__)
+
+    .global art_deliver_exception
+    .extern artDeliverExceptionHelper
+    .extern _ZN3art6Thread5self_E
+    /*
+     * Called by managed code, saves callee saves and then calls artDeliverExceptionHelper
+     * that will place a mock Method* at the bottom of the stack.
+     * EAX holds the exception.
+     */
+art_deliver_exception:
+    // Create frame
+    pushl %edi  // Save callee saves
+    pushl %esi
+    pushl %ebp
+    pushl %ebx
+    pushl $0
+    pushl $0
+    pushl $0   // Will be clobbered to be Method*
+    mov %esp, %ecx
+    // Outgoing argument set up
+    pushl $0    // Alignment padding
+    pushl %ecx
+    pushl $0    // TODO: pass fs:offsetof(Thread,self_) - for now this is computed in the helper
+    pushl %eax
+    call artDeliverExceptionHelper  // artDeliverExceptionHelper(Throwable*, Thread*, SP)
+    int3
+
+#endif
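
The x86 frame built by art_deliver_exception lines up with the
CalleeSaveMethod() values added in thread.cc; a self-contained cross-check
(register numbers follow the standard x86 encoding):

    #include <stdint.h>
    #include <cassert>

    int main() {
      const int kPointerSize = 4;
      // art_deliver_exception pushes 4 callee saves (EDI, ESI, EBP, EBX) and
      // 3 zero words, the bottom of which is clobbered to be the Method*.
      int pushed_words = 4 + 3;
      // The caller's return address is already on the stack above the saves.
      int frame_size = (pushed_words + 1) * kPointerSize;
      assert(frame_size == 32);                 // SetFrameSizeInBytes(32)
      assert(frame_size - kPointerSize == 28);  // SetReturnPcOffsetInBytes(28)

      // EBX = 3, EBP = 5, ESI = 6, EDI = 7 in the usual x86 numbering.
      uint32_t core_spills = (1u << 3) | (1u << 5) | (1u << 6) | (1u << 7);
      assert(__builtin_popcount(core_spills) == 4);
      return 0;
    }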
diff --git a/src/runtime_support.h b/src/runtime_support.h
index 671d7dc..4915314 100644
--- a/src/runtime_support.h
+++ b/src/runtime_support.h
@@ -9,7 +9,7 @@
   extern "C" uint64_t art_shr_long(uint64_t, uint32_t);
   extern "C" uint64_t art_ushr_long(uint64_t, uint32_t);
   extern "C" void art_invoke_interface_trampoline(void*, void*, void*, void*);
-  extern "C" void art_throw_exception(void*);
+  extern "C" void art_deliver_exception(void*);
 
   /* Conversions */
   extern "C" float __aeabi_i2f(int op1);             // OP_INT_TO_FLOAT
@@ -45,4 +45,8 @@
 
 #endif
 
+#if defined(__i386__)
+extern "C" void art_deliver_exception(void*);
+#endif
+
 #endif  // ART_SRC_RUNTIME_SUPPORT_H_
diff --git a/src/stub_x86.cc b/src/stub_x86.cc
index c23c751..13f9c07 100644
--- a/src/stub_x86.cc
+++ b/src/stub_x86.cc
@@ -16,9 +16,8 @@
 
   // Pad stack to ensure 16-byte alignment
   __ pushl(Immediate(0));
-  __ pushl(Immediate(0));
   __ fs()->pushl(Address::Absolute(Thread::SelfOffset()));  // Thread*
-  __ pushl(EDI); // Method*
+  __ pushl(EAX); // Method*
 
   // Call to throw AbstractMethodError
   __ Call(ThreadOffset(OFFSETOF_MEMBER(Thread, pThrowAbstractMethodErrorFromCode)),
@@ -45,14 +44,12 @@
   // Pad stack to ensure 16-byte alignment
   __ pushl(Immediate(0));
   __ pushl(Immediate(0));
-  __ pushl(Immediate(0));
-  __ fs()->movl(ECX, Address::Absolute(Thread::SelfOffset()));
-  __ pushl(ECX);  // Thread*
+  __ fs()->pushl(Address::Absolute(Thread::SelfOffset()));  // Thread*
 
   __ Call(ThreadOffset(OFFSETOF_MEMBER(Thread, pFindNativeMethod)),
           X86ManagedRegister::FromCpuRegister(ECX));
 
-  __ addl(ESP, Immediate(16));
+  __ addl(ESP, Immediate(12));
 
   Label no_native_code_found;  // forward declaration
   __ cmpl(EAX, Immediate(0));
diff --git a/src/thread.cc b/src/thread.cc
index 83c1b81..86994cf 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -52,9 +52,9 @@
 }  // namespace art
 
 // Called by generated call to throw an exception
-extern "C" void artThrowExceptionHelper(art::Throwable* exception,
-                                        art::Thread* thread,
-                                        art::Method** sp) {
+extern "C" void artDeliverExceptionHelper(art::Throwable* exception,
+                                          art::Thread* thread,
+                                          art::Method** sp) {
   /*
    * exception may be NULL, in which case this routine should
    * throw NPE.  NOTE: this is a convenience for generated code,
@@ -62,6 +62,10 @@
    * and threw a NPE if NULL.  This routine responsible for setting
    * exception_ in thread and delivering the exception.
    */
+#if defined(__i386__)
+  thread = art::Thread::Current();  // TODO: fix passing this in as an argument
+#endif
+  // Place a special frame at the TOS that will save all callee saves
   *sp = thread->CalleeSaveMethod();
   thread->SetTopOfStack(sp, 0);
   thread->DeliverException(exception);
@@ -306,7 +310,10 @@
   pLdivmod = __aeabi_ldivmod;
   pLmul = __aeabi_lmul;
   pInvokeInterfaceTrampoline = art_invoke_interface_trampoline;
-  pThrowException = art_throw_exception;
+  pDeliverException = art_deliver_exception;
+#endif
+#if defined(__i386__)
+  pDeliverException = art_deliver_exception;
 #endif
   pF2l = F2L;
   pD2l = D2L;
@@ -347,9 +354,14 @@
 }
 
 void Frame::Next() {
+  size_t frame_size = GetMethod()->GetFrameSizeInBytes();
+  DCHECK_NE(frame_size, 0u);
+  DCHECK_LT(frame_size, 1024u);
   byte* next_sp = reinterpret_cast<byte*>(sp_) +
-      GetMethod()->GetFrameSizeInBytes();
+      frame_size;
   sp_ = reinterpret_cast<Method**>(next_sp);
+  DCHECK(*sp_ == NULL ||
+         (*sp_)->GetClass()->GetDescriptor()->Equals("Ljava/lang/reflect/Method;"));
 }
 
 uintptr_t Frame::GetReturnPC() const {
@@ -365,6 +377,9 @@
   size_t frame_size = method->GetFrameSizeInBytes();
   byte* save_addr = reinterpret_cast<byte*>(sp_) + frame_size -
                     ((num + 1) * kPointerSize);
+#if defined(__i386__)
+  save_addr -= kPointerSize;  // account for return address
+#endif
   return *reinterpret_cast<uintptr_t*>(save_addr);
 }
 
@@ -927,6 +942,11 @@
   return false;
 }
 
+void Thread::PopSirt() {
+  CHECK(top_sirt_ != NULL);
+  top_sirt_ = top_sirt_->Link();
+}
+
 Object* Thread::DecodeJObject(jobject obj) {
   DCHECK(CanAccessDirectReferences());
   if (obj == NULL) {
@@ -1069,15 +1089,19 @@
   }
 }
 
-void Thread::WalkStackUntilUpCall(StackVisitor* visitor) const {
+void Thread::WalkStackUntilUpCall(StackVisitor* visitor, bool include_upcall) const {
   Frame frame = GetTopOfStack();
   uintptr_t pc = top_of_managed_stack_pc_;
 
   if (frame.GetSP() != 0) {
     for ( ; frame.GetMethod() != 0; frame.Next()) {
+      DCHECK(frame.GetMethod()->IsWithinCode(pc));
       visitor->VisitFrame(frame, pc);
       pc = frame.GetReturnPC();
     }
+    if (include_upcall) {
+      visitor->VisitFrame(frame, pc);
+    }
   }
 }
 
@@ -1167,39 +1191,78 @@
 
 Method* Thread::CalleeSaveMethod() const {
   // TODO: we should only allocate this once
-  // TODO: this code is ARM specific
   Method* method = Runtime::Current()->GetClassLinker()->AllocMethod();
+#if defined(__arm__)
   method->SetCode(NULL, art::kThumb2, NULL);
   method->SetFrameSizeInBytes(64);
   method->SetReturnPcOffsetInBytes(60);
-  method->SetCoreSpillMask(0x4FFE);
+  method->SetCoreSpillMask((1 << art::arm::R1) |
+                           (1 << art::arm::R2) |
+                           (1 << art::arm::R3) |
+                           (1 << art::arm::R4) |
+                           (1 << art::arm::R5) |
+                           (1 << art::arm::R6) |
+                           (1 << art::arm::R7) |
+                           (1 << art::arm::R8) |
+                           (1 << art::arm::R9) |
+                           (1 << art::arm::R10) |
+                           (1 << art::arm::R11) |
+                           (1 << art::arm::LR));
   method->SetFpSpillMask(0);
+#elif defined(__i386__)
+  method->SetCode(NULL, art::kX86, NULL);
+  method->SetFrameSizeInBytes(32);
+  method->SetReturnPcOffsetInBytes(28);
+  method->SetCoreSpillMask((1 << art::x86::EBX) |
+                           (1 << art::x86::EBP) |
+                           (1 << art::x86::ESI) |
+                           (1 << art::x86::EDI));
+  method->SetFpSpillMask(0);
+#else
+  UNIMPLEMENTED(FATAL);
+#endif
   return method;
 }
 
 class CatchBlockStackVisitor : public Thread::StackVisitor {
  public:
   CatchBlockStackVisitor(Class* to_find, Context* ljc)
-      : found_(false), to_find_(to_find), long_jump_context_(ljc) {}
+      : found_(false), to_find_(to_find), long_jump_context_(ljc), native_method_count_(0) {
+#ifndef NDEBUG
+    handler_pc_ = 0xEBADC0DE;
+    handler_frame_.SetSP(reinterpret_cast<Method**>(0xEBADF00D));
+#endif
+  }
 
   virtual void VisitFrame(const Frame& fr, uintptr_t pc) {
     if (!found_) {
-      last_pc_ = pc;
-      handler_frame_ = fr;
       Method* method = fr.GetMethod();
-      if (pc > 0) {
-        // Move the PC back 2 bytes as a call will frequently terminate the
-        // decoding of a particular instruction and we want to make sure we
-        // get the Dex PC of the instruction with the call and not the
-        // instruction following.
-        pc -= 2;
+      if (method == NULL) {
+        // This is the upcall; remember the frame and PC so that we can
+        // long jump to them if no handler is found
+        handler_pc_ = pc;
+        handler_frame_ = fr;
+        return;
       }
-      uint32_t dex_pc = method->ToDexPC(pc);
+      uint32_t dex_pc = DexFile::kDexNoIndex;
+      if (pc > 0) {
+        if (method->IsNative()) {
+          native_method_count_++;
+        } else {
+          // Move the PC back 2 bytes as a call will frequently terminate the
+          // decoding of a particular instruction and we want to make sure we
+          // get the Dex PC of the instruction with the call and not the
+          // instruction following.
+          pc -= 2;
+          dex_pc = method->ToDexPC(pc);
+        }
+      }
       if (dex_pc != DexFile::kDexNoIndex) {
         uint32_t found_dex_pc = method->FindCatchBlock(to_find_, dex_pc);
         if (found_dex_pc != DexFile::kDexNoIndex) {
           found_ = true;
-          handler_dex_pc_ = found_dex_pc;
+          handler_pc_ = method->ToNativePC(found_dex_pc);
+          handler_frame_ = fr;
         }
       }
       if (!found_) {
@@ -1215,11 +1278,12 @@
   Class* to_find_;
   // Frame with found handler or last frame if no handler found
   Frame handler_frame_;
-  // Found dex PC of the handler block
-  uint32_t handler_dex_pc_;
+  // PC to branch to for the handler
+  uintptr_t handler_pc_;
   // Context that will be the target of the long jump
   Context* long_jump_context_;
-  uintptr_t last_pc_;
+  // Number of native methods passed in crawl (equates to number of SIRTs to pop)
+  uint32_t native_method_count_;
 };
 
 void Thread::DeliverException(Throwable* exception) {
@@ -1227,16 +1291,18 @@
 
   Context* long_jump_context = GetLongJumpContext();
   CatchBlockStackVisitor catch_finder(exception->GetClass(), long_jump_context);
-  WalkStackUntilUpCall(&catch_finder);
+  WalkStackUntilUpCall(&catch_finder, true);
 
-  long_jump_context->SetSP(reinterpret_cast<intptr_t>(catch_finder.handler_frame_.GetSP()));
-  uintptr_t long_jump_pc;
-  if (catch_finder.found_) {
-    long_jump_pc = catch_finder.handler_frame_.GetMethod()->ToNativePC(catch_finder.handler_dex_pc_);
+  // Pop any SIRT
+  if (catch_finder.native_method_count_ == 1) {
+    PopSirt();
   } else {
-    long_jump_pc = catch_finder.last_pc_;
+    // We only expect the stack crawl to have passed 1 native method, as it's
+    // terminated by an upcall
+    DCHECK_EQ(catch_finder.native_method_count_, 0u);
   }
-  long_jump_context->SetPC(long_jump_pc);
+  long_jump_context->SetSP(reinterpret_cast<intptr_t>(catch_finder.handler_frame_.GetSP()));
+  long_jump_context->SetPC(catch_finder.handler_pc_);
   long_jump_context->DoLongJump();
 }
 
diff --git a/src/thread.h b/src/thread.h
index 860a185..f4cc747 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -212,7 +212,7 @@
   Method* (*pFindInterfaceMethodInCache)(Class*, uint32_t, const Method*, struct DvmDex*);
   void (*pUnlockObjectFromCode)(Thread*, Object*);
   void (*pLockObjectFromCode)(Thread*, Object*);
-  void (*pThrowException)(void*);
+  void (*pDeliverException)(void*);
   void (*pHandleFillArrayDataFromCode)(Array*, const uint16_t*);
   Class* (*pInitializeTypeFromCode)(uint32_t, Method*);
   void (*pResolveMethodFromCode)(Method*, uint32_t);
@@ -385,6 +385,9 @@
   // Is the given obj in this thread's stack indirect reference table?
   bool SirtContains(jobject obj);
 
+  // Pop the top SIRT
+  void PopSirt();
+
   // Convert a jobject into a Object*
   Object* DecodeJObject(jobject obj);
 
@@ -416,10 +419,6 @@
     NotifyLocked();
   }
 
-  void RegisterExceptionEntryPoint(void (*handler)(Method**)) {
-    exception_entry_point_ = handler;
-  }
-
   void RegisterSuspendCountEntryPoint(void (*handler)(Method**)) {
     suspend_count_entry_point_ = handler;
   }
@@ -507,10 +506,6 @@
     return ThreadOffset(OFFSETOF_MEMBER(Thread, top_sirt_));
   }
 
-  static ThreadOffset ExceptionEntryPointOffset() {
-    return ThreadOffset(OFFSETOF_MEMBER(Thread, exception_entry_point_));
-  }
-
   static ThreadOffset SuspendCountEntryPointOffset() {
     return ThreadOffset(OFFSETOF_MEMBER(Thread, suspend_count_entry_point_));
   }
@@ -543,7 +538,7 @@
 
   void WalkStack(StackVisitor* visitor) const;
 
-  void WalkStackUntilUpCall(StackVisitor* visitor) const;
+  void WalkStackUntilUpCall(StackVisitor* visitor, bool include_upcall) const;
 
   // Thin lock thread id. This is a small integer used by the thin lock implementation.
   // This is not to be confused with the native thread's tid, nor is it the value returned
@@ -624,9 +619,6 @@
   // TLS key used to retrieve the VM thread object.
   static pthread_key_t pthread_key_self_;
 
-  // Entry point called when exception_ is set
-  void (*exception_entry_point_)(Method** frame);
-
   // Entry point called when suspend_count_ is non-zero
   void (*suspend_count_entry_point_)(Method** frame);