Merge "Switch on implicit null pointer and stack overflow checks."
diff --git a/compiler/dex/mir_dataflow.cc b/compiler/dex/mir_dataflow.cc
index 2f17e08..ed7e1f5 100644
--- a/compiler/dex/mir_dataflow.cc
+++ b/compiler/dex/mir_dataflow.cc
@@ -925,11 +925,17 @@
 int MIRGraph::AddNewSReg(int v_reg) {
   // Compiler temps always have a subscript of 0
   int subscript = (v_reg < 0) ? 0 : ++ssa_last_defs_[v_reg];
-  int ssa_reg = GetNumSSARegs();
+  uint32_t ssa_reg = GetNumSSARegs();
   SetNumSSARegs(ssa_reg + 1);
   ssa_base_vregs_->Insert(v_reg);
   ssa_subscripts_->Insert(subscript);
   DCHECK_EQ(ssa_base_vregs_->Size(), ssa_subscripts_->Size());
+  // If we are expanding very late, update use counts too.
+  if (ssa_reg > 0 && use_counts_.Size() == ssa_reg) {
+    // Need to expand the counts.
+    use_counts_.Insert(0);
+    raw_use_counts_.Insert(0);
+  }
   return ssa_reg;
 }
 
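A note on the hunk above: the counts are only expanded when they were already
sized to the previous number of SSA registers, so early calls (before the use
counts exist) are unaffected. A standalone restatement of that guard (the helper
name is hypothetical, not part of the change):

    #include <cstddef>
    // Mirrors the condition in AddNewSReg(): append a zero use count only when
    // the count arrays are in use and exactly one entry behind the new register.
    static bool NeedsUseCountExpansion(size_t use_counts_size, size_t new_ssa_reg) {
      return new_ssa_reg > 0 && use_counts_size == new_ssa_reg;
    }
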
diff --git a/compiler/dex/mir_optimization.cc b/compiler/dex/mir_optimization.cc
index 5c1bdf4..5cc994f 100644
--- a/compiler/dex/mir_optimization.cc
+++ b/compiler/dex/mir_optimization.cc
@@ -286,10 +286,6 @@
         reg_location_[ssa_reg_high].high_word = 1;
         reg_location_[ssa_reg_high].s_reg_low = ssa_reg_low;
         reg_location_[ssa_reg_high].wide = true;
-
-        // A new SSA needs new use counts.
-        use_counts_.Insert(0);
-        raw_use_counts_.Insert(0);
       }
 
       num_non_special_compiler_temps_++;
@@ -302,10 +298,6 @@
     reg_location_[ssa_reg_low] = temp_loc;
     reg_location_[ssa_reg_low].s_reg_low = ssa_reg_low;
     reg_location_[ssa_reg_low].wide = wide;
-
-    // A new SSA needs new use counts.
-    use_counts_.Insert(0);
-    raw_use_counts_.Insert(0);
   }
 
   compiler_temps_.Insert(compiler_temp);
diff --git a/compiler/dex/quick/arm/assemble_arm.cc b/compiler/dex/quick/arm/assemble_arm.cc
index cac766d..a895e6e 100644
--- a/compiler/dex/quick/arm/assemble_arm.cc
+++ b/compiler/dex/quick/arm/assemble_arm.cc
@@ -1213,7 +1213,7 @@
   cu_->NewTimingSplit("Assemble");
   int assembler_retries = 0;
   CodeOffset starting_offset = LinkFixupInsns(first_lir_insn_, last_lir_insn_, 0);
-  data_offset_ = (starting_offset + 0x3) & ~0x3;
+  data_offset_ = RoundUp(starting_offset, 4);
   int32_t offset_adjustment;
   AssignDataOffsets();
 
@@ -1596,7 +1596,7 @@
         LOG(FATAL) << "Assembler error - too many retries";
       }
       starting_offset += offset_adjustment;
-      data_offset_ = (starting_offset + 0x3) & ~0x3;
+      data_offset_ = RoundUp(starting_offset, 4);
       AssignDataOffsets();
     }
   }
@@ -1609,7 +1609,7 @@
   write_pos = EncodeLIRs(write_pos, first_lir_insn_);
   DCHECK_EQ(static_cast<CodeOffset>(write_pos - &code_buffer_[0]), starting_offset);
 
-  DCHECK_EQ(data_offset_, (code_buffer_.size() + 0x3) & ~0x3);
+  DCHECK_EQ(data_offset_, RoundUp(code_buffer_.size(), 4));
 
   // Install literals
   InstallLiteralPools();
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index 163c0fe..d3477c9 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -360,6 +360,22 @@
     if (Runtime::Current()->ExplicitStackOverflowChecks()) {
       /* Load stack limit */
       Load32Disp(rs_rARM_SELF, Thread::StackEndOffset<4>().Int32Value(), rs_r12);
+    } else {
+      // Implicit stack overflow check.
+      // Generate a load from [sp, #-overflowsize].  If this is in the stack
+      // redzone we will get a segmentation fault.
+      //
+      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
+      // we need to make sure that it's loadable in an immediate field of
+      // a sub instruction.  Otherwise we will get a temp allocation and the
+      // code size will increase.
+      //
+      // This is done before the callee save instructions so that the saves
+      // themselves cannot overflow into the protected region.  r12 is used
+      // because it is never saved as a callee save.
+      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
+      Load32Disp(rs_r12, 0, rs_r12);
+      MarkPossibleStackOverflowException();
     }
   }
   /* Spill core callee saves */
@@ -418,17 +434,8 @@
         AddSlowPath(new(arena_)StackOverflowSlowPath(this, branch, false, frame_size_));
       }
     } else {
-      // Implicit stack overflow check.
-      // Generate a load from [sp, #-overflowsize].  If this is in the stack
-      // redzone we will get a segmentation fault.
-      //
-      // Caveat coder: if someone changes the kStackOverflowReservedBytes value
-      // we need to make sure that it's loadable in an immediate field of
-      // a sub instruction.  Otherwise we will get a temp allocation and the
-      // code size will increase.
-      OpRegRegImm(kOpSub, rs_r12, rs_rARM_SP, Thread::kStackOverflowReservedBytes);
-      Load32Disp(rs_r12, 0, rs_r12);
-      MarkPossibleStackOverflowException();
+      // The implicit stack overflow check has already been done.  Just make room
+      // on the stack for the frame now.
       OpRegImm(kOpSub, rs_rARM_SP, frame_size_without_spills);
     }
   } else {
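
The probe emitted in the prologue above is what the ARM fault handler (see
fault_handler_arm.cc below) recognizes: a faulting load whose address is exactly
sp minus the reserved region. A minimal sketch of that relationship, with the
constant passed in as a parameter standing in for Thread::kStackOverflowReservedBytes
(the helper itself is hypothetical):

    #include <cstdint>
    // The prologue loads from sp - kStackOverflowReservedBytes, so a SIGSEGV whose
    // fault address equals that value is treated as a stack overflow rather than
    // an ordinary null-pointer fault.
    static bool IsImplicitStackOverflowProbe(uintptr_t fault_addr, uintptr_t sp,
                                             uintptr_t reserved_bytes) {
      return fault_addr == sp - reserved_bytes;
    }
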
diff --git a/compiler/dex/quick/codegen_util.cc b/compiler/dex/quick/codegen_util.cc
index 9f84e09..de13a2e 100644
--- a/compiler/dex/quick/codegen_util.cc
+++ b/compiler/dex/quick/codegen_util.cc
@@ -558,7 +558,7 @@
 static int AssignLiteralPointerOffsetCommon(LIR* lir, CodeOffset offset,
                                             unsigned int element_size) {
   // Align to natural pointer size.
-  offset = (offset + (element_size - 1)) & ~(element_size - 1);
+  offset = RoundUp(offset, element_size);
   for (; lir != NULL; lir = lir->next) {
     lir->offset = offset;
     offset += element_size;
@@ -758,7 +758,7 @@
     tab_rec->offset = offset;
     offset += tab_rec->size;
     // word align
-    offset = (offset + 3) & ~3;
+    offset = RoundUp(offset, 4);
     }
   return offset;
 }
@@ -1049,14 +1049,13 @@
 
 int Mir2Lir::ComputeFrameSize() {
   /* Figure out the frame size */
-  static const uint32_t kAlignMask = kStackAlignment - 1;
   uint32_t size = num_core_spills_ * GetBytesPerGprSpillLocation(cu_->instruction_set)
                   + num_fp_spills_ * GetBytesPerFprSpillLocation(cu_->instruction_set)
                   + sizeof(uint32_t)  // Filler.
                   + (cu_->num_regs + cu_->num_outs) * sizeof(uint32_t)
                   + GetNumBytesForCompilerTempSpillRegion();
   /* Align and set */
-  return (size + kAlignMask) & ~(kAlignMask);
+  return RoundUp(size, kStackAlignment);
 }
 
 /*
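
For reference, the RoundUp used throughout these hunks is the templated helper
from utils.h (now also included by arena_allocator.h below). For the power-of-two
alignments used here it is assumed to behave exactly like the expressions it
replaces; a minimal sketch:

    #include <cstddef>
    // Rough equivalent of the assumed helper; the real one in utils.h is templated.
    // n must be a power of two.
    static inline size_t RoundUp(size_t x, size_t n) {
      return (x + n - 1) & ~(n - 1);  // identical to the old "(x + 3) & ~3" for n == 4
    }
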
diff --git a/compiler/dex/quick/mips/assemble_mips.cc b/compiler/dex/quick/mips/assemble_mips.cc
index baae319..b26ab57 100644
--- a/compiler/dex/quick/mips/assemble_mips.cc
+++ b/compiler/dex/quick/mips/assemble_mips.cc
@@ -748,7 +748,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
diff --git a/compiler/dex/quick/x86/assemble_x86.cc b/compiler/dex/quick/x86/assemble_x86.cc
index 0fc5c6e..7436e39 100644
--- a/compiler/dex/quick/x86/assemble_x86.cc
+++ b/compiler/dex/quick/x86/assemble_x86.cc
@@ -1483,7 +1483,7 @@
   int offset = AssignInsnOffsets();
 
   /* Const values have to be word aligned */
-  offset = (offset + 3) & ~3;
+  offset = RoundUp(offset, 4);
 
   /* Set up offsets for literals */
   data_offset_ = offset;
diff --git a/compiler/dex/quick/x86/int_x86.cc b/compiler/dex/quick/x86/int_x86.cc
index 4446f43..b747102 100644
--- a/compiler/dex/quick/x86/int_x86.cc
+++ b/compiler/dex/quick/x86/int_x86.cc
@@ -325,49 +325,60 @@
   int32_t val_lo = Low32Bits(val);
   int32_t val_hi = High32Bits(val);
   LIR* taken = &block_label_list_[bb->taken];
-  LIR* not_taken = &block_label_list_[bb->fall_through];
   rl_src1 = LoadValueWide(rl_src1, kCoreReg);
+  bool is_equality_test = ccode == kCondEq || ccode == kCondNe;
+  if (is_equality_test && val != 0) {
+    rl_src1 = ForceTempWide(rl_src1);
+  }
   RegStorage low_reg = rl_src1.reg.GetLow();
   RegStorage high_reg = rl_src1.reg.GetHigh();
 
-  if (val == 0 && (ccode == kCondEq || ccode == kCondNe)) {
-    RegStorage t_reg = AllocTemp();
-    OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
-    FreeTemp(t_reg);
-    OpCondBranch(ccode, taken);
-    return;
+  if (is_equality_test) {
+    // We can simplify the comparison for ==, != against 0.
+    if (val == 0) {
+      if (IsTemp(low_reg)) {
+        OpRegReg(kOpOr, low_reg, high_reg);
+        // We have now changed it; ignore the old values.
+        Clobber(rl_src1.reg);
+      } else {
+        RegStorage t_reg = AllocTemp();
+        OpRegRegReg(kOpOr, t_reg, low_reg, high_reg);
+        FreeTemp(t_reg);
+      }
+      OpCondBranch(ccode, taken);
+      return;
+    }
+
+    // For ==, != against a non-zero constant, compute the difference and test it for zero.
+    OpRegImm(kOpSub, low_reg, val_lo);
+    NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+    OpRegReg(kOpOr, high_reg, low_reg);
+    Clobber(rl_src1.reg);
+  } else if (ccode == kCondLe || ccode == kCondGt) {
+    // Swap operands and condition code to prevent use of zero flag.
+    RegStorage tmp = AllocTypedTempWide(false, kCoreReg);
+    LoadConstantWide(tmp, val);
+    OpRegReg(kOpSub, tmp.GetLow(), low_reg);
+    OpRegReg(kOpSbc, tmp.GetHigh(), high_reg);
+    ccode = (ccode == kCondLe) ? kCondGe : kCondLt;
+    FreeTemp(tmp);
+  } else {
+    // We can use a compare for the low word to set CF.
+    OpRegImm(kOpCmp, low_reg, val_lo);
+    if (IsTemp(high_reg)) {
+      NewLIR2(kX86Sbb32RI, high_reg.GetReg(), val_hi);
+      // We have now changed it; ignore the old values.
+      Clobber(rl_src1.reg);
+    } else {
+      // mov temp_reg, high_reg; sbb temp_reg, high_constant
+      RegStorage t_reg = AllocTemp();
+      OpRegCopy(t_reg, high_reg);
+      NewLIR2(kX86Sbb32RI, t_reg.GetReg(), val_hi);
+      FreeTemp(t_reg);
+    }
   }
 
-  OpRegImm(kOpCmp, high_reg, val_hi);
-  switch (ccode) {
-    case kCondEq:
-    case kCondNe:
-      OpCondBranch(kCondNe, (ccode == kCondEq) ? not_taken : taken);
-      break;
-    case kCondLt:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondUlt;
-      break;
-    case kCondLe:
-      OpCondBranch(kCondLt, taken);
-      OpCondBranch(kCondGt, not_taken);
-      ccode = kCondLs;
-      break;
-    case kCondGt:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondHi;
-      break;
-    case kCondGe:
-      OpCondBranch(kCondGt, taken);
-      OpCondBranch(kCondLt, not_taken);
-      ccode = kCondUge;
-      break;
-    default:
-      LOG(FATAL) << "Unexpected ccode: " << ccode;
-  }
-  OpCmpImmBranch(ccode, low_reg, val_lo, taken);
+  OpCondBranch(ccode, taken);
 }
 
 void X86Mir2Lir::CalculateMagicAndShift(int divisor, int& magic, int& shift) {
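
The rewritten GenFusedLongCmpImmBranch above replaces the old two-branch sequence
with the classic sub/sbb borrow trick. A small, self-contained illustration of why
"cmp low; sbb high_copy, imm_hi; jl taken" decides a signed 64-bit less-than
(illustrative C++ only, not compiler output):

    #include <cstdint>
    static bool SignedLessThanViaBorrow(uint32_t lhs_lo, int32_t lhs_hi,
                                        uint32_t imm_lo, int32_t imm_hi) {
      // cmp lhs_lo, imm_lo  -> CF is the borrow out of the low-word subtraction.
      bool borrow = lhs_lo < imm_lo;
      // sbb high_copy, imm_hi -> high words minus the borrow; its exact sign
      // (what the "less than" condition SF != OF observes) is the sign of the
      // full 64-bit difference, so a single conditional branch suffices.
      int64_t high_diff = static_cast<int64_t>(lhs_hi) - imm_hi - (borrow ? 1 : 0);
      return high_diff < 0;
    }
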
diff --git a/compiler/utils/arena_allocator.h b/compiler/utils/arena_allocator.h
index 18a5bce..032eabc 100644
--- a/compiler/utils/arena_allocator.h
+++ b/compiler/utils/arena_allocator.h
@@ -23,6 +23,7 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "mem_map.h"
+#include "utils.h"
 
 namespace art {
 
@@ -155,7 +156,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    bytes = (bytes + 3) & ~3;
+    bytes = RoundUp(bytes, 4);
     if (UNLIKELY(ptr_ + bytes > end_)) {
       // Obtain a new block.
       ObtainNewArenaForAllocation(bytes);
diff --git a/compiler/utils/scoped_arena_allocator.cc b/compiler/utils/scoped_arena_allocator.cc
index bd78eae..b8b0e6e 100644
--- a/compiler/utils/scoped_arena_allocator.cc
+++ b/compiler/utils/scoped_arena_allocator.cc
@@ -92,7 +92,7 @@
 }
 
 void* ArenaStack::AllocValgrind(size_t bytes, ArenaAllocKind kind) {
-  size_t rounded_bytes = (bytes + kValgrindRedZoneBytes + 3) & ~3;
+  size_t rounded_bytes = RoundUp(bytes + kValgrindRedZoneBytes, 4);
   uint8_t* ptr = top_ptr_;
   if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
     ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/compiler/utils/scoped_arena_allocator.h b/compiler/utils/scoped_arena_allocator.h
index 28e86ec..d5b003c 100644
--- a/compiler/utils/scoped_arena_allocator.h
+++ b/compiler/utils/scoped_arena_allocator.h
@@ -67,7 +67,7 @@
     if (UNLIKELY(running_on_valgrind_)) {
       return AllocValgrind(bytes, kind);
     }
-    size_t rounded_bytes = (bytes + 3) & ~3;
+    size_t rounded_bytes = RoundUp(bytes, 4);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index eddaa0b..f81e2f9 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -34,7 +34,7 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow(void*);
+extern "C" void art_quick_throw_stack_overflow_from_signal();
 extern "C" void art_quick_implicit_suspend();
 
 // Get the size of a thumb2 instruction in bytes.
@@ -50,7 +50,7 @@
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   *out_sp = static_cast<uintptr_t>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << *out_sp;
+  VLOG(signals) << "sp: " << *out_sp;
   if (*out_sp == 0) {
     return;
   }
@@ -74,7 +74,7 @@
 
   // Need to work out the size of the instruction that caused the exception.
   uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  LOG(DEBUG) << "pc: " << std::hex << static_cast<void*>(ptr);
+  VLOG(signals) << "pc: " << std::hex << static_cast<void*>(ptr);
   uint32_t instr_size = GetInstructionSize(ptr);
 
   *out_return_pc = (sc->arm_pc + instr_size) | 1;
@@ -95,7 +95,7 @@
   uint32_t instr_size = GetInstructionSize(ptr);
   sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
   sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_null_pointer_exception);
-  LOG(DEBUG) << "Generating null pointer exception";
+  VLOG(signals) << "Generating null pointer exception";
   return true;
 }
 
@@ -117,10 +117,10 @@
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
   uint8_t* ptr2 = reinterpret_cast<uint8_t*>(sc->arm_pc);
   uint8_t* ptr1 = ptr2 - 4;
-  LOG(DEBUG) << "checking suspend";
+  VLOG(signals) << "checking suspend";
 
   uint16_t inst2 = ptr2[0] | ptr2[1] << 8;
-  LOG(DEBUG) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
+  VLOG(signals) << "inst2: " << std::hex << inst2 << " checkinst2: " << checkinst2;
   if (inst2 != checkinst2) {
     // Second instruction is not good, not ours.
     return false;
@@ -132,7 +132,7 @@
   bool found = false;
   while (ptr1 > limit) {
     uint32_t inst1 = ((ptr1[0] | ptr1[1] << 8) << 16) | (ptr1[2] | ptr1[3] << 8);
-    LOG(DEBUG) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
+    VLOG(signals) << "inst1: " << std::hex << inst1 << " checkinst1: " << checkinst1;
     if (inst1 == checkinst1) {
       found = true;
       break;
@@ -140,7 +140,7 @@
     ptr1 -= 2;      // Min instruction size is 2 bytes.
   }
   if (found) {
-    LOG(DEBUG) << "suspend check match";
+    VLOG(signals) << "suspend check match";
     // This is a suspend check.  Arrange for the signal handler to return to
     // art_quick_implicit_suspend.  Also set LR so that after the suspend check it
     // will resume the instruction (current PC + 2).  PC points to the
@@ -148,14 +148,14 @@
 
     // NB: remember that we need to set the bottom bit of the LR register
     // to switch to thumb mode.
-    LOG(DEBUG) << "arm lr: " << std::hex << sc->arm_lr;
-    LOG(DEBUG) << "arm pc: " << std::hex << sc->arm_pc;
+    VLOG(signals) << "arm lr: " << std::hex << sc->arm_lr;
+    VLOG(signals) << "arm pc: " << std::hex << sc->arm_pc;
     sc->arm_lr = sc->arm_pc + 3;      // +2 + 1 (for thumb)
     sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_implicit_suspend);
 
     // Now remove the suspend trigger that caused this fault.
     Thread::Current()->RemoveSuspendTrigger();
-    LOG(DEBUG) << "removed suspend trigger invoking test suspend";
+    VLOG(signals) << "removed suspend trigger invoking test suspend";
     return true;
   }
   return false;
@@ -174,103 +174,60 @@
 // on the stack.
 //
 // If we determine this is a stack overflow we need to move the stack pointer
-// to the overflow region below the protected region.  Because we now have
-// a gap in the stack (skips over protected region), we need to arrange
-// for the rest of the system to be unaware of the new stack arrangement
-// and behave as if there is a fully valid stack.  We do this by placing
-// a unique address onto the stack followed by
-// the size of the gap.  The stack walker will detect this and skip over the
-// gap.
-
-// NB. We also need to be careful of stack alignment as the ARM EABI specifies that
-// stack must be 8 byte aligned when making any calls.
-
-// NB. The size of the gap is the difference between the previous frame's SP and
-// the SP at which the size word is pushed.
+// to the overflow region below the protected region.
 
 bool StackOverflowHandler::Action(int sig, siginfo_t* info, void* context) {
   struct ucontext *uc = (struct ucontext *)context;
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  LOG(DEBUG) << "stack overflow handler with sp at " << std::hex << &uc;
-  LOG(DEBUG) << "sigcontext: " << std::hex << sc;
+  VLOG(signals) << "stack overflow handler with sp at " << std::hex << &uc;
+  VLOG(signals) << "sigcontext: " << std::hex << sc;
 
-  uint8_t* sp = reinterpret_cast<uint8_t*>(sc->arm_sp);
-  LOG(DEBUG) << "sp: " << static_cast<void*>(sp);
+  uintptr_t sp = sc->arm_sp;
+  VLOG(signals) << "sp: " << std::hex << sp;
 
-  uintptr_t* fault_addr = reinterpret_cast<uintptr_t*>(sc->fault_address);
-  LOG(DEBUG) << "fault_addr: " << std::hex << fault_addr;
-  LOG(DEBUG) << "checking for stack overflow, sp: " << std::hex << static_cast<void*>(sp) <<
+  uintptr_t fault_addr = sc->fault_address;
+  VLOG(signals) << "fault_addr: " << std::hex << fault_addr;
+  VLOG(signals) << "checking for stack overflow, sp: " << std::hex << sp <<
     ", fault_addr: " << fault_addr;
-  uintptr_t* overflow_addr = reinterpret_cast<uintptr_t*>(sp - Thread::kStackOverflowReservedBytes);
+
+  uintptr_t overflow_addr = sp - Thread::kStackOverflowReservedBytes;
+
+  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  CHECK_EQ(self, Thread::Current());
+  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
+      Thread::kStackOverflowProtectedSize;
 
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
-    LOG(DEBUG) << "Not a stack overflow";
+    VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
   // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // the exists below the protected region.  R9 contains the current Thread* so
-  // we can read the stack_end from that and subtract the size of the
-  // protected region.  This creates a gap in the stack that needs to be marked.
-  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
+  // that exists below the protected region.  Determine the next valid address
+  // below the protected region.
+  uintptr_t prevsp = sp;
+  sp = pregion;
+  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
 
-  uint8_t* prevsp = sp;
-  sp = self->GetStackEnd() - Thread::kStackOverflowProtectedSize;
-  LOG(DEBUG) << "setting sp to overflow region at " << std::hex << static_cast<void*>(sp);
-
-  // We need to find the previous frame.  Remember that
-  // this has not yet been fully constructed because the SP has not been
-  // decremented.  So we need to work out the size of the spill portion of the
-  // frame.  This consists of something like:
-  //
-  // 0xb6a1d49c: e92d40e0  push    {r5, r6, r7, lr}
-  // 0xb6a1d4a0: ed2d8a06  vpush.f32 {s16-s21}
-  //
-  // The first is encoded in the ArtMethod as the spill_mask, the second as the
-  // fp_spill_mask.  A population count on each will give the number of registers
-  // in each mask.  Each register is 4 bytes on ARM32.
-
-  mirror::ArtMethod* method = reinterpret_cast<mirror::ArtMethod*>(sc->arm_r0);
-  uint32_t spill_mask = method->GetCoreSpillMask();
-  uint32_t numcores = POPCOUNT(spill_mask);
-  uint32_t fp_spill_mask = method->GetFpSpillMask();
-  uint32_t numfps = POPCOUNT(fp_spill_mask);
-  uint32_t spill_size = (numcores + numfps) * 4;
-  LOG(DEBUG) << "spill size: " << spill_size;
-  uint8_t* prevframe = prevsp + spill_size;
-  LOG(DEBUG) << "previous frame: " << static_cast<void*>(prevframe);
-
-  // NOTE: the ARM EABI needs an 8 byte alignment.  In the case of ARM32 a pointer
-  // is 4 bytes so that, together with the offset to the previous frame is 8
-  // bytes.  On other architectures we will need to align the stack.
-
-  // Push a marker onto the stack to tell the stack walker that there is a stack
-  // overflow and the stack is not contiguous.
-
-  // First the offset from SP to the previous frame.
-  sp -= sizeof(uint32_t);
-  LOG(DEBUG) << "push gap of " << static_cast<uint32_t>(prevframe - sp);
-  *reinterpret_cast<uint32_t*>(sp) = static_cast<uint32_t>(prevframe - sp);
-
-  // Now the gap marker (pointer sized).
-  sp -= sizeof(mirror::ArtMethod*);
-  *reinterpret_cast<void**>(sp) = stack_overflow_gap_marker;
+  // Since the compiler puts the implicit overflow
+  // check before the callee save instructions, the SP is already pointing to
+  // the previous frame.
+  VLOG(signals) << "previous frame: " << std::hex << prevsp;
 
   // Now establish the stack pointer for the signal return.
-  sc->arm_sp = reinterpret_cast<uintptr_t>(sp);
+  sc->arm_sp = prevsp;
 
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
-  // We need the LR to point to the GC map just after the fault instruction.
-  uint8_t* ptr = reinterpret_cast<uint8_t*>(sc->arm_pc);
-  uint32_t instr_size = GetInstructionSize(ptr);
-  sc->arm_lr = (sc->arm_pc + instr_size) | 1;      // LR needs to point to gc map location
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
+  // Tell the stack overflow code where the new stack pointer should be.
+  sc->arm_ip = sp;      // aka r12
 
-  // The kernel will now return to the address in sc->arm_pc.  We have arranged the
-  // stack pointer to be in the overflow region.  Throwing the exception will perform
-  // a longjmp which will restore the stack pointer to the correct location for the
-  // exception catch.
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // The value of LR must be the same as it was when we entered the code that
+  // caused this fault.  This will be inserted into a callee save frame by
+  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+
+  // The kernel will now return to the address in sc->arm_pc.
   return true;
 }
 }       // namespace art
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bc80644..dcf4561 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -235,6 +235,31 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
+  /*
+   * Invoke stack overflow exception from signal handler.
+   * On entry:
+   * r9: thread
+   * sp: address of last known frame
+   * r12: address of next valid SP below protected region in stack
+   *
+   * This is deceptively simple but hides some complexity.  It is called in the case of
+   * a stack overflow condition during implicit checks.  The signal handler has been
+   * called by the kernel due to a load from the protected stack region.  The handler
+   * works out the address of the previous frame and passes this in SP.  However there
+   * is a piece of memory somewhere below the current SP that is not accessible (the
+   * memory that caused the signal).  The signal handler works out the next
+   * accessible value of SP and passes this in r12.  This code then sets up the SP
+   * to be this new value and calls the code to create and throw the stack overflow
+   * exception.
+   */
+ENTRY art_quick_throw_stack_overflow_from_signal
+    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
+    mov r0, r9                      @ pass Thread::Current
+    mov r1, sp                      @ pass SP
+    mov sp, r12                     @ move SP down to below protected region.
+    b   artThrowStackOverflowFromCode                   @ artThrowStackOverflowFromCode(Thread*, SP)
+END art_quick_throw_stack_overflow_from_signal
+
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 7b66613..8079460 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -419,14 +419,30 @@
     brk 0  // Unreached
 .endm
 
-.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
-    ldr x9, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
-    cbnz x9, 1f
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
+    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
+    cbnz \reg, 1f
     ret
 1:
     DELIVER_PENDING_EXCEPTION
 .endm
 
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x9
+.endm
+
+// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
+.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
+.endm
+
+.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
+    cbnz w0, 1f                // result non-zero branch over
+    ret                        // return
+1:
+    DELIVER_PENDING_EXCEPTION
+.endm
+
 // FIXME: Temporary fix for TR(XSELF).
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
@@ -1153,19 +1169,6 @@
 UNIMPLEMENTED art_quick_initialize_static_storage
 UNIMPLEMENTED art_quick_initialize_type
 UNIMPLEMENTED art_quick_initialize_type_and_verify_access
-UNIMPLEMENTED art_quick_get32_static
-UNIMPLEMENTED art_quick_get64_static
-UNIMPLEMENTED art_quick_get_obj_static
-UNIMPLEMENTED art_quick_get32_instance
-UNIMPLEMENTED art_quick_get64_instance
-UNIMPLEMENTED art_quick_get_obj_instance
-UNIMPLEMENTED art_quick_set32_static
-UNIMPLEMENTED art_quick_set64_static
-UNIMPLEMENTED art_quick_set_obj_static
-UNIMPLEMENTED art_quick_set32_instance
-UNIMPLEMENTED art_quick_set64_instance
-UNIMPLEMENTED art_quick_set_obj_instance
-UNIMPLEMENTED art_quick_resolve_string
 
 // Macro to facilitate adding new allocation entrypoints.
 // TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
@@ -1197,6 +1200,82 @@
 END \name
 .endm
 
+// Macros taking opportunity of code similarities for downcalls with referrer.
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, xSELF                  // pass Thread::Current
+    mov    x3, sp                     // pass SP
+    bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+// TODO: xSELF -> x19. Temporarily rely on xSELF being saved in REF_ONLY
+.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
+    .extern \entrypoint
+ENTRY \name
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    ldr    x3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x4, xSELF                  // pass Thread::Current
+    mov    x5, sp                     // pass SP
+    bl     \entrypoint
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    \return
+END \name
+.endm
+
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
+
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
+
+// This is separated out as the argument order is different.
+    .extern artSet64StaticFromCode
+ENTRY art_quick_set64_static
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME  // save callee saves in case of GC
+    mov    x3, x1                     // Store value
+    ldr    x1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE] // Load referrer
+    mov    x2, x3                     // Put value param
+    mov    x3, xSELF                  // pass Thread::Current
+    mov    x4, sp                     // pass SP
+    bl     artSet64StaticFromCode
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME
+    RETURN_IF_W0_IS_ZERO_OR_DELIVER
+END art_quick_set64_static
+
+
+UNIMPLEMENTED art_quick_resolve_string
+
 // Generate the allocation entrypoints for each allocator.
 GENERATE_ALL_ALLOC_ENTRYPOINTS
 
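For orientation, the REF_DOWNCALL macros above load an extra "referrer" method from
the caller's frame and pass it to the C entrypoints. The C-side prototypes are
assumed to have roughly the following shape (a sketch inferred from the register
comments; the exact declarations live in the quick entrypoint sources):

    #include <cstdint>
    namespace art { class Thread; namespace mirror { class ArtMethod; } }
    // Getter: x0 = field_idx, x1 = referrer (from the frame), x2 = Thread*, x3 = SP.
    extern "C" uint32_t artGet32StaticFromCode(uint32_t field_idx,
                                               art::mirror::ArtMethod* referrer,
                                               art::Thread* self,
                                               art::mirror::ArtMethod** sp);
    // Setter: x0 = field_idx, x1 = new_value, x2 = referrer, x3 = Thread*, x4 = SP.
    extern "C" int artSet32StaticFromCode(uint32_t field_idx, uint32_t new_value,
                                          art::mirror::ArtMethod* referrer,
                                          art::Thread* self,
                                          art::mirror::ArtMethod** sp);
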
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 94a7598..4438f25 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -15,6 +15,7 @@
  */
 
 #include "common_runtime_test.h"
+#include "mirror/art_field-inl.h"
 #include "mirror/string-inl.h"
 
 #include <cstdio>
@@ -73,17 +74,28 @@
     __asm__ __volatile__(
         "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
         ".cfi_adjust_cfa_offset 52\n\t"
-        "sub sp, sp, #8\n\t"        // +8B, so 16B aligned with nullptr
-        ".cfi_adjust_cfa_offset 8\n\t"
-        "mov r0, %[arg0]\n\t"       // Set arg0-arg2
-        "mov r1, %[arg1]\n\t"       // TODO: Any way to use constraints like on x86?
-        "mov r2, %[arg2]\n\t"
-        // Use r9 last as we don't know whether it was used for arg0-arg2
-        "mov r9, #0\n\t"            // Push nullptr to terminate stack
         "push {r9}\n\t"
         ".cfi_adjust_cfa_offset 4\n\t"
-        "mov r9, %[self]\n\t"       // Set the thread
-        "blx %[code]\n\t"           // Call the stub
+        "mov r9, #0\n\t"
+        "str r9, [sp, #-8]!\n\t"   // Push nullptr to terminate stack, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
         "add sp, sp, #12\n\t"       // Pop nullptr and padding
         ".cfi_adjust_cfa_offset -12\n\t"
         "pop {r1-r12, lr}\n\t"      // Restore state
@@ -91,30 +103,42 @@
         "mov %[result], r0\n\t"     // Save the result
         : [result] "=r" (result)
           // Use the result from r0
-        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
         : );  // clobber.
 #elif defined(__aarch64__)
     __asm__ __volatile__(
         "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
         ".cfi_adjust_cfa_offset 48\n\t"
-        "stp xzr, x1, [sp]\n\t"        // nullptr(end of quick stack), x1
-        "stp x2, x18, [sp, #16]\n\t"   // Save x2, x18(xSELF)
-        "str x30, [sp, #32]\n\t"       // Save xLR
-        "mov x0, %[arg0]\n\t"          // Set arg0-arg2
-        "mov x1, %[arg1]\n\t"          // TODO: Any way to use constraints like on x86?
-        "mov x2, %[arg2]\n\t"
-        // Use r18 last as we don't know whether it was used for arg0-arg2
-        "mov x18, %[self]\n\t"         // Set the thread
-        "blr %[code]\n\t"              // Call the stub
+        "stp xzr, x1,  [sp]\n\t"        // nullptr(end of quick stack), x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+
+        "blr x3\n\t"              // Call the stub
         "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
-        "ldp x18, x30, [sp, #24]\n\t"  // Restore xSELF, xLR
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"      // Restore xLR
         "add sp, sp, #48\n\t"          // Free stack space
         ".cfi_adjust_cfa_offset -48\n\t"
+
         "mov %[result], x0\n\t"        // Save the result
         : [result] "=r" (result)
           // Use the result from r0
         : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self)
-        : "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
 #elif defined(__x86_64__)
     // Note: Uses the native convention
     // TODO: Set the thread?
@@ -139,6 +163,151 @@
     self->PopManagedStackFragment(fragment);
     return result;
   }
+
+ public:
+  // TODO: Set up a frame according to referrer's specs.
+  size_t Invoke3WithReferrer(size_t arg0, size_t arg1, size_t arg2, uintptr_t code, Thread* self,
+                             mirror::ArtMethod* referrer) {
+    // Push a transition back into managed code onto the linked list in thread.
+    ManagedStack fragment;
+    self->PushManagedStackFragment(&fragment);
+
+    size_t result;
+#if defined(__i386__)
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushl %[referrer]\n\t"     // Store referrer
+        "call *%%edi\n\t"           // Call the stub
+        "addl $4, %%esp"            // Pop referrer
+        : "=a" (result)
+          // Use the result from eax
+          : "a"(arg0), "c"(arg1), "d"(arg2), "D"(code), [referrer]"r"(referrer)
+            // This places code into edi, arg0 into eax, arg1 into ecx, and arg2 into edx
+            : );  // clobber.
+    // TODO: Should we clobber the other registers? EBX gets clobbered by some of the stubs,
+    //       but compilation fails when declaring that.
+#elif defined(__arm__)
+    __asm__ __volatile__(
+        "push {r1-r12, lr}\n\t"     // Save state, 13*4B = 52B
+        ".cfi_adjust_cfa_offset 52\n\t"
+        "push {r9}\n\t"
+        ".cfi_adjust_cfa_offset 4\n\t"
+        "mov r9, %[referrer]\n\t"
+        "str r9, [sp, #-8]!\n\t"   // Push referrer, +8B padding so 16B aligned
+        ".cfi_adjust_cfa_offset 8\n\t"
+        "ldr r9, [sp, #8]\n\t"
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #20\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #4]\n\t"
+        "str %[arg2], [sp, #8]\n\t"
+        "str %[code], [sp, #12]\n\t"
+        "str %[self], [sp, #16]\n\t"
+        "ldr r0, [sp]\n\t"
+        "ldr r1, [sp, #4]\n\t"
+        "ldr r2, [sp, #8]\n\t"
+        "ldr r3, [sp, #12]\n\t"
+        "ldr r9, [sp, #16]\n\t"
+        "add sp, sp, #20\n\t"
+
+        "blx r3\n\t"                // Call the stub
+        "add sp, sp, #12\n\t"       // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -12\n\t"
+        "pop {r1-r12, lr}\n\t"      // Restore state
+        ".cfi_adjust_cfa_offset -52\n\t"
+        "mov %[result], r0\n\t"     // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+        : [arg0] "r"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : );  // clobber.
+#elif defined(__aarch64__)
+    __asm__ __volatile__(
+        "sub sp, sp, #48\n\t"          // Reserve stack space, 16B aligned
+        ".cfi_adjust_cfa_offset 48\n\t"
+        "stp %[referrer], x1, [sp]\n\t"// referrer, x1
+        "stp x2, x3,   [sp, #16]\n\t"   // Save x2, x3
+        "stp x18, x30, [sp, #32]\n\t"   // Save x18(xSELF), xLR
+
+        // Push everything on the stack, so we don't rely on the order. What a mess. :-(
+        "sub sp, sp, #48\n\t"
+        "str %[arg0], [sp]\n\t"
+        "str %[arg1], [sp, #8]\n\t"
+        "str %[arg2], [sp, #16]\n\t"
+        "str %[code], [sp, #24]\n\t"
+        "str %[self], [sp, #32]\n\t"
+        "ldr x0, [sp]\n\t"
+        "ldr x1, [sp, #8]\n\t"
+        "ldr x2, [sp, #16]\n\t"
+        "ldr x3, [sp, #24]\n\t"
+        "ldr x18, [sp, #32]\n\t"
+        "add sp, sp, #48\n\t"
+
+        "blr x3\n\t"              // Call the stub
+        "ldp x1, x2, [sp, #8]\n\t"     // Restore x1, x2
+        "ldp x3, x18, [sp, #24]\n\t"   // Restore x3, xSELF
+        "ldr x30, [sp, #40]\n\t"      // Restore xLR
+        "add sp, sp, #48\n\t"          // Free stack space
+        ".cfi_adjust_cfa_offset -48\n\t"
+
+        "mov %[result], x0\n\t"        // Save the result
+        : [result] "=r" (result)
+          // Use the result from r0
+        : [arg0] "0"(arg0), [arg1] "r"(arg1), [arg2] "r"(arg2), [code] "r"(code), [self] "r"(self),
+          [referrer] "r"(referrer)
+        : "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17");  // clobber.
+#elif defined(__x86_64__)
+    // Note: Uses the native convention
+    // TODO: Set the thread?
+    __asm__ __volatile__(
+        "pushq %[referrer]\n\t"        // Push referrer
+        "pushq (%%rsp)\n\t"             // & 16B alignment padding
+        ".cfi_adjust_cfa_offset 16\n\t"
+        "call *%%rax\n\t"              // Call the stub
+        "addq $16, %%rsp\n\t"          // Pop nullptr and padding
+        ".cfi_adjust_cfa_offset -16\n\t"
+        : "=a" (result)
+          // Use the result from rax
+          : "D"(arg0), "S"(arg1), "d"(arg2), "a"(code), [referrer] "m"(referrer)
+            // This places arg0 into rdi, arg1 into rsi, arg2 into rdx, and code into rax
+            : "rbx", "rcx", "rbp", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15");  // clobber all
+    // TODO: Should we clobber the other registers?
+#else
+    LOG(WARNING) << "Was asked to invoke for an architecture I do not understand.";
+    result = 0;
+#endif
+    // Pop transition.
+    self->PopManagedStackFragment(fragment);
+    return result;
+  }
+
+  // Method with 32b arg0, 64b arg1
+  size_t Invoke3UWithReferrer(size_t arg0, uint64_t arg1, uintptr_t code, Thread* self,
+                              mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, 0U, code, self, referrer);
+#else
+    // Need to split up arguments.
+    uint32_t lower = static_cast<uint32_t>(arg1 & 0xFFFFFFFF);
+    uint32_t upper = static_cast<uint32_t>((arg1 >> 32) & 0xFFFFFFFF);
+
+    return Invoke3WithReferrer(arg0, lower, upper, code, self, referrer);
+#endif
+  }
+
+  // Method with 32b arg0, 32b arg1, 64b arg2
+  size_t Invoke3UUWithReferrer(uint32_t arg0, uint32_t arg1, uint64_t arg2, uintptr_t code,
+                               Thread* self, mirror::ArtMethod* referrer) {
+#if defined(__x86_64__) || defined(__aarch64__)
+    // Just pass through.
+    return Invoke3WithReferrer(arg0, arg1, arg2, code, self, referrer);
+#else
+    // TODO: Needs 4-param invoke.
+    return 0;
+#endif
+  }
 };
 
 
@@ -231,6 +400,7 @@
 #endif
 }
 
+
 class RandGen {
  public:
   explicit RandGen(uint32_t seed) : val_(seed) {}
@@ -723,11 +893,11 @@
   // Play with it...
 
   EXPECT_FALSE(self->IsExceptionPending());
-/*
- * For some reason this does not work, as the type_idx is artificial and outside what the
- * resolved types of c_obj allow...
- *
-  {
+
+  // For some reason this does not work, as the type_idx is artificial and outside what the
+  // resolved types of c_obj allow...
+
+  if (false) {
     // Use an arbitrary method from c to use as referrer
     size_t result = Invoke3(static_cast<size_t>(c->GetDexTypeIndex()),    // type_idx
                             reinterpret_cast<size_t>(c_obj->GetVirtualMethod(0)),  // arbitrary
@@ -742,7 +912,7 @@
     VerifyObject(obj);
     EXPECT_EQ(obj->GetLength(), 10);
   }
-*/
+
   {
     // We can use nullptr in the second argument as we do not need a method here (not used in
     // resolved/initialized cases)
@@ -750,7 +920,7 @@
                             reinterpret_cast<uintptr_t>(&art_quick_alloc_array_resolved_rosalloc),
                             self);
 
-    EXPECT_FALSE(self->IsExceptionPending());
+    EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException(nullptr));
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
     EXPECT_TRUE(obj->IsArrayInstance());
@@ -881,4 +1051,383 @@
 #endif
 }
 
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_static(void);
+extern "C" void art_quick_get32_static(void);
+#endif
+
+static void GetSet32Static(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              static_cast<size_t>(values[i]),
+                              0U,
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_static),
+                              self,
+                              referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get32_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set32static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set32_instance(void);
+extern "C" void art_quick_get32_instance(void);
+#endif
+
+static void GetSet32Instance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  constexpr size_t num_values = 7;
+  uint32_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set32_instance),
+                              self,
+                              referrer);
+
+    int32_t res = f->get()->GetInt(obj->get());
+    EXPECT_EQ(res, static_cast<int32_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->get()->SetInt<false>(obj->get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get32_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int32_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set32instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_static(void);
+extern "C" void art_quick_get_obj_static(void);
+
+static void set_and_check_static(uint32_t f_idx, mirror::Object* val, Thread* self,
+                                 mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                            reinterpret_cast<size_t>(val),
+                            0U,
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_static),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>(f_idx),
+                                         0U, 0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_static),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+}
+#endif
+
+static void GetSetObjStatic(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                            mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_static((*f)->GetDexFieldIndex(), str, self, referrer, test);
+
+  set_and_check_static((*f)->GetDexFieldIndex(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjstatic as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+extern "C" void art_quick_set_obj_instance(void);
+extern "C" void art_quick_get_obj_instance(void);
+
+static void set_and_check_instance(SirtRef<mirror::ArtField>* f, mirror::Object* trg,
+                                   mirror::Object* val, Thread* self, mirror::ArtMethod* referrer,
+                                   StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                            reinterpret_cast<size_t>(trg),
+                            reinterpret_cast<size_t>(val),
+                            reinterpret_cast<uintptr_t>(&art_quick_set_obj_instance),
+                            self,
+                            referrer);
+
+  size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                         reinterpret_cast<size_t>(trg),
+                                         0U,
+                                         reinterpret_cast<uintptr_t>(&art_quick_get_obj_instance),
+                                         self,
+                                         referrer);
+
+  EXPECT_EQ(res, reinterpret_cast<size_t>(val)) << "Value " << val;
+
+  EXPECT_EQ(val, f->get()->GetObj(trg));
+}
+#endif
+
+static void GetSetObjInstance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                              Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__x86_64__)
+  set_and_check_instance(f, obj->get(), nullptr, self, referrer, test);
+
+  // Allocate a string object for simplicity.
+  mirror::String* str = mirror::String::AllocFromModifiedUtf8(self, "Test");
+  set_and_check_instance(f, obj->get(), str, self, referrer, test);
+
+  set_and_check_instance(f, obj->get(), nullptr, self, referrer, test);
+#else
+  LOG(INFO) << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping setObjinstance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+// TODO: Complete these tests for 32b architectures.
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_static(void);
+extern "C" void art_quick_get64_static(void);
+#endif
+
+static void GetSet64Static(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f, Thread* self,
+                           mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3UWithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                               values[i],
+                               reinterpret_cast<uintptr_t>(&art_quick_set64_static),
+                               self,
+                               referrer);
+
+    size_t res = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                           0U, 0U,
+                                           reinterpret_cast<uintptr_t>(&art_quick_get64_static),
+                                           self,
+                                           referrer);
+
+    EXPECT_EQ(res, values[i]) << "Iteration " << i;
+  }
+#else
+  LOG(INFO) << "Skipping set64static as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64static as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+
+#if defined(__x86_64__) || defined(__aarch64__)
+extern "C" void art_quick_set64_instance(void);
+extern "C" void art_quick_get64_instance(void);
+#endif
+
+static void GetSet64Instance(SirtRef<mirror::Object>* obj, SirtRef<mirror::ArtField>* f,
+                             Thread* self, mirror::ArtMethod* referrer, StubTest* test)
+    SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+#if defined(__x86_64__) || defined(__aarch64__)
+  constexpr size_t num_values = 8;
+  uint64_t values[num_values] = { 0, 1, 2, 255, 32768, 1000000, 0xFFFFFFFF, 0xFFFFFFFFFFFF };
+
+  for (size_t i = 0; i < num_values; ++i) {
+    test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                              reinterpret_cast<size_t>(obj->get()),
+                              static_cast<size_t>(values[i]),
+                              reinterpret_cast<uintptr_t>(&art_quick_set64_instance),
+                              self,
+                              referrer);
+
+    int64_t res = f->get()->GetLong(obj->get());
+    EXPECT_EQ(res, static_cast<int64_t>(values[i])) << "Iteration " << i;
+
+    res++;
+    f->get()->SetLong<false>(obj->get(), res);
+
+    size_t res2 = test->Invoke3WithReferrer(static_cast<size_t>((*f)->GetDexFieldIndex()),
+                                            reinterpret_cast<size_t>(obj->get()),
+                                            0U,
+                                            reinterpret_cast<uintptr_t>(&art_quick_get64_instance),
+                                            self,
+                                            referrer);
+    EXPECT_EQ(res, static_cast<int64_t>(res2));
+  }
+#else
+  LOG(INFO) << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA;
+  // Force-print to std::cout so it's also outside the logcat.
+  std::cout << "Skipping set64instance as I don't know how to do that on " << kRuntimeISA << std::endl;
+#endif
+}
+
+static void TestFields(Thread* self, StubTest* test, Primitive::Type test_type) {
+  // garbage is created during ClassLinker::Init
+
+  JNIEnv* env = Thread::Current()->GetJniEnv();
+  jclass jc = env->FindClass("AllFields");
+  CHECK(jc != NULL);
+  jobject o = env->AllocObject(jc);
+  CHECK(o != NULL);
+
+  ScopedObjectAccess soa(self);
+  SirtRef<mirror::Object> obj(self, soa.Decode<mirror::Object*>(o));
+
+  SirtRef<mirror::Class> c(self, obj->GetClass());
+
+  // Need a method as a referrer
+  SirtRef<mirror::ArtMethod> m(self, c->GetDirectMethod(0));
+
+  // Play with it...
+
+  // Static fields.
+  {
+    SirtRef<mirror::ObjectArray<mirror::ArtField>> fields(self, c.get()->GetSFields());
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      SirtRef<mirror::ArtField> f(self, fields->Get(i));
+
+      FieldHelper fh(f.get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Static(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Static(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try array.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjStatic(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // Instance fields.
+  {
+    SirtRef<mirror::ObjectArray<mirror::ArtField>> fields(self, c.get()->GetIFields());
+    int32_t num_fields = fields->GetLength();
+    for (int32_t i = 0; i < num_fields; ++i) {
+      SirtRef<mirror::ArtField> f(self, fields->Get(i));
+
+      FieldHelper fh(f.get());
+      Primitive::Type type = fh.GetTypeAsPrimitiveType();
+      switch (type) {
+        case Primitive::Type::kPrimInt:
+          if (test_type == type) {
+            GetSet32Instance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimLong:
+          if (test_type == type) {
+            GetSet64Instance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        case Primitive::Type::kPrimNot:
+          // Don't try arrays.
+          if (test_type == type && fh.GetTypeDescriptor()[0] != '[') {
+            GetSetObjInstance(&obj, &f, self, m.get(), test);
+          }
+          break;
+
+        default:
+          break;  // Skip.
+      }
+    }
+  }
+
+  // TODO: Deallocate things.
+}
+
+
+TEST_F(StubTest, Fields32) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimInt);
+}
+
+TEST_F(StubTest, FieldsObj) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimNot);
+}
+
+TEST_F(StubTest, Fields64) {
+  TEST_DISABLED_FOR_HEAP_REFERENCE_POISONING();
+
+  Thread* self = Thread::Current();
+
+  self->TransitionFromSuspendedToRunnable();
+  LoadDex("AllFields");
+  bool started = runtime_->Start();
+  CHECK(started);
+
+  TestFields(self, this, Primitive::Type::kPrimLong);
+}
+
 }  // namespace art
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 7b56718..a55dbb6 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -871,82 +871,63 @@
 UNIMPLEMENTED art_quick_lshr
 UNIMPLEMENTED art_quick_lushr
 
-DEFINE_FUNCTION art_quick_set32_instance
+
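+// Generators for field-access stubs.  Each generated stub reads the referrer (the calling
+// method) from the stack, sets up the refs-only callee-save frame, forwards its arguments plus
+// the referrer, Thread::Current() and SP to cxx_name, and handles the result with return_macro.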
+MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rsi                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 is in rdi
+    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
+    movq %rsp, %rcx                    // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
+    movq 8(%rsp), %rdx                 // pass referrer
+    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
+                                       // arg0 and arg1 are in rdi/rsi
+    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
+    movq %rsp, %r8                     // pass SP
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, referrer, Thread*, SP)
+    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
+    CALL_MACRO(return_macro, 2)
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
+
+MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
+    DEFINE_FUNCTION VAR(c_name, 0)
     movq 8(%rsp), %rcx                 // pass referrer
     SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
+                                       // arg0, arg1, and arg2 are in rdi/rsi/rdx
+    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
     movq %rsp, %r9                     // pass SP
-    call PLT_SYMBOL(artSet32InstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
+    call PLT_VAR(cxx_name, 1)          // cxx_name(arg0, arg1, arg2, referrer, Thread*, SP)
     RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set32_instance
+    CALL_MACRO(return_macro, 2)        // return or deliver exception
+    END_FUNCTION VAR(c_name, 0)
+END_MACRO
 
-DEFINE_FUNCTION art_quick_set64_instance
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSet64InstanceFromCode)  // (field_idx, Object*, new_val, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set64_instance
 
-DEFINE_FUNCTION art_quick_set_obj_instance
-    movq 8(%rsp), %rcx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx, Object* and new_val are in rdi/rsi/rdx
-    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
-    movq %rsp, %r9                     // pass SP
-    call PLT_SYMBOL(artSetObjInstanceFromCode)  // (field_idx, Object*, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set_obj_instance
+THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
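+// artSet64InstanceFromCode takes no referrer, so the plain three-argument downcall is used here.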
+THREE_ARG_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
+THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO
 
-DEFINE_FUNCTION art_quick_get32_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGet32InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get32_instance
+TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_get64_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGet64InstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get64_instance
+TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
+TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO
 
-DEFINE_FUNCTION art_quick_get_obj_instance
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and Object* are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artGetObjInstanceFromCode)  // (field_idx, Object*, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION  // return or deliver exception
-END_FUNCTION art_quick_get_obj_instance
+ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
+ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
 
-DEFINE_FUNCTION art_quick_set32_static
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and new_val are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSet32StaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_IF_EAX_ZERO                 // return or deliver exception
-END_FUNCTION art_quick_set32_static
-
+// This is singled out as the argument order is different.
 DEFINE_FUNCTION art_quick_set64_static
     movq %rsi, %rdx                    // pass new_val
     movq 8(%rsp), %rsi                 // pass referrer
@@ -959,49 +940,6 @@
     RETURN_IF_EAX_ZERO                 // return or deliver exception
 END_FUNCTION art_quick_set64_static
 
-DEFINE_FUNCTION art_quick_set_obj_static
-    movq 8(%rsp), %rdx                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx and new_val are in rdi/rsi
-    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
-    movq %rsp, %r8                     // pass SP
-    call PLT_SYMBOL(artSetObjStaticFromCode)  // (field_idx, new_val, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_set_obj_static
-
-DEFINE_FUNCTION art_quick_get32_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGet32StaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get32_static
-
-DEFINE_FUNCTION art_quick_get64_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGet64StaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get64_static
-
-DEFINE_FUNCTION art_quick_get_obj_static
-    movq 8(%rsp), %rsi                 // pass referrer
-    SETUP_REF_ONLY_CALLEE_SAVE_FRAME
-                                       // field_idx is in rdi
-    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    movq %rsp, %rcx                    // pass SP
-    call PLT_SYMBOL(artGetObjStaticFromCode)  // (field_idx, referrer, Thread*, SP)
-    RESTORE_REF_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
-    RETURN_OR_DELIVER_PENDING_EXCEPTION
-END_FUNCTION art_quick_get_obj_static
 
 DEFINE_FUNCTION art_quick_proxy_invoke_handler
     // Save callee and GPR args, mixed together to agree with core spills bitmap of ref. and args
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index bd5ae85..c4461fa 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -296,6 +296,7 @@
   bool startup;
   bool third_party_jni;  // Enabled with "-verbose:third-party-jni".
   bool threads;
+  bool signals;
 };
 
 extern LogVerbosity gLogVerbosity;
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index dbea0d8..e3c162b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -3309,33 +3309,36 @@
   if (klass->IsInterface()) {
     return true;
   }
-  Thread* self = Thread::Current();
-  // begin with the methods local to the superclass
+  // Begin with the methods local to the superclass.
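+  // For each vtable slot shared with the superclass, an overriding method must have a signature
+  // that resolves to the same classes in both defining class loaders.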
+  MethodHelper mh;
+  MethodHelper super_mh;
   if (klass->HasSuperClass() &&
       klass->GetClassLoader() != klass->GetSuperClass()->GetClassLoader()) {
-    SirtRef<mirror::Class> super(self, klass->GetSuperClass());
-    for (int i = super->GetVTable()->GetLength() - 1; i >= 0; --i) {
-      mirror::ArtMethod* method = klass->GetVTable()->Get(i);
-      if (method != super->GetVTable()->Get(i) &&
-          !IsSameMethodSignatureInDifferentClassContexts(self, method, super.get(), klass.get())) {
+    for (int i = klass->GetSuperClass()->GetVTable()->GetLength() - 1; i >= 0; --i) {
+      mh.ChangeMethod(klass->GetVTable()->GetWithoutChecks(i));
+      super_mh.ChangeMethod(klass->GetSuperClass()->GetVTable()->GetWithoutChecks(i));
+      bool is_override = mh.GetMethod() != super_mh.GetMethod();
+      if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
         ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in superclass %s",
-                          PrettyDescriptor(klass.get()).c_str(), PrettyMethod(method).c_str(),
-                          PrettyDescriptor(super.get()).c_str());
+                          PrettyDescriptor(klass.get()).c_str(),
+                          PrettyMethod(mh.GetMethod()).c_str(),
+                          PrettyDescriptor(klass->GetSuperClass()).c_str());
         return false;
       }
     }
   }
   for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-    SirtRef<mirror::Class> interface(self, klass->GetIfTable()->GetInterface(i));
-    if (klass->GetClassLoader() != interface->GetClassLoader()) {
-      for (size_t j = 0; j < interface->NumVirtualMethods(); ++j) {
-        mirror::ArtMethod* method = klass->GetIfTable()->GetMethodArray(i)->Get(j);
-        if (!IsSameMethodSignatureInDifferentClassContexts(self, method, interface.get(),
-                                                           method->GetDeclaringClass())) {
+    if (klass->GetClassLoader() != klass->GetIfTable()->GetInterface(i)->GetClassLoader()) {
+      uint32_t num_methods = klass->GetIfTable()->GetInterface(i)->NumVirtualMethods();
+      for (uint32_t j = 0; j < num_methods; ++j) {
+        mh.ChangeMethod(klass->GetIfTable()->GetMethodArray(i)->GetWithoutChecks(j));
+        super_mh.ChangeMethod(klass->GetIfTable()->GetInterface(i)->GetVirtualMethod(j));
+        bool is_override = mh.GetMethod() != super_mh.GetMethod();
+        if (is_override && !mh.HasSameSignatureWithDifferentClassLoaders(&super_mh)) {
           ThrowLinkageError(klass.get(), "Class %s method %s resolves differently in interface %s",
-                            PrettyDescriptor(method->GetDeclaringClass()).c_str(),
-                            PrettyMethod(method).c_str(),
-                            PrettyDescriptor(interface.get()).c_str());
+                            PrettyDescriptor(klass.get()).c_str(),
+                            PrettyMethod(mh.GetMethod()).c_str(),
+                            PrettyDescriptor(klass->GetIfTable()->GetInterface(i)).c_str());
           return false;
         }
       }
@@ -3344,60 +3347,6 @@
   return true;
 }
 
-// Returns true if classes referenced by the signature of the method are the
-// same classes in klass1 as they are in klass2.
-bool ClassLinker::IsSameMethodSignatureInDifferentClassContexts(Thread* self,
-                                                                mirror::ArtMethod* method,
-                                                                mirror::Class* klass1,
-                                                                mirror::Class* klass2) {
-  if (klass1 == klass2) {
-    return true;
-  }
-  CHECK(klass1 != nullptr);
-  CHECK(klass2 != nullptr);
-  SirtRef<mirror::ClassLoader> loader1(self, klass1->GetClassLoader());
-  SirtRef<mirror::ClassLoader> loader2(self, klass2->GetClassLoader());
-  const DexFile& dex_file = *method->GetDeclaringClass()->GetDexCache()->GetDexFile();
-  const DexFile::ProtoId& proto_id =
-      dex_file.GetMethodPrototype(dex_file.GetMethodId(method->GetDexMethodIndex()));
-  for (DexFileParameterIterator it(dex_file, proto_id); it.HasNext(); it.Next()) {
-    const char* descriptor = it.GetDescriptor();
-    if (descriptor == nullptr) {
-      break;
-    }
-    if (descriptor[0] == 'L' || descriptor[0] == '[') {
-      // Found a non-primitive type.
-      if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-        return false;
-      }
-    }
-  }
-  // Check the return type
-  const char* descriptor = dex_file.GetReturnTypeDescriptor(proto_id);
-  if (descriptor[0] == 'L' || descriptor[0] == '[') {
-    if (!IsSameDescriptorInDifferentClassContexts(self, descriptor, loader1, loader2)) {
-      return false;
-    }
-  }
-  return true;
-}
-
-// Returns true if the descriptor resolves to the same class in the context of loader1 and loader2.
-bool ClassLinker::IsSameDescriptorInDifferentClassContexts(Thread* self, const char* descriptor,
-                                                           SirtRef<mirror::ClassLoader>& loader1,
-                                                           SirtRef<mirror::ClassLoader>& loader2) {
-  CHECK(descriptor != nullptr);
-  SirtRef<mirror::Class> found1(self, FindClass(self, descriptor, loader1));
-  if (found1.get() == nullptr) {
-    self->ClearException();
-  }
-  mirror::Class* found2 = FindClass(self, descriptor, loader2);
-  if (found2 == nullptr) {
-    self->ClearException();
-  }
-  return found1.get() == found2;
-}
-
 bool ClassLinker::EnsureInitialized(const SirtRef<mirror::Class>& c, bool can_init_fields,
                                     bool can_init_parents) {
   DCHECK(c.get() != NULL);
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 0a35054..283faa2 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -116,23 +116,23 @@
 bool FaultManager::IsInGeneratedCode(void* context, bool check_dex_pc) {
   // We can only be running Java code in the current thread if it
   // is in Runnable state.
-  LOG(DEBUG) << "Checking for generated code";
+  VLOG(signals) << "Checking for generated code";
   Thread* thread = Thread::Current();
   if (thread == nullptr) {
-    LOG(DEBUG) << "no current thread";
+    VLOG(signals) << "no current thread";
     return false;
   }
 
   ThreadState state = thread->GetState();
   if (state != kRunnable) {
-    LOG(DEBUG) << "not runnable";
+    VLOG(signals) << "not runnable";
     return false;
   }
 
   // Current thread is runnable.
   // Make sure it has the mutator lock.
   if (!Locks::mutator_lock_->IsSharedHeld(thread)) {
-    LOG(DEBUG) << "no lock";
+    VLOG(signals) << "no lock";
     return false;
   }
 
@@ -145,9 +145,9 @@
   GetMethodAndReturnPCAndSP(context, &method_obj, &return_pc, &sp);
 
   // If we don't have a potential method, we're outta here.
-  LOG(DEBUG) << "potential method: " << method_obj;
+  VLOG(signals) << "potential method: " << method_obj;
   if (method_obj == 0 || !IsAligned<kObjectAlignment>(method_obj)) {
-    LOG(DEBUG) << "no method";
+    VLOG(signals) << "no method";
     return false;
   }
 
@@ -157,36 +157,36 @@
   // TODO: Method might be not a heap address, and GetClass could fault.
   mirror::Class* cls = method_obj->GetClass<kVerifyNone>();
   if (cls == nullptr) {
-    LOG(DEBUG) << "not a class";
+    VLOG(signals) << "not a class";
     return false;
   }
   if (!IsAligned<kObjectAlignment>(cls)) {
-    LOG(DEBUG) << "not aligned";
+    VLOG(signals) << "not aligned";
     return false;
   }
 
 
   if (!VerifyClassClass(cls)) {
-    LOG(DEBUG) << "not a class class";
+    VLOG(signals) << "not a class class";
     return false;
   }
 
   // Now make sure the class is a mirror::ArtMethod.
   if (!cls->IsArtMethodClass()) {
-    LOG(DEBUG) << "not a method";
+    VLOG(signals) << "not a method";
     return false;
   }
 
   // We can be certain that this is a method now.  Check if we have a GC map
   // at the return PC address.
   if (true || kIsDebugBuild) {
-    LOG(DEBUG) << "looking for dex pc for return pc " << std::hex << return_pc;
+    VLOG(signals) << "looking for dex pc for return pc " << std::hex << return_pc;
     const void* code = Runtime::Current()->GetInstrumentation()->GetQuickCodeFor(method_obj);
     uint32_t sought_offset = return_pc - reinterpret_cast<uintptr_t>(code);
-    LOG(DEBUG) << "pc offset: " << std::hex << sought_offset;
+    VLOG(signals) << "pc offset: " << std::hex << sought_offset;
   }
   uint32_t dexpc = method_obj->ToDexPc(return_pc, false);
-  LOG(DEBUG) << "dexpc: " << dexpc;
+  VLOG(signals) << "dexpc: " << dexpc;
   return !check_dex_pc || dexpc != DexFile::kDexNoIndex;
 }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 7232e54..a87f95c 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -28,7 +28,17 @@
                                 Object* receiver, uint32_t* args, JValue* result)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   std::string name(PrettyMethod(method));
-  if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
+  if (name == "java.lang.Object dalvik.system.VMRuntime.newUnpaddedArray(java.lang.Class, int)") {
+    int32_t length = args[1];
+    DCHECK_GE(length, 0);
+    mirror::Class* element_class = reinterpret_cast<Object*>(args[0])->AsClass();
+    Runtime* runtime = Runtime::Current();
+    mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, element_class);
+    DCHECK(array_class != nullptr);
+    gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
+    result->SetL(mirror::Array::Alloc<true>(self, array_class, length,
+                                            array_class->GetComponentSize(), allocator, true));
+  } else if (name == "java.lang.ClassLoader dalvik.system.VMStack.getCallingClassLoader()") {
     result->SetL(NULL);
   } else if (name == "java.lang.Class dalvik.system.VMStack.getStackClass2()") {
     NthCallerVisitor visitor(self, 3);
diff --git a/runtime/object_utils.h b/runtime/object_utils.h
index 072f074..504537a 100644
--- a/runtime/object_utils.h
+++ b/runtime/object_utils.h
@@ -520,8 +520,7 @@
     return GetParamPrimitiveType(param) == Primitive::kPrimNot;
   }
 
-  bool HasSameNameAndSignature(MethodHelper* other)
-      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  bool HasSameNameAndSignature(MethodHelper* other) SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     const DexFile& dex_file = GetDexFile();
     const DexFile::MethodId& mid = dex_file.GetMethodId(method_->GetDexMethodIndex());
     if (GetDexCache() == other->GetDexCache()) {
@@ -539,6 +538,33 @@
     return dex_file.GetMethodSignature(mid) == other_dex_file.GetMethodSignature(other_mid);
   }
 
+  bool HasSameSignatureWithDifferentClassLoaders(MethodHelper* other)
+      SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
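+    // The signatures match iff the return type and every parameter type resolve to the same
+    // classes when each type is resolved through its own method's class loader.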
+    if (UNLIKELY(GetReturnType() != other->GetReturnType())) {
+      return false;
+    }
+    const DexFile::TypeList* types = GetParameterTypeList();
+    const DexFile::TypeList* other_types = other->GetParameterTypeList();
+    if (types == nullptr) {
+      return (other_types == nullptr) || (other_types->Size() == 0);
+    } else if (UNLIKELY(other_types == nullptr)) {
+      return types->Size() == 0;
+    }
+    uint32_t num_types = types->Size();
+    if (UNLIKELY(num_types != other_types->Size())) {
+      return false;
+    }
+    for (uint32_t i = 0; i < num_types; ++i) {
+      mirror::Class* param_type = GetClassFromTypeIdx(types->GetTypeItem(i).type_idx_);
+      mirror::Class* other_param_type =
+          other->GetClassFromTypeIdx(other_types->GetTypeItem(i).type_idx_);
+      if (UNLIKELY(param_type != other_param_type)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
   const DexFile::CodeItem* GetCodeItem()
       SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
     return GetDexFile().GetCodeItem(method_->GetCodeItemOffset());
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 20f910d..1562527 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -232,6 +232,7 @@
 //  gLogVerbosity.startup = true;  // TODO: don't check this in!
 //  gLogVerbosity.third_party_jni = true;  // TODO: don't check this in!
 //  gLogVerbosity.threads = true;  // TODO: don't check this in!
+//  gLogVerbosity.signals = true;  // TODO: don't check this in!
 
   method_trace_ = false;
   method_trace_file_ = "/data/method-trace-file.bin";
@@ -464,6 +465,8 @@
           gLogVerbosity.third_party_jni = true;
         } else if (verbose_options[i] == "threads") {
           gLogVerbosity.threads = true;
+        } else if (verbose_options[i] == "signals") {
+          gLogVerbosity.signals = true;
         } else {
           Usage("Unknown -verbose option %s\n", verbose_options[i].c_str());
           return false;
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 9c709ae..5e64e59 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -32,14 +32,6 @@
 
 namespace art {
 
-// Define a piece of memory, the address of which can be used as a marker
-// for the gap in the stack added during stack overflow handling.
-static uint32_t stack_overflow_object;
-
-// The stack overflow gap marker is simply a valid unique address.
-void* stack_overflow_gap_marker = &stack_overflow_object;
-
-
 mirror::Object* ShadowFrame::GetThisObject() const {
   mirror::ArtMethod* m = GetMethod();
   if (m->IsStatic()) {
@@ -305,56 +297,23 @@
   bool exit_stubs_installed = Runtime::Current()->GetInstrumentation()->AreExitStubsInstalled();
   uint32_t instrumentation_stack_depth = 0;
 
-  bool kDebugStackWalk = false;
-  bool kDebugStackWalkVeryVerbose = false;            // The name says it all.
-
-  if (kDebugStackWalk) {
-    LOG(INFO) << "walking stack";
-  }
   for (const ManagedStack* current_fragment = thread_->GetManagedStack(); current_fragment != NULL;
        current_fragment = current_fragment->GetLink()) {
     cur_shadow_frame_ = current_fragment->GetTopShadowFrame();
     cur_quick_frame_ = current_fragment->GetTopQuickFrame();
     cur_quick_frame_pc_ = current_fragment->GetTopQuickFramePc();
-    if (kDebugStackWalkVeryVerbose) {
-      LOG(INFO) << "cur_quick_frame: " << cur_quick_frame_;
-      LOG(INFO) << "cur_quick_frame_pc: " << std::hex << cur_quick_frame_pc_;
-    }
 
     if (cur_quick_frame_ != NULL) {  // Handle quick stack frames.
       // Can't be both a shadow and a quick fragment.
       DCHECK(current_fragment->GetTopShadowFrame() == NULL);
       mirror::ArtMethod* method = *cur_quick_frame_;
       while (method != NULL) {
-        // Check for a stack overflow gap marker.
-        if (method == reinterpret_cast<mirror::ArtMethod*>(stack_overflow_gap_marker)) {
-          // Marker for a stack overflow.  This is followed by the offset from the
-          // current SP to the next frame.  There is a gap in the stack here.  Jump
-          // the gap silently.
-          // Caveat coder: the layout of the overflow marker depends on the architecture.
-          //   The first element is address sized (8 bytes on a 64 bit machine).  The second
-          //   element is 32 bits.  So be careful with those address calculations.
-
-          // Get the address of the offset, just beyond the marker pointer.
-          byte* gapsizeaddr = reinterpret_cast<byte*>(cur_quick_frame_) + sizeof(uintptr_t);
-          uint32_t gap = *reinterpret_cast<uint32_t*>(gapsizeaddr);
-          CHECK_GT(gap, Thread::kStackOverflowProtectedSize);
-          mirror::ArtMethod** next_frame = reinterpret_cast<mirror::ArtMethod**>(
-            reinterpret_cast<byte*>(gapsizeaddr) + gap);
-          if (kDebugStackWalk) {
-            LOG(INFO) << "stack overflow marker hit, gap: " << gap << ", next_frame: " <<
-                next_frame;
-          }
-          cur_quick_frame_ = next_frame;
-          method = *next_frame;
-          CHECK(method != nullptr);
-        } else {
-          SanityCheckFrame();
-          bool should_continue = VisitFrame();
-          if (UNLIKELY(!should_continue)) {
-            return;
-          }
+        SanityCheckFrame();
+        bool should_continue = VisitFrame();
+        if (UNLIKELY(!should_continue)) {
+          return;
         }
+
         if (context_ != NULL) {
           context_->FillCalleeSaves(*this);
         }
@@ -363,9 +322,6 @@
         size_t return_pc_offset = method->GetReturnPcOffsetInBytes();
         byte* return_pc_addr = reinterpret_cast<byte*>(cur_quick_frame_) + return_pc_offset;
         uintptr_t return_pc = *reinterpret_cast<uintptr_t*>(return_pc_addr);
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "frame size: " << frame_size << ", return_pc: " << std::hex << return_pc;
-        }
         if (UNLIKELY(exit_stubs_installed)) {
           // While profiling, the return pc is restored from the side stack, except when walking
           // the stack for an exception where the side stack will be unwound in VisitFrame.
@@ -398,10 +354,6 @@
         cur_quick_frame_ = reinterpret_cast<mirror::ArtMethod**>(next_frame);
         cur_depth_++;
         method = *cur_quick_frame_;
-        if (kDebugStackWalkVeryVerbose) {
-          LOG(INFO) << "new cur_quick_frame_: " << cur_quick_frame_;
-          LOG(INFO) << "new cur_quick_frame_pc_: " << std::hex << cur_quick_frame_pc_;
-        }
       }
     } else if (cur_shadow_frame_ != NULL) {
       do {
diff --git a/runtime/stack.h b/runtime/stack.h
index 73a823a..88ef78f 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -102,14 +102,6 @@
   kVRegNonSpecialTempBaseReg = -3,
 };
 
-// Special object used to mark the gap in the stack placed when a stack
-// overflow fault occurs during implicit stack checking.  This is not
-// a real object - it is used simply as a valid address to which a
-// mirror::ArtMethod* can be compared during a stack walk.  It is inserted
-// into the stack during the stack overflow signal handling to mark the gap
-// in which the memory is protected against read and write.
-extern void* stack_overflow_gap_marker;
-
 // A reference from the shadow stack to a MirrorType object within the Java heap.
 template<class MirrorType>
 class MANAGED StackReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 23a6779..3a62cd5 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -243,10 +243,16 @@
   pregion -= kStackOverflowProtectedSize;
 
   // Touch the pages in the region to map them in.  Otherwise mprotect fails.  Only
-  // need to do this on the main stack.
+  // need to do this on the main stack.  We only need to touch one byte per page.
   if (is_main_stack) {
-    memset(pregion, 0x55, kStackOverflowProtectedSize);
+    byte* start = pregion;
+    byte* end = pregion + kStackOverflowProtectedSize;
+    while (start < end) {
+      *start = static_cast<byte>(0);
+      start += kPageSize;
+    }
   }
+
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
@@ -255,6 +261,11 @@
     LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
         << strerror(errno);
   }
+
+  // Tell the kernel that we won't be needing these pages any more.
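+  // They were only touched so the mprotect above would succeed; their contents are irrelevant,
+  // so the physical pages can be reclaimed right away.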
+  if (is_main_stack) {
+    madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
+  }
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
diff --git a/runtime/utils.cc b/runtime/utils.cc
index ee2cca4..c332bdf 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -1169,10 +1169,12 @@
 
 std::string GetDalvikCacheOrDie(const char* subdir, const bool create_if_absent) {
   CHECK(subdir != nullptr);
-  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", GetAndroidData()));
+  const char* android_data = GetAndroidData();
+  const std::string dalvik_cache_root(StringPrintf("%s/dalvik-cache/", android_data));
   const std::string dalvik_cache = dalvik_cache_root + subdir;
   if (create_if_absent && !OS::DirectoryExists(dalvik_cache.c_str())) {
-    if (StartsWith(dalvik_cache_root, "/tmp/")) {
+    // Don't create the system's /data/dalvik-cache/... because it needs special permissions.
+    if (strcmp(android_data, "/data") != 0) {
       int result = mkdir(dalvik_cache_root.c_str(), 0700);
       if (result != 0 && errno != EEXIST) {
         PLOG(FATAL) << "Failed to create dalvik-cache directory " << dalvik_cache_root;
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 03ceed3..bf1de86 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -3126,9 +3126,10 @@
     return nullptr;
   }
   mirror::ObjectArray<mirror::ArtMethod>* vtable = actual_arg_type.GetClass()->GetVTable();
-  CHECK(vtable != nullptr);
+  CHECK(vtable != nullptr) << PrettyDescriptor(actual_arg_type.GetClass());
   uint16_t vtable_index = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength());
+  CHECK_LT(static_cast<int32_t>(vtable_index), vtable->GetLength())
+      << PrettyDescriptor(actual_arg_type.GetClass());
   mirror::ArtMethod* res_method = vtable->Get(vtable_index);
   CHECK(!Thread::Current()->IsExceptionPending());
   return res_method;
diff --git a/test/etc/push-and-run-test-jar b/test/etc/push-and-run-test-jar
index e0d2f1d..6cf7998 100755
--- a/test/etc/push-and-run-test-jar
+++ b/test/etc/push-and-run-test-jar
@@ -150,7 +150,7 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
-cmdline="cd $DEX_LOCATION && mkdir -p dalvik-cache/{arm,arm64,mips,x86,x86_64} && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
+cmdline="cd $DEX_LOCATION && export ANDROID_DATA=$DEX_LOCATION && export DEX_LOCATION=$DEX_LOCATION && \
     $INVOKE_WITH $gdb /system/bin/dalvikvm$TARGET_SUFFIX $FLAGS $gdbargs -XXlib:$LIB $ZYGOTE $JNI_OPTS $INT_OPTS $DEBUGGER_OPTS $BOOT_OPT -cp $DEX_LOCATION/$TEST_NAME.jar Main"
 if [ "$DEV_MODE" = "y" ]; then
   echo $cmdline "$@"