Merge "Fix bugs in the x86 and arm versions of generic JNI."
diff --git a/compiler/dex/quick/arm/call_arm.cc b/compiler/dex/quick/arm/call_arm.cc
index b133991..4ba3c4b 100644
--- a/compiler/dex/quick/arm/call_arm.cc
+++ b/compiler/dex/quick/arm/call_arm.cc
@@ -354,13 +354,14 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm);
   NewLIR0(kPseudoMethodEntry);
-  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm) -
-      Thread::kStackOverflowSignalReservedBytes;
+  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm);
   bool large_frame = (static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes);
+  bool generate_explicit_stack_overflow_check = large_frame ||
+    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
   if (!skip_overflow_check) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+    if (generate_explicit_stack_overflow_check) {
       if (!large_frame) {
         /* Load stack limit */
         LockTemp(rs_r12);
@@ -399,7 +400,7 @@
   const int spill_size = spill_count * 4;
   const int frame_size_without_spills = frame_size_ - spill_size;
   if (!skip_overflow_check) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+    if (generate_explicit_stack_overflow_check) {
       class StackOverflowSlowPath : public LIRSlowPath {
        public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, bool restore_lr, size_t sp_displace)
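The reasoning behind the new flag: an implicit check is a single probe at a fixed offset below SP, so a frame larger than the reserved region could move SP past the protected page in one adjustment without ever touching it. A condensed sketch of the decision introduced above (the real flag comes from cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks(); names are simplified):

    #include <cstddef>

    // Even with implicit checks enabled, a frame larger than the reserved
    // region gets an explicit check, because one SP adjustment could step
    // past the protected page without faulting.
    bool GenerateExplicitCheck(size_t frame_size, size_t reserved_bytes,
                               bool implicit_checks_enabled) {
      const bool large_frame = frame_size > reserved_bytes;
      return large_frame || !implicit_checks_enabled;
    }
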
diff --git a/compiler/dex/quick/arm64/call_arm64.cc b/compiler/dex/quick/arm64/call_arm64.cc
index 28b747b..0538c31 100644
--- a/compiler/dex/quick/arm64/call_arm64.cc
+++ b/compiler/dex/quick/arm64/call_arm64.cc
@@ -329,16 +329,20 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kArm64);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kArm64);
 
   NewLIR0(kPseudoMethodEntry);
 
+  const size_t kStackOverflowReservedUsableBytes = GetStackOverflowReservedBytes(kArm64);
+  const bool large_frame = static_cast<size_t>(frame_size_) > kStackOverflowReservedUsableBytes;
+  bool generate_explicit_stack_overflow_check = large_frame ||
+    !cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks();
   const int spill_count = num_core_spills_ + num_fp_spills_;
   const int spill_size = (spill_count * kArm64PointerSize + 15) & ~0xf;  // SP 16 byte alignment.
   const int frame_size_without_spills = frame_size_ - spill_size;
 
   if (!skip_overflow_check) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+    if (generate_explicit_stack_overflow_check) {
       // Load stack limit
       LoadWordDisp(rs_xSELF, Thread::StackEndOffset<8>().Int32Value(), rs_xIP1);
     } else {
@@ -365,7 +369,7 @@
   }
 
   if (!skip_overflow_check) {
-    if (!cu_->compiler_driver->GetCompilerOptions().GetImplicitStackOverflowChecks()) {
+    if (generate_explicit_stack_overflow_check) {
       class StackOverflowSlowPath: public LIRSlowPath {
       public:
         StackOverflowSlowPath(Mir2Lir* m2l, LIR* branch, size_t sp_displace) :
diff --git a/compiler/dex/quick/mips/call_mips.cc b/compiler/dex/quick/mips/call_mips.cc
index 4577a4c..e8cb356 100644
--- a/compiler/dex/quick/mips/call_mips.cc
+++ b/compiler/dex/quick/mips/call_mips.cc
@@ -303,7 +303,7 @@
    * We can safely skip the stack overflow check if we're
    * a leaf *and* our frame size < fudge factor.
    */
-  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, kMips);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, kMips);
   NewLIR0(kPseudoMethodEntry);
   RegStorage check_reg = AllocTemp();
   RegStorage new_sp = AllocTemp();
diff --git a/compiler/dex/quick/x86/call_x86.cc b/compiler/dex/quick/x86/call_x86.cc
index f5f8671..996689a 100644
--- a/compiler/dex/quick/x86/call_x86.cc
+++ b/compiler/dex/quick/x86/call_x86.cc
@@ -219,7 +219,7 @@
    * a leaf *and* our frame size < fudge factor.
    */
   InstructionSet isa =  cu_->target64 ? kX86_64 : kX86;
-  const bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !IsLargeFrame(frame_size_, isa);
+  bool skip_overflow_check = mir_graph_->MethodIsLeaf() && !FrameNeedsStackCheck(frame_size_, isa);
 
   // If we are doing an implicit stack overflow check, perform the load immediately
   // before the stack pointer is decremented and anything is saved.
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 0b217a1..645fc1c 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -1885,10 +1885,6 @@
     thread_count = thread_count_;
   }
   context.ForAll(0, dex_file.NumClassDefs(), InitializeClass, thread_count);
-  if (IsImage()) {
-    // Prune garbage objects created during aborted transactions.
-    Runtime::Current()->GetHeap()->CollectGarbage(true);
-  }
 }
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
@@ -1899,6 +1895,10 @@
     CHECK(dex_file != NULL);
     InitializeClasses(class_loader, *dex_file, dex_files, thread_pool, timings);
   }
+  if (IsImage()) {
+    // Prune garbage objects created during aborted transactions.
+    Runtime::Current()->GetHeap()->CollectGarbage(true);
+  }
 }
 
 void CompilerDriver::Compile(jobject class_loader, const std::vector<const DexFile*>& dex_files,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index ba7e13f..9c9cdf2 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -232,7 +232,7 @@
   size_t length = RoundUp(Runtime::Current()->GetHeap()->GetTotalMemory(), kPageSize);
   std::string error_msg;
   image_.reset(MemMap::MapAnonymous("image writer image", NULL, length, PROT_READ | PROT_WRITE,
-                                    true, &error_msg));
+                                    false, &error_msg));
   if (UNLIKELY(image_.get() == nullptr)) {
     LOG(ERROR) << "Failed to allocate memory for image file generation: " << error_msg;
     return false;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index eccc970..2c954a0 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -265,7 +265,7 @@
         codegen_(codegen) {}
 
 void CodeGeneratorARM::GenerateFrameEntry() {
-  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kArm);
+  bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
   if (!skip_overflow_check) {
     if (kExplicitStackOverflowCheck) {
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index ab53b17..35b8116 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -241,7 +241,7 @@
   static const int kFakeReturnRegister = 8;
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
-  bool skip_overflow_check = IsLeafMethod() && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86);
+  bool skip_overflow_check = IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
   if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
     RecordPcInfo(0);
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index e4259f5..c4571ca 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -209,7 +209,7 @@
   core_spill_mask_ |= (1 << kFakeReturnRegister);
 
   bool skip_overflow_check = IsLeafMethod()
-      && !IsLargeFrame(GetFrameSize(), InstructionSet::kX86_64);
+      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
 
   if (!skip_overflow_check && !kExplicitStackOverflowCheck) {
     __ testq(CpuRegister(RAX), Address(
diff --git a/compiler/utils/stack_checks.h b/compiler/utils/stack_checks.h
index 63adbc2..ce01077 100644
--- a/compiler/utils/stack_checks.h
+++ b/compiler/utils/stack_checks.h
@@ -33,10 +33,9 @@
 // Determine whether a frame is small or large, used in the decision on whether to elide a
 // stack overflow check on method entry.
 //
-// A frame is considered large when it's either above kLargeFrameSize, or a quarter of the
-// overflow-usable stack space.
-static inline bool IsLargeFrame(size_t size, InstructionSet isa) {
-  return size >= kLargeFrameSize || size >= GetStackOverflowReservedBytes(isa) / 4;
+// A frame is considered large when it's above kLargeFrameSize.
+static inline bool FrameNeedsStackCheck(size_t size, InstructionSet isa) {
+  return size >= kLargeFrameSize;
 }
 
 }  // namespace art
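For scale, a compile-time comparison of the old and new predicates. kLargeFrameSize's value is not shown in this diff, so 2 KB is assumed here purely for the arithmetic:

    #include <cstddef>

    constexpr size_t kKB = 1024;
    constexpr size_t kAssumedLargeFrameSize = 2 * kKB;  // assumption, not from this diff
    constexpr size_t kNewArmReserve = 8 * kKB;          // from instruction_set.cc below
    constexpr bool OldIsLargeFrame(size_t s) {
      return s >= kAssumedLargeFrameSize || s >= kNewArmReserve / 4;
    }
    constexpr bool NewFrameNeedsStackCheck(size_t s) {
      return s >= kAssumedLargeFrameSize;
    }
    // Under these assumed values the dropped "reserve / 4" clause is redundant,
    // and frames bigger than the whole reserve are handled separately by the
    // new generate_explicit_stack_overflow_check logic above.
    static_assert(OldIsLargeFrame(3 * kKB) == NewFrameNeedsStackCheck(3 * kKB), "");
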
diff --git a/runtime/arch/arm/asm_support_arm.h b/runtime/arch/arm/asm_support_arm.h
index ae30aee..330924e 100644
--- a/runtime/arch/arm/asm_support_arm.h
+++ b/runtime/arch/arm/asm_support_arm.h
@@ -24,9 +24,9 @@
 // Offset of field Thread::tls32_.thin_lock_thread_id verified in InitCpu
 #define THREAD_ID_OFFSET 12
 // Offset of field Thread::tlsPtr_.card_table verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 112
+#define THREAD_CARD_TABLE_OFFSET 120
 // Offset of field Thread::tlsPtr_.exception verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 116
+#define THREAD_EXCEPTION_OFFSET 124
 
 #define FRAME_SIZE_SAVE_ALL_CALLEE_SAVE 176
 #define FRAME_SIZE_REFS_ONLY_CALLEE_SAVE 32
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index be28544..28b69ec 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -35,7 +35,7 @@
 namespace art {
 
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_implicit_suspend();
 
 // Get the size of a thumb2 instruction in bytes.
@@ -194,40 +194,19 @@
 
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm);
 
-  Thread* self = reinterpret_cast<Thread*>(sc->arm_r9);
-  CHECK_EQ(self, Thread::Current());
-  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
-      Thread::kStackOverflowProtectedSize;
-
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
     VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
-  // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // that exists below the protected region.  Determine the address of the next
-  // available valid address below the protected region.
-  uintptr_t prevsp = sp;
-  sp = pregion;
-  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
+  VLOG(signals) << "Stack overflow found";
 
-  // Since the compiler puts the implicit overflow
-  // check before the callee save instructions, the SP is already pointing to
-  // the previous frame.
-  VLOG(signals) << "previous frame: " << std::hex << prevsp;
-
-  // Now establish the stack pointer for the signal return.
-  sc->arm_sp = prevsp;
-
-  // Tell the stack overflow code where the new stack pointer should be.
-  sc->arm_ip = sp;      // aka r12
-
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
   // The value of LR must be the same as it was when we entered the code that
   // caused this fault.  This will be inserted into a callee save frame by
-  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
-  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+  // the function to which this handler returns (art_quick_throw_stack_overflow).
+  sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
 
   // The kernel will now return to the address in sc->arm_pc.
   return true;
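The handler no longer rewrites SP. Because this patch moves the protected region to the very bottom of the stack (below stack_begin; see the thread.cc and thread.h changes later), the faulting thread still has the whole reserved gap available to run the throw stub, and the runtime (in code not shown in this diff) unprotects the stack bottom while the error is built, re-protecting it in entrypoint_utils.cc below. A condensed sketch of the resulting handler shape, with context extraction and ART plumbing elided (field names as in the hunk above):

    bool IsStackOverflowProbe(siginfo_t* info, struct sigcontext* sc) {
      uintptr_t sp = sc->arm_sp;
      uintptr_t fault_addr = reinterpret_cast<uintptr_t>(info->si_addr);
      if (fault_addr != sp - GetStackOverflowReservedBytes(kArm)) {
        return false;  // some other SIGSEGV; not the implicit-check probe
      }
      // LR is untouched, so resuming at the throw stub lets it build a normal
      // callee-save frame; no SP rewriting is needed anymore.
      sc->arm_pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
      return true;  // the kernel resumes at sc->arm_pc
    }
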
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 5ab70ea..9f0db8c 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -235,31 +235,6 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
-  /*
-   * Invoke stack overflow exception from signal handler.
-   * On entry:
-   * r9: thread
-   * sp: address of last known frame
-   * r12: address of next valid SP below protected region in stack
-   *
-   * This is deceptively simple but hides some complexity.  It is called in the case of
-   * a stack overflow condition during implicit checks.  The signal handler has been
-   * called by the kernel due to a load from the protected stack region.  The handler
-   * works out the address of the previous frame and passes this in SP.  However there
-   * is a piece of memory somewhere below the current SP that is not accessible (the
-   * memory that caused the signal).  The signal handler works out the next
-   * accessible value of SP and passes this in r12.  This code then sets up the SP
-   * to be this new value and calls the code to create and throw the stack overflow
-   * exception.
-   */
-ENTRY art_quick_throw_stack_overflow_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov r0, r9                      @ pass Thread::Current
-    mov r1, sp                      @ pass SP
-    mov sp, r12                     @ move SP down to below protected region.
-    b   artThrowStackOverflowFromCode                   @ artThrowStackOverflowFromCode(Thread*, SP)
-END art_quick_throw_stack_overflow_from_signal
-
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
diff --git a/runtime/arch/arm64/asm_support_arm64.h b/runtime/arch/arm64/asm_support_arm64.h
index 7f0f56f..a926449 100644
--- a/runtime/arch/arm64/asm_support_arm64.h
+++ b/runtime/arch/arm64/asm_support_arm64.h
@@ -30,9 +30,9 @@
 // Offset of field Thread::suspend_count_
 #define THREAD_FLAGS_OFFSET 0
 // Offset of field Thread::card_table_
-#define THREAD_CARD_TABLE_OFFSET 112
+#define THREAD_CARD_TABLE_OFFSET 120
 // Offset of field Thread::exception_
-#define THREAD_EXCEPTION_OFFSET 120
+#define THREAD_EXCEPTION_OFFSET 128
 // Offset of field Thread::thin_lock_thread_id_
 #define THREAD_ID_OFFSET 12
 
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index 3a7e689..b5948cb 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -27,7 +27,7 @@
 #include "thread.h"
 #include "thread-inl.h"
 
-extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception();
 extern "C" void art_quick_implicit_suspend();
 
@@ -157,40 +157,19 @@
 
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kArm64);
 
-  Thread* self = reinterpret_cast<Thread*>(sc->regs[art::arm64::TR]);
-  CHECK_EQ(self, Thread::Current());
-  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
-      Thread::kStackOverflowProtectedSize;
-
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
     VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
-  // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // that exists below the protected region.  Determine the address of the next
-  // available valid address below the protected region.
-  uintptr_t prevsp = sp;
-  sp = pregion;
-  VLOG(signals) << "setting sp to overflow region at " << std::hex << sp;
+  VLOG(signals) << "Stack overflow found";
 
-  // Since the compiler puts the implicit overflow
-  // check before the callee save instructions, the SP is already pointing to
-  // the previous frame.
-  VLOG(signals) << "previous frame: " << std::hex << prevsp;
-
-  // Now establish the stack pointer for the signal return.
-  sc->sp = prevsp;
-
-  // Tell the stack overflow code where the new stack pointer should be.
-  sc->regs[art::arm64::IP0] = sp;      // aka x16
-
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
   // The value of LR must be the same as it was when we entered the code that
   // caused this fault.  This will be inserted into a callee save frame by
-  // the function to which this handler returns (art_quick_throw_stack_overflow_from_signal).
-  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow_from_signal);
+  // the function to which this handler returns (art_quick_throw_stack_overflow).
+  sc->pc = reinterpret_cast<uintptr_t>(art_quick_throw_stack_overflow);
 
   // The kernel will now return to the address in sc->pc.
   return true;
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 62ae099..ab9035a 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -435,31 +435,6 @@
      */
 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
 
-  /*
-   * Invoke stack overflow exception from signal handler.
-   * On entry:
-   * xSELF: thread
-   * SP: address of last known frame
-   * IP0: address of next valid SP below protected region in stack
-   *
-   * This is deceptively simple but hides some complexity.  It is called in the case of
-   * a stack overflow condition during implicit checks.  The signal handler has been
-   * called by the kernel due to a load from the protected stack region.  The handler
-   * works out the address of the previous frame and passes this in SP.  However there
-   * is a piece of memory somewhere below the current SP that is not accessible (the
-   * memory that caused the signal).  The signal handler works out the next
-   * accessible value of SP and passes this in x16/IP0.  This code then sets up the SP
-   * to be this new value and calls the code to create and throw the stack overflow
-   * exception.
-   */
-ENTRY art_quick_throw_stack_overflow_from_signal
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov x0, xSELF                   // pass Thread::Current
-    mov x1, sp                      // pass SP
-    mov sp, xIP0                    // move SP down to below protected region.
-    b   artThrowStackOverflowFromCode                  // artThrowStackOverflowFromCode(Thread*, SP)
-END art_quick_throw_stack_overflow_from_signal
-
     /*
      * All generated callsites for interface invokes and invocation slow paths will load arguments
      * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
diff --git a/runtime/arch/x86/asm_support_x86.h b/runtime/arch/x86/asm_support_x86.h
index 531ed77..c9f5a25 100644
--- a/runtime/arch/x86/asm_support_x86.h
+++ b/runtime/arch/x86/asm_support_x86.h
@@ -20,11 +20,11 @@
 #include "asm_support.h"
 
 // Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 148
+#define THREAD_SELF_OFFSET 156
 // Offset of field Thread::card_table_ verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 112
+#define THREAD_CARD_TABLE_OFFSET 120
 // Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 116
+#define THREAD_EXCEPTION_OFFSET 124
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
 #define THREAD_ID_OFFSET 12
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 89baded..c143c5d 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -34,7 +34,7 @@
 #define CTX_ESP uc_mcontext->__ss.__rsp
 #define CTX_EIP uc_mcontext->__ss.__rip
 #define CTX_EAX uc_mcontext->__ss.__rax
-#define CTX_METHOD uc_mcontext->__ss.__rax
+#define CTX_METHOD uc_mcontext->__ss.__rdi
 #else
 // 32 bit mac build.
 #define CTX_ESP uc_mcontext->__ss.__esp
@@ -71,7 +71,7 @@
 #define EXT_SYM(sym) _ ## sym
 #else
 extern "C" void art_quick_throw_null_pointer_exception();
-extern "C" void art_quick_throw_stack_overflow_from_signal();
+extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
 #define EXT_SYM(sym) sym
 #endif
@@ -382,30 +382,20 @@
   uintptr_t overflow_addr = sp - GetStackOverflowReservedBytes(kX86);
 #endif
 
-  Thread* self = Thread::Current();
-  uintptr_t pregion = reinterpret_cast<uintptr_t>(self->GetStackEnd()) -
-      Thread::kStackOverflowProtectedSize;
-
   // Check that the fault address is the value expected for a stack overflow.
   if (fault_addr != overflow_addr) {
     VLOG(signals) << "Not a stack overflow";
     return false;
   }
 
-  // We know this is a stack overflow.  We need to move the sp to the overflow region
-  // that exists below the protected region.  Determine the address of the next
-  // available valid address below the protected region.
-  VLOG(signals) << "setting sp to overflow region at " << std::hex << pregion;
+  VLOG(signals) << "Stack overflow found";
 
   // Since the compiler puts the implicit overflow
   // check before the callee save instructions, the SP is already pointing to
   // the previous frame.
 
-  // Tell the stack overflow code where the new stack pointer should be.
-  uc->CTX_EAX = pregion;
-
-  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow_from_signal.
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_stack_overflow_from_signal));
+  // Now arrange for the signal handler to return to art_quick_throw_stack_overflow.
+  uc->CTX_EIP = reinterpret_cast<uintptr_t>(EXT_SYM(art_quick_throw_stack_overflow));
 
   return true;
 }
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 117738a..084846a 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -174,21 +174,6 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
-// On entry to this function, EAX contains the ESP value for the overflow region.
-DEFINE_FUNCTION art_quick_throw_stack_overflow_from_signal
-    // Here, the ESP is above the protected region.  We need to create a
-    // callee save frame and then move ESP down to the overflow region.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %esp, %ecx                // get current stack pointer
-    mov %eax, %esp                // move ESP to the overflow region.
-    PUSH ecx                      // pass SP
-    pushl %fs:THREAD_SELF_OFFSET  // pass Thread::Current()
-    CFI_ADJUST_CFA_OFFSET(4)
-    SETUP_GOT_NOSAVE              // clobbers ebx (harmless here)
-    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
-    int3                          // unreached
-END_FUNCTION art_quick_throw_stack_overflow_from_signal
-
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/arch/x86_64/asm_support_x86_64.h b/runtime/arch/x86_64/asm_support_x86_64.h
index c3637ef..40958dc 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.h
+++ b/runtime/arch/x86_64/asm_support_x86_64.h
@@ -28,11 +28,11 @@
 #define RUNTIME_REF_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET 16
 
 // Offset of field Thread::self_ verified in InitCpu
-#define THREAD_SELF_OFFSET 184
+#define THREAD_SELF_OFFSET 192
 // Offset of field Thread::card_table_ verified in InitCpu
-#define THREAD_CARD_TABLE_OFFSET 112
+#define THREAD_CARD_TABLE_OFFSET 120
 // Offset of field Thread::exception_ verified in InitCpu
-#define THREAD_EXCEPTION_OFFSET 120
+#define THREAD_EXCEPTION_OFFSET 128
 // Offset of field Thread::thin_lock_thread_id_ verified in InitCpu
 #define THREAD_ID_OFFSET 12
 
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index cbf0f38..48bc240 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -284,18 +284,6 @@
      */
 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
 
-// On entry to this function, RAX contains the ESP value for the overflow region.
-DEFINE_FUNCTION SYMBOL(art_quick_throw_stack_overflow_from_signal)
-    // Here, the RSP is above the protected region.  We need to create a
-    // callee save frame and then move RSP down to the overflow region.
-    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
-    mov %rsp, %rsi                    // get current stack pointer, pass SP as second arg
-    mov %rax, %rsp                    // move RSP to the overflow region.
-    mov %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current() as first arg
-    call PLT_SYMBOL(artThrowStackOverflowFromCode)    // artThrowStackOverflowFromCode(Thread*, SP)
-    int3                              // unreached
-END_FUNCTION SYMBOL(art_quick_throw_stack_overflow_from_signal)
-
     /*
      * Called by managed code, saves callee saves and then calls artThrowException
      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index d52992c..91cd11b 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -4191,9 +4191,9 @@
         klass->GetSuperClass()->GetVTableLength();
     size_t actual_count = klass->GetSuperClass()->GetVTableLength();
     CHECK_LE(actual_count, max_count);
-    StackHandleScope<3> hs(self);
+    StackHandleScope<4> hs(self);
+    Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
     Handle<mirror::ObjectArray<mirror::ArtMethod>> vtable;
-    mirror::Class* super_class = klass->GetSuperClass();
     if (super_class->ShouldHaveEmbeddedImtAndVTable()) {
       vtable = hs.NewHandle(AllocArtMethodArray(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
@@ -4205,7 +4205,7 @@
         vtable->Set<false>(i, super_class->GetVTableEntry(i));
       }
     } else {
-      CHECK(super_class->GetVTable() != nullptr) << PrettyClass(super_class);
+      CHECK(super_class->GetVTable() != nullptr) << PrettyClass(super_class.Get());
       vtable = hs.NewHandle(super_class->GetVTable()->CopyOf(self, max_count));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         CHECK(self->IsExceptionPending());  // OOME.
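The first class_linker.cc hunk fixes a GC hazard: super_class was held as a raw mirror::Class* across AllocArtMethodArray, which can allocate and therefore trigger a collection, and a moving collector does not update raw pointers. A hedged sketch of the before/after shapes (names from the hunk):

    // Hazardous shape being removed:
    //   mirror::Class* super = klass->GetSuperClass();               // raw pointer
    //   vtable = hs.NewHandle(AllocArtMethodArray(self, max_count)); // may GC
    //   super->GetVTableEntry(i);                                    // stale if moved
    //
    // Fixed shape: a Handle registers the reference as a GC root, so the
    // slot is updated if the class object moves (scope grown to 4 to fit it):
    //   Handle<mirror::Class> super_class(hs.NewHandle(klass->GetSuperClass()));
    //   vtable = hs.NewHandle(AllocArtMethodArray(self, max_count));
    //   super_class->GetVTableEntry(i);                              // safe
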
@@ -4853,7 +4853,7 @@
                                               Handle<mirror::ClassLoader> class_loader,
                                               Handle<mirror::ArtMethod> referrer,
                                               InvokeType type) {
-  DCHECK(dex_cache.Get() != NULL);
+  DCHECK(dex_cache.Get() != nullptr);
   // Check for hit in the dex cache.
   mirror::ArtMethod* resolved = dex_cache->GetResolvedMethod(method_idx);
   if (resolved != nullptr && !resolved->IsRuntimeMethod()) {
@@ -4862,9 +4862,9 @@
   // Fail, get the declaring class.
   const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
   mirror::Class* klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
-  if (klass == NULL) {
+  if (klass == nullptr) {
     DCHECK(Thread::Current()->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
   // Scan using method_idx, this saves string compares but will only hit for matching dex
   // caches/files.
@@ -4875,7 +4875,7 @@
       break;
     case kInterface:
       resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx);
-      DCHECK(resolved == NULL || resolved->GetDeclaringClass()->IsInterface());
+      DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
       break;
     case kSuper:  // Fall-through.
     case kVirtual:
@@ -4884,7 +4884,7 @@
     default:
       LOG(FATAL) << "Unreachable - invocation type: " << type;
   }
-  if (resolved == NULL) {
+  if (resolved == nullptr) {
     // Search by name, which works across dex files.
     const char* name = dex_file.StringDataByIdx(method_id.name_idx_);
     const Signature signature = dex_file.GetMethodSignature(method_id);
@@ -4895,7 +4895,7 @@
         break;
       case kInterface:
         resolved = klass->FindInterfaceMethod(name, signature);
-        DCHECK(resolved == NULL || resolved->GetDeclaringClass()->IsInterface());
+        DCHECK(resolved == nullptr || resolved->GetDeclaringClass()->IsInterface());
         break;
       case kSuper:  // Fall-through.
       case kVirtual:
@@ -4903,94 +4903,97 @@
         break;
     }
   }
-  if (resolved != NULL) {
-    // We found a method, check for incompatible class changes.
-    if (resolved->CheckIncompatibleClassChange(type)) {
-      resolved = NULL;
-    }
-  }
-  if (resolved != NULL) {
+  // If we found a method, check for incompatible class changes.
+  if (LIKELY(resolved != nullptr && !resolved->CheckIncompatibleClassChange(type))) {
     // Be a good citizen and update the dex cache to speed subsequent calls.
     dex_cache->SetResolvedMethod(method_idx, resolved);
     return resolved;
   } else {
-    // We failed to find the method which means either an access error, an incompatible class
-    // change, or no such method. First try to find the method among direct and virtual methods.
-    const char* name = dex_file.StringDataByIdx(method_id.name_idx_);
-    const Signature signature = dex_file.GetMethodSignature(method_id);
-    switch (type) {
-      case kDirect:
-      case kStatic:
-        resolved = klass->FindVirtualMethod(name, signature);
-        break;
-      case kInterface:
-      case kVirtual:
-      case kSuper:
-        resolved = klass->FindDirectMethod(name, signature);
-        break;
-    }
-
-    // If we found something, check that it can be accessed by the referrer.
-    if (resolved != NULL && referrer.Get() != NULL) {
-      mirror::Class* methods_class = resolved->GetDeclaringClass();
-      mirror::Class* referring_class = referrer->GetDeclaringClass();
-      if (!referring_class->CanAccess(methods_class)) {
-        ThrowIllegalAccessErrorClassForMethodDispatch(referring_class, methods_class,
-                                                      resolved, type);
-        return NULL;
-      } else if (!referring_class->CanAccessMember(methods_class,
-                                                   resolved->GetAccessFlags())) {
-        ThrowIllegalAccessErrorMethod(referring_class, resolved);
-        return NULL;
-      }
-    }
-
-    // Otherwise, throw an IncompatibleClassChangeError if we found something, and check interface
-    // methods and throw if we find the method there. If we find nothing, throw a NoSuchMethodError.
-    switch (type) {
-      case kDirect:
-      case kStatic:
-        if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
-        } else {
-          resolved = klass->FindInterfaceMethod(name, signature);
-          if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
-          } else {
-            ThrowNoSuchMethodError(type, klass, name, signature);
-          }
-        }
-        break;
-      case kInterface:
-        if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
-        } else {
+    // If we had a method, it's an incompatible-class-change error.
+    if (resolved != nullptr) {
+      ThrowIncompatibleClassChangeError(type, resolved->GetInvokeType(), resolved, referrer.Get());
+    } else {
+      // We failed to find the method which means either an access error, an incompatible class
+      // change, or no such method. First try to find the method among direct and virtual methods.
+      const char* name = dex_file.StringDataByIdx(method_id.name_idx_);
+      const Signature signature = dex_file.GetMethodSignature(method_id);
+      switch (type) {
+        case kDirect:
+        case kStatic:
           resolved = klass->FindVirtualMethod(name, signature);
-          if (resolved != NULL) {
+          // Note: kDirect and kStatic are also mutually exclusive, but in that case we would
+          //       have had a resolved method before, which triggers the "true" branch above.
+          break;
+        case kInterface:
+        case kVirtual:
+        case kSuper:
+          resolved = klass->FindDirectMethod(name, signature);
+          break;
+      }
+
+      // If we found something, check that it can be accessed by the referrer.
+      if (resolved != nullptr && referrer.Get() != nullptr) {
+        mirror::Class* methods_class = resolved->GetDeclaringClass();
+        mirror::Class* referring_class = referrer->GetDeclaringClass();
+        if (!referring_class->CanAccess(methods_class)) {
+          ThrowIllegalAccessErrorClassForMethodDispatch(referring_class, methods_class,
+                                                        resolved, type);
+          return nullptr;
+        } else if (!referring_class->CanAccessMember(methods_class,
+                                                     resolved->GetAccessFlags())) {
+          ThrowIllegalAccessErrorMethod(referring_class, resolved);
+          return nullptr;
+        }
+      }
+
+      // Otherwise, throw an IncompatibleClassChangeError if we found something, and check interface
+      // methods and throw if we find the method there. If we find nothing, throw a
+      // NoSuchMethodError.
+      switch (type) {
+        case kDirect:
+        case kStatic:
+          if (resolved != nullptr) {
             ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
           } else {
-            ThrowNoSuchMethodError(type, klass, name, signature);
+            resolved = klass->FindInterfaceMethod(name, signature);
+            if (resolved != nullptr) {
+              ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
+            } else {
+              ThrowNoSuchMethodError(type, klass, name, signature);
+            }
           }
-        }
-        break;
-      case kSuper:
-        ThrowNoSuchMethodError(type, klass, name, signature);
-        break;
-      case kVirtual:
-        if (resolved != NULL) {
-          ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
-        } else {
-          resolved = klass->FindInterfaceMethod(name, signature);
-          if (resolved != NULL) {
-            ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
+          break;
+        case kInterface:
+          if (resolved != nullptr) {
+            ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
           } else {
-            ThrowNoSuchMethodError(type, klass, name, signature);
+            resolved = klass->FindVirtualMethod(name, signature);
+            if (resolved != nullptr) {
+              ThrowIncompatibleClassChangeError(type, kVirtual, resolved, referrer.Get());
+            } else {
+              ThrowNoSuchMethodError(type, klass, name, signature);
+            }
           }
-        }
-        break;
+          break;
+        case kSuper:
+          ThrowNoSuchMethodError(type, klass, name, signature);
+          break;
+        case kVirtual:
+          if (resolved != nullptr) {
+            ThrowIncompatibleClassChangeError(type, kDirect, resolved, referrer.Get());
+          } else {
+            resolved = klass->FindInterfaceMethod(name, signature);
+            if (resolved != nullptr) {
+              ThrowIncompatibleClassChangeError(type, kInterface, resolved, referrer.Get());
+            } else {
+              ThrowNoSuchMethodError(type, klass, name, signature);
+            }
+          }
+          break;
+      }
     }
     DCHECK(Thread::Current()->IsExceptionPending());
-    return NULL;
+    return nullptr;
   }
 }
 
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 4755b9e..a0e35f8 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -212,6 +212,11 @@
 
   bool explicit_overflow_check = Runtime::Current()->ExplicitStackOverflowChecks();
   self->ResetDefaultStackEnd(!explicit_overflow_check);  // Return to default stack size.
+
+  // And restore protection if implicit checks are on.
+  if (!explicit_overflow_check) {
+    self->ProtectStack();
+  }
 }
 
 void CheckReferenceResult(mirror::Object* o, Thread* self) {
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index 879010e..13decc8 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -58,8 +58,10 @@
                                                      StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  self->NoteSignalBeingHandled();
   ThrowLocation throw_location = self->GetCurrentLocationForThrow();
   ThrowNullPointerExceptionFromDexPC(throw_location);
+  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
@@ -83,7 +85,9 @@
 extern "C" void artThrowStackOverflowFromCode(Thread* self, StackReference<mirror::ArtMethod>* sp)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
   FinishCalleeSaveFrameSetup(self, sp, Runtime::kSaveAll);
+  self->NoteSignalBeingHandled();
   ThrowStackOverflowError(self);
+  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
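NoteSignalBeingHandled() and NoteSignalHandlerDone() bracket the throw so the runtime knows it is constructing the overflow error on the normally off-limits reserve. Their bodies are not part of this diff; given the new tls32_.handling_signal_ field checked in entrypoints_order_test.cc below, a plausible sketch is that they simply toggle it:

    // Plausible sketch only; the real bodies are not shown in this patch.
    void Thread::NoteSignalBeingHandled() {
      CHECK(!tls32_.handling_signal_);  // no nested signal handling expected
      tls32_.handling_signal_ = true;
    }
    void Thread::NoteSignalHandlerDone() {
      tls32_.handling_signal_ = false;
    }
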
 
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index ae1b94f..f572d27 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -70,6 +70,10 @@
     EXPECT_OFFSET_DIFFP(Thread, tls32_, daemon, throwing_OutOfMemoryError, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, throwing_OutOfMemoryError, no_thread_suspension, 4);
     EXPECT_OFFSET_DIFFP(Thread, tls32_, no_thread_suspension, thread_exit_check_count, 4);
+    EXPECT_OFFSET_DIFFP(Thread, tls32_, thread_exit_check_count,
+                        is_exception_reported_to_instrumentation_, 4);
+    EXPECT_OFFSET_DIFFP(Thread, tls32_, is_exception_reported_to_instrumentation_,
+                        handling_signal_, 4);
 
     // TODO: Better connection. Take alignment into account.
     EXPECT_OFFSET_DIFF_GT3(Thread, tls32_.thread_exit_check_count, tls64_.trace_clock_base, 4,
diff --git a/runtime/instruction_set.cc b/runtime/instruction_set.cc
index d7e358c..d8a38f4 100644
--- a/runtime/instruction_set.cc
+++ b/runtime/instruction_set.cc
@@ -87,11 +87,10 @@
 static constexpr size_t kDefaultStackOverflowReservedBytes = 16 * KB;
 static constexpr size_t kMipsStackOverflowReservedBytes = kDefaultStackOverflowReservedBytes;
 
-// TODO: Lower once implicit stack-overflow checks can work with less than 16K.
-static constexpr size_t kArmStackOverflowReservedBytes =    (kIsDebugBuild ? 16 : 16) * KB;
-static constexpr size_t kArm64StackOverflowReservedBytes =  (kIsDebugBuild ? 16 : 16) * KB;
-static constexpr size_t kX86StackOverflowReservedBytes =    (kIsDebugBuild ? 16 : 16) * KB;
-static constexpr size_t kX86_64StackOverflowReservedBytes = (kIsDebugBuild ? 16 : 16) * KB;
+static constexpr size_t kArmStackOverflowReservedBytes =    8 * KB;
+static constexpr size_t kArm64StackOverflowReservedBytes =  8 * KB;
+static constexpr size_t kX86StackOverflowReservedBytes =    8 * KB;
+static constexpr size_t kX86_64StackOverflowReservedBytes = 8 * KB;
 
 size_t GetStackOverflowReservedBytes(InstructionSet isa) {
   switch (isa) {
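A rough per-thread budget after this change, tying the halved constants to the rest of the patch (see the thread.h diagram at the end; the 4 KB protected-region size is an assumption):

    protected region : kStackOverflowProtectedSize, mprotect(PROT_NONE)
    gap / reserve    : 8 KB = GetStackOverflowReservedBytes(isa)
    usable stack     : stack_size - reserve (- pthread guard, if any)

The old 16 KB reserve also had to leave room for the signal handler's throw path to run below the protected region; with the protection moved to the stack bottom, the reserve itself is plain usable memory for the throw stub, so half the old reserve appears to suffice.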
diff --git a/runtime/mirror/art_method.cc b/runtime/mirror/art_method.cc
index e88a390..370bfb9 100644
--- a/runtime/mirror/art_method.cc
+++ b/runtime/mirror/art_method.cc
@@ -283,6 +283,11 @@
 
 void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* result,
                        const char* shorty) {
+  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
+    ThrowStackOverflowError(self);
+    return;
+  }
+
   if (kIsDebugBuild) {
     self->AssertThreadSuspensionIsAllowable();
     CHECK_EQ(kRunnable, self->GetState());
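The new guard compares the current native frame address against the managed stack limit before any arguments are marshalled, catching callers that are already deep in the stack. A self-contained illustration of the technique (the stand-in limit replaces Thread::GetStackEnd(), which is what ART actually compares against):

    // __builtin_frame_address(0) yields the current function's frame address.
    // On a downward-growing stack, a frame address below the limit means the
    // headroom reserved for the runtime has already been consumed.
    bool FrameBelowLimit(const void* stack_end_limit) {
      return __builtin_frame_address(0) < stack_end_limit;
    }
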
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 5f718ba..b079229 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -76,6 +76,10 @@
   ScopedFastNativeObjectAccess soa(env);
   NthCallerVisitor visitor(soa.Self(), 2);
   visitor.WalkStack();
+  if (UNLIKELY(visitor.caller == nullptr)) {
+    // The caller is an attached native thread.
+    return nullptr;
+  }
   return soa.AddLocalReference<jobject>(visitor.caller->GetDeclaringClass()->GetClassLoader());
 }
 
@@ -113,6 +117,10 @@
   ScopedFastNativeObjectAccess soa(env);
   NthCallerVisitor visitor(soa.Self(), 3);
   visitor.WalkStack();
+  if (UNLIKELY(visitor.caller == nullptr)) {
+    // The caller is an attached native thread.
+    return nullptr;
+  }
   return soa.AddLocalReference<jclass>(visitor.caller->GetDeclaringClass());
 }
 
diff --git a/runtime/oat.cc b/runtime/oat.cc
index 0a8c35b..ede108c 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,7 +23,7 @@
 namespace art {
 
 const uint8_t OatHeader::kOatMagic[] = { 'o', 'a', 't', '\n' };
-const uint8_t OatHeader::kOatVersion[] = { '0', '3', '8', '\0' };
+const uint8_t OatHeader::kOatVersion[] = { '0', '3', '9', '\0' };
 
 static size_t ComputeOatHeaderSize(const SafeMap<std::string, std::string>* variable_data) {
   size_t estimate = 0U;
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 3ef7a17..27f7765 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -206,7 +206,7 @@
   // If background_collector_type_ is kCollectorTypeNone, it defaults to the collector_type_ after
   // parsing options. If you set this to kCollectorTypeHSpaceCompact then we will do an hspace
   // compaction when we transition to background instead of a normal collector transition.
-  background_collector_type_ = gc::kCollectorTypeSS;
+  background_collector_type_ = gc::kCollectorTypeHomogeneousSpaceCompact;
   stack_size_ = 0;  // 0 means default.
   max_spins_before_thin_lock_inflation_ = Monitor::kDefaultMaxSpinsBeforeThinLockInflation;
   low_memory_mode_ = false;
@@ -394,6 +394,10 @@
     } else if (option == "-XX:IgnoreMaxFootprint") {
       ignore_max_footprint_ = true;
     } else if (option == "-XX:LowMemoryMode") {
+      if (background_collector_type_ == gc::kCollectorTypeHomogeneousSpaceCompact) {
+        // Use semispace instead of homogeneous space compact for low memory mode.
+        background_collector_type_ = gc::kCollectorTypeSS;
+      }
       low_memory_mode_ = true;
       // TODO Might want to turn off must_relocate here.
     } else if (option == "-XX:UseTLAB") {
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index cc50961..7da450c 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -447,6 +447,14 @@
 JValue InvokeWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa, jobject obj, jmethodID mid,
                          va_list args)
     SHARED_LOCKS_REQUIRED(Locks::mutator_lock_) {
+  // We want to make sure that the stack is not within a small distance from the
+  // protected region in case we are calling into a leaf function whose stack
+  // check has been elided.
+  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
+    ThrowStackOverflowError(soa.Self());
+    return JValue();
+  }
+
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   mirror::Object* receiver = method->IsStatic() ? nullptr : soa.Decode<mirror::Object*>(obj);
   uint32_t shorty_len = 0;
@@ -460,6 +468,14 @@
 
 JValue InvokeWithJValues(const ScopedObjectAccessAlreadyRunnable& soa, mirror::Object* receiver,
                          jmethodID mid, jvalue* args) {
+  // We want to make sure that the stack is not within a small distance from the
+  // protected region in case we are calling into a leaf function whose stack
+  // check has been elided.
+  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
+    ThrowStackOverflowError(soa.Self());
+    return JValue();
+  }
+
   mirror::ArtMethod* method = soa.DecodeMethod(mid);
   uint32_t shorty_len = 0;
   const char* shorty = method->GetShorty(&shorty_len);
@@ -472,6 +488,14 @@
 
 JValue InvokeVirtualOrInterfaceWithJValues(const ScopedObjectAccessAlreadyRunnable& soa,
                                            mirror::Object* receiver, jmethodID mid, jvalue* args) {
+  // We want to make sure that the stack is not within a small distance from the
+  // protected region in case we are calling into a leaf function whose stack
+  // check has been elided.
+  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
+    ThrowStackOverflowError(soa.Self());
+    return JValue();
+  }
+
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   uint32_t shorty_len = 0;
   const char* shorty = method->GetShorty(&shorty_len);
@@ -484,6 +508,14 @@
 
 JValue InvokeVirtualOrInterfaceWithVarArgs(const ScopedObjectAccessAlreadyRunnable& soa,
                                            jobject obj, jmethodID mid, va_list args) {
+  // We want to make sure that the stack is not within a small distance from the
+  // protected region in case we are calling into a leaf function whose stack
+  // check has been elided.
+  if (UNLIKELY(__builtin_frame_address(0) < soa.Self()->GetStackEnd())) {
+    ThrowStackOverflowError(soa.Self());
+    return JValue();
+  }
+
   mirror::Object* receiver = soa.Decode<mirror::Object*>(obj);
   mirror::ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
   uint32_t shorty_len = 0;
@@ -497,6 +529,14 @@
 
 void InvokeWithShadowFrame(Thread* self, ShadowFrame* shadow_frame, uint16_t arg_offset,
                            MethodHelper& mh, JValue* result) {
+  // We want to make sure that the stack is not within a small distance from the
+  // protected region in case we are calling into a leaf function whose stack
+  // check has been elided.
+  if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
+    ThrowStackOverflowError(self);
+    return;
+  }
+
   ArgArray arg_array(mh.GetShorty(), mh.GetShortyLength());
   arg_array.BuildArgArrayFromFrame(shadow_frame, arg_offset);
   shadow_frame->GetMethod()->Invoke(self, arg_array.GetArray(), arg_array.GetNumBytes(), result,
@@ -505,6 +545,15 @@
 
 jobject InvokeMethod(const ScopedObjectAccessAlreadyRunnable& soa, jobject javaMethod,
                      jobject javaReceiver, jobject javaArgs, bool accessible) {
+  // We want to make sure that the stack is not within a small distance from the
+  // protected region in case we are calling into a leaf function whose stack
+  // check has been elided.
+  if (UNLIKELY(__builtin_frame_address(0) <
+               soa.Self()->GetStackEndForInterpreter(true))) {
+    ThrowStackOverflowError(soa.Self());
+    return nullptr;
+  }
+
   mirror::ArtMethod* m = mirror::ArtMethod::FromReflectedMethod(soa, javaMethod);
 
   mirror::Class* declaring_class = m->GetDeclaringClass();
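The same guard is pasted into five entry points here (InvokeMethod using the interpreter limit via GetStackEndForInterpreter(true)). A hypothetical consolidation, not part of this change; note the helper must be inlined so __builtin_frame_address(0) samples the caller's frame rather than the helper's:

    // Illustrative only. ALWAYS_INLINE matters: inlined, the builtin reports
    // the caller's frame, which is what the pasted guards measure.
    static ALWAYS_INLINE bool EnsureStackHeadroom(Thread* self) {
      if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEnd())) {
        ThrowStackOverflowError(self);
        return false;
      }
      return true;
    }
    // Usage in InvokeWithJValues:
    //   if (!EnsureStackHeadroom(soa.Self())) return JValue();
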
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 8e6da74..f06d081 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -76,8 +76,7 @@
 bool Thread::is_started_ = false;
 pthread_key_t Thread::pthread_key_self_;
 ConditionVariable* Thread::resume_cond_ = nullptr;
-const size_t Thread::kStackOverflowImplicitCheckSize = kStackOverflowProtectedSize +
-    GetStackOverflowReservedBytes(kRuntimeISA);
+const size_t Thread::kStackOverflowImplicitCheckSize = GetStackOverflowReservedBytes(kRuntimeISA);
 
 static const char* kThreadNameDuringStartup = "<native thread without managed peer>";
 
@@ -238,92 +237,48 @@
 byte dont_optimize_this;
 
 // Install a protected region in the stack.  This is used to trigger a SIGSEGV if a stack
-// overflow is detected.  It is located right below the stack_end_.  Just below that
-// is the StackOverflow reserved region used when creating the StackOverflow
-// exception.
+// overflow is detected.  It is located right below the stack_begin_.
 //
-// There is a little complexity here that deserves a special mention.  When running on the
-// host (glibc), the process's main thread's stack is allocated with a special flag
+// There is a little complexity here that deserves a special mention.  On some
+// architectures, the stack is created using a VM_GROWSDOWN flag
 // to prevent memory being allocated when it's not needed.  This flag makes the
 // kernel only allocate memory for the stack by growing down in memory.  Because we
 // want to put an mprotected region far away from that at the stack top, we need
 // to make sure the pages for the stack are mapped in before we call mprotect.  We do
 // this by reading every page from the stack bottom (highest address) to the stack top.
 // We then madvise this away.
-void Thread::InstallImplicitProtection(bool is_main_stack) {
-  byte* pregion = tlsPtr_.stack_end;
-  byte* stack_lowmem = tlsPtr_.stack_begin;
-  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&pregion) &
+void Thread::InstallImplicitProtection() {
+  byte* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
+  byte* stack_himem = tlsPtr_.stack_end;
+  byte* stack_top = reinterpret_cast<byte*>(reinterpret_cast<uintptr_t>(&stack_himem) &
       ~(kPageSize - 1));    // Page containing current top of stack.
 
-  const bool running_on_intel = (kRuntimeISA == kX86) || (kRuntimeISA == kX86_64);
+  // First remove the protection on the protected region as we will want to read and
+  // write it.  This may fail (on the first attempt when the stack is not mapped)
+  // but we ignore that.
+  UnprotectStack();
 
-  if (running_on_intel) {
-    // On Intel, we need to map in the main stack.  This must be done by reading from the
-    // current stack pointer downwards as the stack is mapped using VM_GROWSDOWN
-    // in the kernel.  Any access more than a page below the current SP will cause
-    // a segv.
-    if (is_main_stack) {
-      // First we need to unprotect the protected region because this may
-      // be called more than once for a particular stack and we will crash
-      // if we try to read the protected page.
-      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_READ);
+  // Map in the stack.  This must be done by reading from the
+  // current stack pointer downwards as the stack may be mapped using VM_GROWSDOWN
+  // in the kernel.  Any access more than a page below the current SP might cause
+  // a segv.
 
-      // Read every page from the high address to the low.
-      for (byte* p = stack_top; p > stack_lowmem; p -= kPageSize) {
-        dont_optimize_this = *p;
-      }
-    }
+  // Read every page from the high address to the low.
+  for (byte* p = stack_top; p > pregion; p -= kPageSize) {
+    dont_optimize_this = *p;
   }
 
-  // Check and place a marker word at the lowest usable address in the stack.  This
-  // is used to prevent a double protection.
-  constexpr uint32_t kMarker = 0xdadadada;
-  uintptr_t *marker = reinterpret_cast<uintptr_t*>(pregion);
-  if (*marker == kMarker) {
-    // The region has already been set up.  But on the main stack on the host we have
-    // removed the protected region in order to read the stack memory.  We need to put
-    // this back again.
-    if (is_main_stack && running_on_intel) {
-      mprotect(pregion - kStackOverflowProtectedSize, kStackOverflowProtectedSize, PROT_NONE);
-      madvise(stack_lowmem, stack_top - stack_lowmem, MADV_DONTNEED);
-    }
-    return;
-  }
-  // Add marker so that we can detect a second attempt to do this.
-  *marker = kMarker;
-
-  if (!running_on_intel) {
-    // Running on !Intel, stacks are mapped cleanly.  The protected region for the
-    // main stack just needs to be mapped in.  We do this by writing one byte per page.
-    for (byte* p = pregion - kStackOverflowProtectedSize;  p < pregion; p += kPageSize) {
-      *p = 0;
-    }
-  }
-
-  pregion -= kStackOverflowProtectedSize;
-
   VLOG(threads) << "installing stack protected region at " << std::hex <<
       static_cast<void*>(pregion) << " to " <<
       static_cast<void*>(pregion + kStackOverflowProtectedSize - 1);
 
-
-  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
-    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. Reason:"
-        << strerror(errno) << kStackOverflowProtectedSize;
-  }
+  // Protect the bottom of the stack to prevent read/write to it.
+  ProtectStack();
 
   // Tell the kernel that we won't be needing these pages any more.
   // NB. madvise will probably write zeroes into the memory (on linux it does).
-  if (is_main_stack) {
-    if (running_on_intel) {
-      // On the host, it's the whole stack (minus a page to prevent overwrite of stack top).
-      madvise(stack_lowmem, stack_top - stack_lowmem - kPageSize, MADV_DONTNEED);
-    } else {
-      // On Android, just the protected region.
-      madvise(pregion, kStackOverflowProtectedSize, MADV_DONTNEED);
-    }
-  }
+  uint32_t unwanted_size = stack_top - pregion - kPageSize;
+  madvise(pregion, unwanted_size, MADV_DONTNEED);
 }
 
 void Thread::CreateNativeThread(JNIEnv* env, jobject java_peer, size_t stack_size, bool is_daemon) {
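The rewritten InstallImplicitProtection boils down to a touch-then-protect idiom: fault in every page from the current SP down to the future protected region so a VM_GROWSDOWN stack is fully mapped, mprotect the bottom, then madvise the touched pages away. A standalone sketch of the idiom, with error handling and ART specifics elided (assumes the region is not yet protected, as after the UnprotectStack() call above):

    #include <cstdint>
    #include <sys/mman.h>
    #include <unistd.h>

    volatile char dont_optimize_sink;  // defeats dead-load elimination, as in the hunk

    void ProtectStackBottom(char* stack_low, size_t protect_size) {
      const uintptr_t page = static_cast<uintptr_t>(sysconf(_SC_PAGESIZE));
      char probe_local;
      char* top = reinterpret_cast<char*>(
          reinterpret_cast<uintptr_t>(&probe_local) & ~(page - 1));
      // Touch one byte per page, high address to low, so a VM_GROWSDOWN stack
      // is fully mapped before mprotect() is applied to its lowest pages.
      for (char* p = top; p > stack_low; p -= page) {
        dont_optimize_sink = *p;
      }
      mprotect(stack_low, protect_size, PROT_NONE);            // seal the bottom
      madvise(stack_low, top - stack_low - page, MADV_DONTNEED);  // release touched pages
    }
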
@@ -538,15 +493,19 @@
   tlsPtr_.stack_begin = reinterpret_cast<byte*>(read_stack_base);
   tlsPtr_.stack_size = read_stack_size;
 
-  if (read_stack_size <= GetStackOverflowReservedBytes(kRuntimeISA)) {
+  // The minimum stack size we can cope with is the overflow reserved bytes (typically
+  // 8K) + the protected region size (4K) + another page (4K).  Typically this will
+  // be 8+4+4 = 16K.  The thread won't be able to do much with this stack; even the GC takes
+  // between 8K and 12K.
+  uint32_t min_stack = GetStackOverflowReservedBytes(kRuntimeISA) + kStackOverflowProtectedSize
+    + 4 * KB;
+  if (read_stack_size <= min_stack) {
     LOG(FATAL) << "Attempt to attach a thread with a too-small stack (" << read_stack_size
         << " bytes)";
   }
 
   // TODO: move this into the Linux GetThreadStack implementation.
-#if defined(__APPLE__)
-  bool is_main_thread = false;
-#else
+#if !defined(__APPLE__)
   // If we're the main thread, check whether we were run with an unlimited stack. In that case,
   // glibc will have reported a 2GB stack for our 32-bit process, and our stack overflow detection
   // will be broken because we'll die long before we get close to 2GB.
@@ -582,20 +541,19 @@
 
   // Install the protected region if we are doing implicit overflow checks.
   if (implicit_stack_check) {
-    if (is_main_thread) {
-      size_t guardsize;
-      pthread_attr_t attributes;
-      CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
-      CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
-      CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
-      // The main thread might have protected region at the bottom.  We need
-      // to install our own region so we need to move the limits
-      // of the stack to make room for it.
-      tlsPtr_.stack_begin += guardsize;
-      tlsPtr_.stack_end += guardsize;
-      tlsPtr_.stack_size -= guardsize;
-    }
-    InstallImplicitProtection(is_main_thread);
+    size_t guardsize;
+    pthread_attr_t attributes;
+    CHECK_PTHREAD_CALL(pthread_attr_init, (&attributes), "guard size query");
+    CHECK_PTHREAD_CALL(pthread_attr_getguardsize, (&attributes, &guardsize), "guard size query");
+    CHECK_PTHREAD_CALL(pthread_attr_destroy, (&attributes), "guard size query");
+    // The thread might have a protected region at the bottom.  We need to install our
+    // own region, so we move the limits of the stack to make room for it.
+    tlsPtr_.stack_begin += guardsize;
+    tlsPtr_.stack_end += guardsize;
+    tlsPtr_.stack_size -= guardsize;
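+    // (For example, with the default 4K pthread guard page, the usable stack shrinks
+    // by one page and our protected region then sits directly above the pthread guard
+    // region.)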
+
+    InstallImplicitProtection();
   }
 
   // Sanity check.
@@ -2266,6 +2224,14 @@
   }
 
   tlsPtr_.stack_end = tlsPtr_.stack_begin;
+
+  // Remove the stack overflow protection if it is set up.
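+  // Unprotecting gives the signal handler, and the code that throws the
+  // StackOverflowError, room to work in what was the protected region.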
+  bool implicit_stack_check = !Runtime::Current()->ExplicitStackOverflowChecks();
+  if (implicit_stack_check) {
+    if (!UnprotectStack()) {
+      LOG(ERROR) << "Unable to remove stack protection for stack overflow";
+    }
+  }
 }
 
 void Thread::SetTlab(byte* start, byte* end) {
@@ -2291,4 +2257,21 @@
   return os;
 }
 
+void Thread::ProtectStack() {
+  void* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
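+  // mprotect requires a page-aligned address: stack_begin should be page aligned by
+  // construction, and kStackOverflowProtectedSize (4K) is a whole number of pages.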
+  VLOG(threads) << "Protecting stack at " << pregion;
+  if (mprotect(pregion, kStackOverflowProtectedSize, PROT_NONE) == -1) {
+    LOG(FATAL) << "Unable to create protected region in stack for implicit overflow check. "
+        "Reason: "
+        << strerror(errno) << " size:  " << kStackOverflowProtectedSize;
+  }
+}
+
+bool Thread::UnprotectStack() {
+  void* pregion = tlsPtr_.stack_begin - kStackOverflowProtectedSize;
+  VLOG(threads) << "Unprotecting stack at " << pregion;
+  return mprotect(pregion, kStackOverflowProtectedSize, PROT_READ|PROT_WRITE) == 0;
+}
+
 }  // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index c2b200b..120ff6f 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -94,16 +94,41 @@
 
 static constexpr size_t kNumRosAllocThreadLocalSizeBrackets = 34;
 
+// Thread's stack layout for implicit stack overflow checks:
+//
+//   +---------------------+  <- highest address of stack memory
+//   |                     |
+//   .                     .  <- SP
+//   |                     |
+//   |                     |
+//   +---------------------+  <- stack_end
+//   |                     |
+//   |  Gap                |
+//   |                     |
+//   +---------------------+  <- stack_begin
+//   |                     |
+//   | Protected region    |
+//   |                     |
+//   +---------------------+  <- lowest address of stack memory
+//
+// The stack always grows down in memory.  At the lowest address is a region of memory
+// that is protected with mprotect(PROT_NONE).  Any attempt to read from or write to
+// this region will result in a segmentation fault signal.  At any point, the thread's
+// SP will be somewhere between stack_end and the highest address in stack memory.  An
+// implicit stack overflow check is a read of memory at a certain offset below the
+// current SP (typically 4K).  If the thread's SP is below the stack_end address, this
+// will be a read into the protected region.  If the SP is above the stack_end address,
+// the thread is guaranteed to have at least 4K of space.  Because stack overflow checks
+// are only performed in generated code, if the thread makes a call out to a native
+// function (through JNI), that native function might only have 4K of memory (if the
+// SP is adjacent to stack_end).
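+//
+// As an illustration only (a sketch, not the exact sequence the compiler emits), an
+// implicit check is conceptually a single load below SP, e.g. on arm:
+//
+//   ldr ip, [sp, #-4096]   @ faults if the read lands in the protected region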
+
 class Thread {
  public:
-  // How much of the reserved bytes is reserved for incoming signals.
-  static constexpr size_t kStackOverflowSignalReservedBytes = 2 * KB;
-
   // For implicit overflow checks we reserve an extra piece of memory at the bottom
   // of the stack (lowest memory).  The higher portion of the memory
   // is protected against reads and the lower is available for use while
   // throwing the StackOverflow exception.
-  static constexpr size_t kStackOverflowProtectedSize = 16 * KB;
+  static constexpr size_t kStackOverflowProtectedSize = 4 * KB;
   static const size_t kStackOverflowImplicitCheckSize;
 
   // Creates a new native thread corresponding to the given managed peer.
@@ -582,7 +607,7 @@
   }
 
   // Install the protected region for implicit stack checks.
-  void InstallImplicitProtection(bool is_main_stack);
+  void InstallImplicitProtection();
 
   bool IsHandlingStackOverflow() const {
     return tlsPtr_.stack_end == tlsPtr_.stack_begin;
@@ -814,6 +839,20 @@
     tls32_.is_exception_reported_to_instrumentation_ = reported;
   }
 
+  void ProtectStack();
+  bool UnprotectStack();
+
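+  // Used by the fault handler to detect a signal arriving while another one is still
+  // being handled; re-entry is fatal.  Expected usage (a sketch, not taken from the
+  // handler code): NoteSignalBeingHandled() on entry, NoteSignalHandlerDone() on exit.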
+  void NoteSignalBeingHandled() {
+    if (tls32_.handling_signal_) {
+      LOG(FATAL) << "Detected signal while processing a signal";
+    }
+    tls32_.handling_signal_ = true;
+  }
+
+  void NoteSignalHandlerDone() {
+    tls32_.handling_signal_ = false;
+  }
+
  private:
   explicit Thread(bool daemon);
   ~Thread() LOCKS_EXCLUDED(Locks::mutator_lock_,
@@ -918,7 +957,8 @@
     explicit tls_32bit_sized_values(bool is_daemon) :
       suspend_count(0), debug_suspend_count(0), thin_lock_thread_id(0), tid(0),
       daemon(is_daemon), throwing_OutOfMemoryError(false), no_thread_suspension(0),
-      thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false) {
+      thread_exit_check_count(0), is_exception_reported_to_instrumentation_(false),
+      handling_signal_(false), padding_(0) {
     }
 
     union StateAndFlags state_and_flags;
@@ -958,6 +998,12 @@
     // When true this field indicates that the exception associated with this thread has already
     // been reported to instrumentation.
     bool32_t is_exception_reported_to_instrumentation_;
+
+    // True if a signal is being handled by this thread.
+    bool32_t handling_signal_;
+
+    // Padding to make the size aligned to 8.  Remove this if we add another 32 bit field.
+    int32_t padding_;
   } tls32_;
 
   struct PACKED(8) tls_64bit_sized_values {
diff --git a/runtime/thread_linux.cc b/runtime/thread_linux.cc
index 9aacb30..1254056 100644
--- a/runtime/thread_linux.cc
+++ b/runtime/thread_linux.cc
@@ -33,8 +33,11 @@
 }
 
 // The default SIGSTKSZ on linux is 8K.  If we do any logging in a signal
-// handler this is too small.  We allocate 16K instead.
-static constexpr int kHostAltSigStackSize = 16*1024;    // 16K signal stack.
+// handler this is too small.  We allocate 16K instead, or the minimum signal stack
+// size if that is larger.
+// TODO: We shouldn't do logging (with locks) in signal handlers.
+static constexpr int kHostAltSigStackSize =
+    16 * KB < MINSIGSTKSZ ? MINSIGSTKSZ : 16 * KB;
 
 void Thread::SetUpAlternateSignalStack() {
   // Create and set an alternate signal stack.
diff --git a/test/004-InterfaceTest/src/Main.java b/test/004-InterfaceTest/src/Main.java
index 9ebac59..297cbb0 100644
--- a/test/004-InterfaceTest/src/Main.java
+++ b/test/004-InterfaceTest/src/Main.java
@@ -23,7 +23,7 @@
     Integer intobj = new Integer(0);
     String s = "asdf";
     long start = System.currentTimeMillis();
-    for (int i = 0; i < 1000000; i++) {
+    for (int i = 0; i < 10000; i++) {
         map.put(intobj, s);
     }
     long end = System.currentTimeMillis();
@@ -34,7 +34,7 @@
     Integer intobj = new Integer(0);
     String s = "asdf";
     long start = System.currentTimeMillis();
-    for (int i = 0; i < 1000000; i++) {
+    for (int i = 0; i < 10000; i++) {
         map.put(intobj, s);
     }
     long end = System.currentTimeMillis();
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index 554712a..f5a1d65 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -28,162 +28,133 @@
 static JavaVM* jvm = NULL;
 
 extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void *) {
-  assert(vm != NULL);
-  assert(jvm == NULL);
+  assert(vm != nullptr);
+  assert(jvm == nullptr);
   jvm = vm;
   return JNI_VERSION_1_6;
 }
 
-static void* testFindClassOnAttachedNativeThread(void*) {
-  assert(jvm != NULL);
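+// Attaches the current native thread to the runtime, calls the function passed via
+// arg (cast to void (*)(JNIEnv*)) with the attached thread's JNIEnv, then detaches.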
+static void* AttachHelper(void* arg) {
+  assert(jvm != nullptr);
 
-  JNIEnv* env = NULL;
+  JNIEnv* env = nullptr;
   JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL };
   int attach_result = jvm->AttachCurrentThread(&env, &args);
   assert(attach_result == 0);
 
-  jclass clazz = env->FindClass("Main");
-  assert(clazz != NULL);
-  assert(!env->ExceptionCheck());
-
-  jobjectArray array = env->NewObjectArray(0, clazz, NULL);
-  assert(array != NULL);
-  assert(!env->ExceptionCheck());
+  typedef void (*Fn)(JNIEnv*);
+  Fn fn = reinterpret_cast<Fn>(arg);
+  fn(env);
 
   int detach_result = jvm->DetachCurrentThread();
   assert(detach_result == 0);
-  return NULL;
+  return nullptr;
+}
+
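+// Runs fn on a freshly created, runtime-attached native thread and joins it, e.g.
+// PthreadHelper(&testFindClassOnAttachedNativeThread).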
+static void PthreadHelper(void (*fn)(JNIEnv*)) {
+  pthread_t pthread;
+  int pthread_create_result = pthread_create(&pthread, nullptr, AttachHelper,
+                                             reinterpret_cast<void*>(fn));
+  assert(pthread_create_result == 0);
+  int pthread_join_result = pthread_join(pthread, nullptr);
+  assert(pthread_join_result == 0);
+}
+
+static void testFindClassOnAttachedNativeThread(JNIEnv* env) {
+  jclass clazz = env->FindClass("Main");
+  assert(clazz != nullptr);
+  assert(!env->ExceptionCheck());
+
+  jobjectArray array = env->NewObjectArray(0, clazz, nullptr);
+  assert(array != nullptr);
+  assert(!env->ExceptionCheck());
 }
 
 // http://b/10994325
-extern "C" JNIEXPORT void JNICALL Java_Main_testFindClassOnAttachedNativeThread(JNIEnv*,
-                                                                                   jclass) {
-  pthread_t pthread;
-  int pthread_create_result = pthread_create(&pthread,
-                                             NULL,
-                                             testFindClassOnAttachedNativeThread,
-                                             NULL);
-  assert(pthread_create_result == 0);
-  int pthread_join_result = pthread_join(pthread, NULL);
-  assert(pthread_join_result == 0);
+extern "C" JNIEXPORT void JNICALL Java_Main_testFindClassOnAttachedNativeThread(JNIEnv*, jclass) {
+  PthreadHelper(&testFindClassOnAttachedNativeThread);
 }
 
-static void* testFindFieldOnAttachedNativeThread(void*) {
-  assert(jvm != NULL);
-
-  JNIEnv* env = NULL;
-  JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL };
-  int attach_result = jvm->AttachCurrentThread(&env, &args);
-  assert(attach_result == 0);
-
+static void testFindFieldOnAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != NULL);
+  assert(clazz != nullptr);
   assert(!env->ExceptionCheck());
 
   jfieldID field = env->GetStaticFieldID(clazz, "testFindFieldOnAttachedNativeThreadField", "Z");
-  assert(field != NULL);
+  assert(field != nullptr);
   assert(!env->ExceptionCheck());
 
   env->SetStaticBooleanField(clazz, field, JNI_TRUE);
-
-  int detach_result = jvm->DetachCurrentThread();
-  assert(detach_result == 0);
-  return NULL;
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_testFindFieldOnAttachedNativeThreadNative(JNIEnv*,
-                                                                                         jclass) {
-  pthread_t pthread;
-  int pthread_create_result = pthread_create(&pthread,
-                                             NULL,
-                                             testFindFieldOnAttachedNativeThread,
-                                             NULL);
-  assert(pthread_create_result == 0);
-  int pthread_join_result = pthread_join(pthread, NULL);
-  assert(pthread_join_result == 0);
+                                                                                      jclass) {
+  PthreadHelper(&testFindFieldOnAttachedNativeThread);
 }
 
-static void* testReflectFieldGetFromAttachedNativeThread(void*) {
-  assert(jvm != NULL);
-
-  JNIEnv* env = NULL;
-  JavaVMAttachArgs args = { JNI_VERSION_1_6, __FUNCTION__, NULL };
-  int attach_result = jvm->AttachCurrentThread(&env, &args);
-  assert(attach_result == 0);
-
+static void testReflectFieldGetFromAttachedNativeThread(JNIEnv* env) {
   jclass clazz = env->FindClass("Main");
-  assert(clazz != NULL);
+  assert(clazz != nullptr);
   assert(!env->ExceptionCheck());
 
   jclass class_clazz = env->FindClass("java/lang/Class");
-  assert(class_clazz != NULL);
+  assert(class_clazz != nullptr);
   assert(!env->ExceptionCheck());
 
   jmethodID getFieldMetodId = env->GetMethodID(class_clazz, "getField",
                                                "(Ljava/lang/String;)Ljava/lang/reflect/Field;");
-  assert(getFieldMetodId != NULL);
+  assert(getFieldMetodId != nullptr);
   assert(!env->ExceptionCheck());
 
   jstring field_name = env->NewStringUTF("testReflectFieldGetFromAttachedNativeThreadField");
-  assert(field_name != NULL);
+  assert(field_name != nullptr);
   assert(!env->ExceptionCheck());
 
   jobject field = env->CallObjectMethod(clazz, getFieldMetodId, field_name);
-  assert(field != NULL);
+  assert(field != nullptr);
   assert(!env->ExceptionCheck());
 
   jclass field_clazz = env->FindClass("java/lang/reflect/Field");
-  assert(field_clazz != NULL);
+  assert(field_clazz != nullptr);
   assert(!env->ExceptionCheck());
 
   jmethodID getBooleanMetodId = env->GetMethodID(field_clazz, "getBoolean",
                                                  "(Ljava/lang/Object;)Z");
-  assert(getBooleanMetodId != NULL);
+  assert(getBooleanMetodId != nullptr);
   assert(!env->ExceptionCheck());
 
   jboolean value = env->CallBooleanMethod(field, getBooleanMetodId, /* ignored */ clazz);
   assert(value == false);
   assert(!env->ExceptionCheck());
-
-  int detach_result = jvm->DetachCurrentThread();
-  assert(detach_result == 0);
-  return NULL;
 }
 
 // http://b/15539150
 extern "C" JNIEXPORT void JNICALL Java_Main_testReflectFieldGetFromAttachedNativeThreadNative(
     JNIEnv*, jclass) {
-  pthread_t pthread;
-  int pthread_create_result = pthread_create(&pthread,
-                                             NULL,
-                                             testReflectFieldGetFromAttachedNativeThread,
-                                             NULL);
-  assert(pthread_create_result == 0);
-  int pthread_join_result = pthread_join(pthread, NULL);
-  assert(pthread_join_result == 0);
+  PthreadHelper(&testReflectFieldGetFromAttachedNativeThread);
 }
 
 
 // http://b/11243757
 extern "C" JNIEXPORT void JNICALL Java_Main_testCallStaticVoidMethodOnSubClassNative(JNIEnv* env,
-                                                                                        jclass) {
+                                                                                     jclass) {
   jclass super_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SuperClass");
-  assert(super_class != NULL);
+  assert(super_class != nullptr);
 
   jmethodID execute = env->GetStaticMethodID(super_class, "execute", "()V");
-  assert(execute != NULL);
+  assert(execute != nullptr);
 
   jclass sub_class = env->FindClass("Main$testCallStaticVoidMethodOnSubClass_SubClass");
-  assert(sub_class != NULL);
+  assert(sub_class != nullptr);
 
   env->CallStaticVoidMethod(sub_class, execute);
 }
 
 extern "C" JNIEXPORT jobject JNICALL Java_Main_testGetMirandaMethodNative(JNIEnv* env, jclass) {
   jclass abstract_class = env->FindClass("Main$testGetMirandaMethod_MirandaAbstract");
-  assert(abstract_class != NULL);
+  assert(abstract_class != nullptr);
   jmethodID miranda_method = env->GetMethodID(abstract_class, "inInterface", "()Z");
-  assert(miranda_method != NULL);
+  assert(miranda_method != nullptr);
   return env->ToReflectedMethod(abstract_class, miranda_method, JNI_FALSE);
 }
 
@@ -191,7 +162,7 @@
 extern "C" void JNICALL Java_Main_testZeroLengthByteBuffers(JNIEnv* env, jclass) {
   std::vector<uint8_t> buffer(1);
   jobject byte_buffer = env->NewDirectByteBuffer(&buffer[0], 0);
-  assert(byte_buffer != NULL);
+  assert(byte_buffer != nullptr);
   assert(!env->ExceptionCheck());
 
   assert(env->GetDirectBufferAddress(byte_buffer) == &buffer[0]);
@@ -202,8 +173,8 @@
 jbyte byte_returns[kByteReturnSize] = { 0, 1, 2, 127, -1, -2, -128 };
 
 extern "C" jbyte JNICALL Java_Main_byteMethod(JNIEnv* env, jclass klass, jbyte b1, jbyte b2,
-                                                    jbyte b3, jbyte b4, jbyte b5, jbyte b6,
-                                                    jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
+                                              jbyte b3, jbyte b4, jbyte b5, jbyte b6,
+                                              jbyte b7, jbyte b8, jbyte b9, jbyte b10) {
   // We use b1 to drive the output.
   assert(b2 == 2);
   assert(b3 == -3);
@@ -227,8 +198,8 @@
 // The weird static_cast is because short int is only guaranteed down to -32767, not Java's -32768.
 
 extern "C" jshort JNICALL Java_Main_shortMethod(JNIEnv* env, jclass klass, jshort s1, jshort s2,
-                                                    jshort s3, jshort s4, jshort s5, jshort s6,
-                                                    jshort s7, jshort s8, jshort s9, jshort s10) {
+                                                jshort s3, jshort s4, jshort s5, jshort s6,
+                                                jshort s7, jshort s8, jshort s9, jshort s10) {
   // We use s1 to drive the output.
   assert(s2 == 2);
   assert(s3 == -3);
@@ -247,9 +218,9 @@
 }
 
 extern "C" jboolean JNICALL Java_Main_booleanMethod(JNIEnv* env, jclass klass, jboolean b1,
-                                                       jboolean b2, jboolean b3, jboolean b4,
-                                                       jboolean b5, jboolean b6, jboolean b7,
-                                                       jboolean b8, jboolean b9, jboolean b10) {
+                                                    jboolean b2, jboolean b3, jboolean b4,
+                                                    jboolean b5, jboolean b6, jboolean b7,
+                                                    jboolean b8, jboolean b9, jboolean b10) {
   // We use b1 to drive the output.
   assert(b2 == JNI_TRUE);
   assert(b3 == JNI_FALSE);
@@ -269,8 +240,8 @@
 jchar char_returns[kCharReturnSize] = { 0, 1, 2, 127, 255, 256, 15000, 34000 };
 
 extern "C" jchar JNICALL Java_Main_charMethod(JNIEnv* env, jclass klacc, jchar c1, jchar c2,
-                                                    jchar c3, jchar c4, jchar c5, jchar c6,
-                                                    jchar c7, jchar c8, jchar c9, jchar c10) {
+                                              jchar c3, jchar c4, jchar c5, jchar c6, jchar c7,
+                                              jchar c8, jchar c9, jchar c10) {
   // We use c1 to drive the output.
   assert(c2 == 'a');
   assert(c3 == 'b');
@@ -291,3 +262,94 @@
                                                                        jclass from, jclass to) {
   return env->IsAssignableFrom(from, to);
 }
+
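+// Exercises VMStack.getCallingClassLoader() from a JNI-attached native thread: the
+// direct call should return null, and the one-level call through System.loadLibrary()
+// should at most throw UnsatisfiedLinkError.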
+static void testShallowGetCallingClassLoader(JNIEnv* env) {
+  // Test direct call.
+  {
+    jclass vmstack_clazz = env->FindClass("dalvik/system/VMStack");
+    assert(vmstack_clazz != nullptr);
+    assert(!env->ExceptionCheck());
+
+    jmethodID getCallingClassLoaderMethodId = env->GetStaticMethodID(vmstack_clazz,
+                                                                     "getCallingClassLoader",
+                                                                     "()Ljava/lang/ClassLoader;");
+    assert(getCallingClassLoaderMethodId != nullptr);
+    assert(!env->ExceptionCheck());
+
+    jobject class_loader = env->CallStaticObjectMethod(vmstack_clazz,
+                                                       getCallingClassLoaderMethodId);
+    assert(class_loader == nullptr);
+    assert(!env->ExceptionCheck());
+  }
+
+  // Test one-level call. Use System.loadLibrary().
+  {
+    jclass system_clazz = env->FindClass("java/lang/System");
+    assert(system_clazz != nullptr);
+    assert(!env->ExceptionCheck());
+
+    jmethodID loadLibraryMethodId = env->GetStaticMethodID(system_clazz, "loadLibrary",
+                                                           "(Ljava/lang/String;)V");
+    assert(loadLibraryMethodId != nullptr);
+    assert(!env->ExceptionCheck());
+
+    // Create a string object.
+    jobject library_string = env->NewStringUTF("arttest");
+    assert(library_string != nullptr);
+    assert(!env->ExceptionCheck());
+
+    env->CallStaticVoidMethod(system_clazz, loadLibraryMethodId, library_string);
+    if (env->ExceptionCheck()) {
+      // At most we expect UnsatisfiedLinkError.
+      jthrowable thrown = env->ExceptionOccurred();
+      env->ExceptionClear();
+
+      jclass unsatisfied_link_error_clazz = env->FindClass("java/lang/UnsatisfiedLinkError");
+      jclass thrown_class = env->GetObjectClass(thrown);
+      assert(env->IsSameObject(unsatisfied_link_error_clazz, thrown_class));
+    }
+  }
+}
+
+// http://b/16867274
+extern "C" JNIEXPORT void JNICALL Java_Main_nativeTestShallowGetCallingClassLoader(JNIEnv* env,
+                                                                                   jclass) {
+  PthreadHelper(&testShallowGetCallingClassLoader);
+}
+
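+// Exercises VMStack.getStackClass2() from a JNI-attached native thread: both the
+// direct call and the one-level call via getStackClass1() should return null, as there
+// are not enough managed frames on the stack.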
+static void testShallowGetStackClass2(JNIEnv* env) {
+  jclass vmstack_clazz = env->FindClass("dalvik/system/VMStack");
+  assert(vmstack_clazz != nullptr);
+  assert(!env->ExceptionCheck());
+
+  // Test direct call.
+  {
+    jmethodID getStackClass2MethodId = env->GetStaticMethodID(vmstack_clazz, "getStackClass2",
+                                                              "()Ljava/lang/Class;");
+    assert(getStackClass2MethodId != nullptr);
+    assert(!env->ExceptionCheck());
+
+    jobject caller_class = env->CallStaticObjectMethod(vmstack_clazz, getStackClass2MethodId);
+    assert(caller_class == nullptr);
+    assert(!env->ExceptionCheck());
+  }
+
+  // Test one-level call. Use VMStack.getStackClass1().
+  {
+    jmethodID getStackClass1MethodId = env->GetStaticMethodID(vmstack_clazz, "getStackClass1",
+                                                              "()Ljava/lang/Class;");
+    assert(getStackClass1MethodId != nullptr);
+    assert(!env->ExceptionCheck());
+
+    jobject caller_class = env->CallStaticObjectMethod(vmstack_clazz, getStackClass1MethodId);
+    assert(caller_class == nullptr);
+    assert(!env->ExceptionCheck());
+  }
+
+  // For better testing we would need to compile against libcore and have a two-deep stack
+  // ourselves.
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_nativeTestShallowGetStackClass2(JNIEnv* env, jclass) {
+  PthreadHelper(&testShallowGetStackClass2);
+}
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index ae133be..5884bc0 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -30,6 +30,8 @@
         testBooleanMethod();
         testCharMethod();
         testIsAssignableFromOnPrimitiveTypes();
+        testShallowGetCallingClassLoader();
+        testShallowGetStackClass2();
     }
 
     private static native void testFindClassOnAttachedNativeThread();
@@ -167,4 +169,16 @@
     }
 
     native static boolean nativeIsAssignableFrom(Class<?> from, Class<?> to);
+
+    static void testShallowGetCallingClassLoader() {
+        nativeTestShallowGetCallingClassLoader();
+    }
+
+    native static void nativeTestShallowGetCallingClassLoader();
+
+    static void testShallowGetStackClass2() {
+        nativeTestShallowGetStackClass2();
+    }
+
+    native static void nativeTestShallowGetStackClass2();
 }
diff --git a/test/004-SignalTest/signaltest.cc b/test/004-SignalTest/signaltest.cc
index a2dd664..c05dc22 100644
--- a/test/004-SignalTest/signaltest.cc
+++ b/test/004-SignalTest/signaltest.cc
@@ -16,6 +16,7 @@
 
 #include <signal.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <unistd.h>
 
 #include "jni.h"
@@ -24,8 +25,15 @@
 #include <sys/ucontext.h>
 #endif
 
+static int signal_count;
+static const int kMaxSignal = 2;
+
 static void signalhandler(int sig, siginfo_t* info, void* context) {
   printf("signal caught\n");
+  ++signal_count;
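+  // Guard against an endless SIGSEGV loop: if the handler is entered more often than
+  // the test expects, give up rather than hang.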
+  if (signal_count > kMaxSignal) {
+    abort();
+  }
 #ifdef __arm__
   // On ARM we do a more exhaustive test to make sure the signal
   // context is OK.
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 47111c5..0cf9e16 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -415,8 +415,7 @@
   else
     ifeq ($(5),gcverify)
       uc_run_type := GCVERIFY
-      run_test_options += --runtime-option -Xgc:preverify --runtime-option -Xgc:postverify \
-        --runtime-option -Xgc:preverify_rosalloc --runtime-option -Xgc:postverify_rosalloc
+      run_test_options += --gcverify
       run_test_rule_name := test-art-$(2)-run-test-gcverify-$(3)-$(6)-$(1)$(4)
       ifneq ($$(ART_TEST_GC_VERIFY),true)
         skip_test := true
@@ -424,8 +423,7 @@
     else
       ifeq ($(5),gcstress)
         uc_run_type := GCSTRESS
-        run_test_options += --runtime-option -Xgc:SS --runtime-option -Xms2m \
-          --runtime-option -Xmx2m --runtime-option -Xgc:preverify --runtime-option -Xgc:postverify
+        run_test_options += --gcstress
         run_test_rule_name := test-art-$(2)-run-test-gcstress-$(3)-$(6)-$(1)$(4)
         ifneq ($$(ART_TEST_GC_STRESS),true)
           skip_test := true
diff --git a/test/run-all-tests b/test/run-all-tests
index 284cca0..98f1208 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -80,6 +80,12 @@
     elif [ "x$1" = "x--64" ]; then
         run_args="${run_args} --64"
         shift
+    elif [ "x$1" = "x--gcstress" ]; then
+        run_args="${run_args} --gcstress"
+        shift
+    elif [ "x$1" = "x--gcverify" ]; then
+        run_args="${run_args} --gcverify"
+        shift
     elif [ "x$1" = "x--trace" ]; then
         run_args="${run_args} --trace"
         shift
@@ -116,7 +122,7 @@
              "further documentation:"
         echo "    --debug --dev --host --interpreter --jvm --no-optimize"
         echo "    --no-verify -O --update --valgrind --zygote --64 --relocate"
-        echo "    --prebuild --always-clean"
+        echo "    --prebuild --always-clean --gcstress --gcverify --trace"
         echo "  Specific Runtime Options:"
         echo "    --seq                Run tests one-by-one, avoiding failures caused by busy CPU"
     ) 1>&2
diff --git a/test/run-test b/test/run-test
index ca7e68c..496f7d1 100755
--- a/test/run-test
+++ b/test/run-test
@@ -77,6 +77,9 @@
 build_only="no"
 suffix64=""
 trace="false"
+basic_verify="false"
+gc_verify="false"
+gc_stress="false"
 always_clean="no"
 
 while true; do
@@ -108,6 +111,14 @@
     elif [ "x$1" = "x--no-prebuild" ]; then
         prebuild_mode="no"
         shift;
+    elif [ "x$1" = "x--gcverify" ]; then
+        basic_verify="true"
+        gc_verify="true"
+        shift
+    elif [ "x$1" = "x--gcstress" ]; then
+        basic_verify="true"
+        gc_stress="true"
+        shift
     elif [ "x$1" = "x--image" ]; then
         shift
         image="$1"
@@ -197,6 +208,15 @@
 done
 mkdir -p $tmp_dir
 
+if [ "$basic_verify" = "true" ]; then
+  run_args="${run_args} --runtime-option -Xgc:preverify --runtime-option -Xgc:postverify"
+fi
+if [ "$gc_verify" = "true" ]; then
+  run_args="${run_args} --runtime-option -Xgc:preverify_rosalloc --runtime-option -Xgc:postverify_rosalloc"
+fi
+if [ "$gc_stress" = "true" ]; then
+  run_args="${run_args} --runtime-option -Xgc:SS --runtime-option -Xms2m --runtime-option -Xmx2m"
+fi
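+# For example, --gcstress (which also sets basic_verify) ends up adding the runtime
+# options -Xgc:preverify -Xgc:postverify -Xgc:SS -Xms2m and -Xmx2m.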
 if [ "$trace" = "true" ]; then
     run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file:${DEX_LOCATION}/trace.bin --runtime-option -Xmethod-trace-file-size:2000000"
 fi
@@ -333,6 +353,8 @@
              "files."
         echo "    --64                 Run the test in 64-bit mode"
         echo "    --trace              Run with method tracing"
+        echo "    --gcstress           Run with gc stress testing"
+        echo "    --gcverify           Run with gc verification"
         echo "    --always-clean       Delete the test files even if the test fails."
     ) 1>&2
     exit 1