Merge "Make ImageWriter and PatchOat helper classes nested."
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 223be88..aa0d10b 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -538,14 +538,9 @@
               : optimizer::DexToDexCompilationLevel::kRequired);
     }
   } else if ((access_flags & kAccNative) != 0) {
-    const InstructionSet instruction_set = driver->GetInstructionSet();
-    const bool use_generic_jni =
-        // Are we extracting only and have support for generic JNI down calls?
-        (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
-             InstructionSetHasGenericJniStub(instruction_set)) ||
-        // Always punt to generic JNI for MIPS because of no support for @CriticalNative. b/31743474
-        (instruction_set == kMips || instruction_set == kMips64);
-    if (use_generic_jni) {
+    // Are we extracting only and have support for generic JNI down calls?
+    if (!driver->GetCompilerOptions().IsJniCompilationEnabled() &&
+        InstructionSetHasGenericJniStub(driver->GetInstructionSet())) {
       // Leaving this empty will trigger the generic JNI version
     } else {
       // Look-up the ArtMethod associated with this code_item (if any)
@@ -973,11 +968,12 @@
     return true;
   }
   DCHECK(profile_compilation_info_ != nullptr);
-  bool result = profile_compilation_info_->ContainsClass(dex_file, class_idx);
+  const DexFile::ClassDef& class_def = dex_file.GetClassDef(class_idx);
+  uint16_t type_idx = class_def.class_idx_;
+  bool result = profile_compilation_info_->ContainsClass(dex_file, type_idx);
   if (kDebugProfileGuidedCompilation) {
-    LOG(INFO) << "[ProfileGuidedCompilation] "
-        << (result ? "Verified" : "Skipped") << " method:"
-        << dex_file.GetClassDescriptor(dex_file.GetClassDef(class_idx));
+    LOG(INFO) << "[ProfileGuidedCompilation] " << (result ? "Verified" : "Skipped") << " method:"
+        << dex_file.GetClassDescriptor(class_def);
   }
   return result;
 }
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index a9044a2..21042a3 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -20,6 +20,7 @@
 #include <math.h>
 
 #include "art_method-inl.h"
+#include "base/bit_utils.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
 #include "compiler.h"
@@ -366,7 +367,9 @@
   void StackArgsIntsFirstImpl();
   void StackArgsFloatsFirstImpl();
   void StackArgsMixedImpl();
+#if defined(__mips__) && defined(__LP64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
   void StackArgsSignExtendedMips64Impl();
+#endif
 
   void NormalNativeImpl();
   void FastNativeImpl();
@@ -2145,50 +2148,43 @@
 
 JNI_TEST_CRITICAL(StackArgsMixed)
 
-void Java_MyClassNatives_stackArgsSignExtendedMips64(JNIEnv*, jclass, jint i1, jint i2, jint i3,
-                                                     jint i4, jint i5, jint i6, jint i7, jint i8) {
-  EXPECT_EQ(i1, 1);
-  EXPECT_EQ(i2, 2);
-  EXPECT_EQ(i3, 3);
-  EXPECT_EQ(i4, 4);
-  EXPECT_EQ(i5, 5);
-  EXPECT_EQ(i6, 6);
-  EXPECT_EQ(i7, 7);
-  EXPECT_EQ(i8, -8);
-
 #if defined(__mips__) && defined(__LP64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-  // Mips64 ABI requires that arguments passed through stack be sign-extended 8B slots.
-  // First 8 arguments are passed through registers, check i7 and i8.
-  uint32_t stack1_high = *(&i7 + 1);
-  uint32_t stack2_high = *(&i8 + 1);
-
-  EXPECT_EQ(stack1_high, static_cast<uint32_t>(0));
-  EXPECT_EQ(stack2_high, static_cast<uint32_t>(0xffffffff));
-#else
-  LOG(INFO) << "Skipping stackArgsSignExtendedMips64 as there is nothing to be done on "
-            << kRuntimeISA;
-  // Force-print to std::cout so it's also outside the logcat.
-  std::cout << "Skipping stackArgsSignExtendedMips64 as there is nothing to be done on "
-            << kRuntimeISA << std::endl;
-#endif
+// The function fetches the last argument passed from the caller, which is now on top of the
+// stack, and returns it as an 8B long. That way we can test whether the caller has properly
+// sign-extended the value when placing it on the stack.
+__attribute__((naked))
+jlong Java_MyClassNatives_getStackArgSignExtendedMips64(
+    JNIEnv*, jclass,                      // Arguments passed from caller
+    jint, jint, jint, jint, jint, jint,   // through regs a0 to a7.
+    jint) {                               // The last argument will be passed on the stack.
+  __asm__(
+      ".set noreorder\n\t"                // Just return and store 8 bytes from the top of the stack
+      "jr  $ra\n\t"                       // in v0 (in branch delay slot). This should be the last
+      "ld  $v0, 0($sp)\n\t");             // argument. It is a 32-bit int, but it should be sign
+                                          // extended and it occupies a 64-bit location.
 }
 
 void JniCompilerTest::StackArgsSignExtendedMips64Impl() {
-  SetUpForTest(true, "stackArgsSignExtendedMips64", "(IIIIIIII)V",
-               CURRENT_JNI_WRAPPER(Java_MyClassNatives_stackArgsSignExtendedMips64));
-  jint i1 = 1;
-  jint i2 = 2;
-  jint i3 = 3;
-  jint i4 = 4;
-  jint i5 = 5;
-  jint i6 = 6;
-  jint i7 = 7;
-  jint i8 = -8;
+  uint64_t ret;
+  SetUpForTest(true,
+               "getStackArgSignExtendedMips64",
+               "(IIIIIII)J",
+               // Don't use a wrapper because this is a raw assembly function.
+               reinterpret_cast<void*>(&Java_MyClassNatives_getStackArgSignExtendedMips64));
 
-  env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8);
+  // Mips64 ABI requires that arguments passed through stack be sign-extended 8B slots.
+  // First 8 arguments are passed through registers.
+  // Final argument's value is 7. When sign-extended, higher stack bits should be 0.
+  ret = env_->CallStaticLongMethod(jklass_, jmethod_, 1, 2, 3, 4, 5, 6, 7);
+  EXPECT_EQ(High32Bits(ret), static_cast<uint32_t>(0));
+
+  // Final argument's value is -8.  When sign-extended, higher stack bits should be 0xffffffff.
+  ret = env_->CallStaticLongMethod(jklass_, jmethod_, 1, 2, 3, 4, 5, 6, -8);
+  EXPECT_EQ(High32Bits(ret), static_cast<uint32_t>(0xffffffff));
 }
 
-JNI_TEST_CRITICAL(StackArgsSignExtendedMips64)
+JNI_TEST(StackArgsSignExtendedMips64)
+#endif
 
 void Java_MyClassNatives_normalNative(JNIEnv*, jclass) {
   // Intentionally left empty.
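The new test hinges on simple two's-complement arithmetic: when the caller sign-extends a 32-bit int into an 8-byte stack slot, the upper 32 bits are all zeroes for a non-negative value and all ones for a negative one. Below is a minimal host-side sketch of that invariant, assuming only that art::High32Bits() from base/bit_utils.h returns the upper 32 bits of a 64-bit value (the local High32Bits here is a stand-in, not the ART function).

#include <cassert>
#include <cstdint>

// Stand-in for art::High32Bits() from base/bit_utils.h (assumed semantics).
static uint32_t High32Bits(uint64_t value) {
  return static_cast<uint32_t>(value >> 32);
}

int main() {
  // Sign-extend the two values the test passes as the stack argument.
  uint64_t extended_7 = static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(7)));
  uint64_t extended_minus_8 = static_cast<uint64_t>(static_cast<int64_t>(static_cast<int32_t>(-8)));
  assert(High32Bits(extended_7) == 0u);                  // Non-negative: high word is 0.
  assert(High32Bits(extended_minus_8) == 0xffffffffu);   // Negative: high word is all ones.
  return 0;
}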
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index 3fb7b56..33f4d77 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -222,7 +222,11 @@
                                                      bool is_synchronized,
                                                      bool is_critical_native,
                                                      const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, is_critical_native, shorty, kArm64PointerSize) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kArm64PointerSize) {
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index 9859b5d..36a87a8 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -152,24 +152,6 @@
                                                                    bool is_critical_native,
                                                                    const char* shorty,
                                                                    InstructionSet instruction_set) {
-  if (UNLIKELY(is_critical_native)) {
-    // Sanity check that the requested JNI instruction set
-    // is supported for critical natives. Not every one is.
-    switch (instruction_set) {
-      case kX86_64:
-      case kX86:
-      case kArm64:
-      case kArm:
-      case kThumb2:
-        break;
-      default:
-        is_critical_native = false;
-        LOG(WARNING) << "@CriticalNative support not implemented for " << instruction_set
-                     << "; will crash at runtime if trying to invoke such a method.";
-        // TODO: implement for MIPS/MIPS64
-    }
-  }
-
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
@@ -191,12 +173,18 @@
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) mips::MipsJniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) mips::MipsJniCallingConvention(is_static,
+                                                     is_synchronized,
+                                                     is_critical_native,
+                                                     shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) mips64::Mips64JniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) mips64::Mips64JniCallingConvention(is_static,
+                                                         is_synchronized,
+                                                         is_critical_native,
+                                                         shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index f541d8f..335a2df 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -370,14 +370,6 @@
     kObjectOrClass = 1
   };
 
-  // TODO: remove this constructor once all are changed to the below one.
-  JniCallingConvention(bool is_static,
-                       bool is_synchronized,
-                       const char* shorty,
-                       PointerSize frame_pointer_size)
-      : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size),
-        is_critical_native_(false) {}
-
   JniCallingConvention(bool is_static,
                        bool is_synchronized,
                        bool is_critical_native,
diff --git a/compiler/jni/quick/mips/calling_convention_mips.cc b/compiler/jni/quick/mips/calling_convention_mips.cc
index f5ab5f7..e6948ec 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.cc
+++ b/compiler/jni/quick/mips/calling_convention_mips.cc
@@ -23,6 +23,13 @@
 namespace art {
 namespace mips {
 
+// Up to how many float-like (float, double) args can be enregistered in floating-point registers.
+// The rest of the args must go in integer registers or on the stack.
+constexpr size_t kMaxFloatOrDoubleRegisterArguments = 2u;
+// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be
+// enregistered. The rest of the args must go on the stack.
+constexpr size_t kMaxIntLikeRegisterArguments = 4u;
+
 static const Register kCoreArgumentRegisters[] = { A0, A1, A2, A3 };
 static const FRegister kFArgumentRegisters[] = { F12, F14 };
 static const DRegister kDArgumentRegisters[] = { D6, D7 };
@@ -170,23 +177,134 @@
 }
 // JNI calling convention
 
-MipsJniCallingConvention::MipsJniCallingConvention(bool is_static, bool is_synchronized,
+MipsJniCallingConvention::MipsJniCallingConvention(bool is_static,
+                                                   bool is_synchronized,
+                                                   bool is_critical_native,
                                                    const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kMipsPointerSize) {
-  // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
-  // or jclass for static methods and the JNIEnv. We start at the aligned register A2.
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kMipsPointerSize) {
+  // SYSTEM V - Application Binary Interface (MIPS RISC Processor):
+  // Data Representation - Fundamental Types (3-4) specifies fundamental alignments for each type.
+  //   "Each member is assigned to the lowest available offset with the appropriate alignment. This
+  // may require internal padding, depending on the previous member."
+  //
+  // All of our stack arguments are usually 4-byte aligned, however longs and doubles must be 8
+  // bytes aligned. Add padding to maintain 8-byte alignment invariant.
+  //
+  // Compute padding to ensure longs and doubles are not split in o32.
   size_t padding = 0;
-  for (size_t cur_arg = IsStatic() ? 0 : 1, cur_reg = 2; cur_arg < NumArgs(); cur_arg++) {
+  size_t cur_arg, cur_reg;
+  if (LIKELY(HasExtraArgumentsForJni())) {
+    // Ignore the 'this' jobject or jclass for static methods and the JNIEnv.
+    // We start at the aligned register A2.
+    //
+    // Ignore the first 2 parameters because they are guaranteed to be aligned.
+    cur_arg = NumImplicitArgs();  // Skip the "this" argument.
+    cur_reg = 2;  // Skip {A0=JNIEnv, A1=jobject} / {A0=JNIEnv, A1=jclass} parameters (start at A2).
+  } else {
+    // Check every parameter.
+    cur_arg = 0;
+    cur_reg = 0;
+  }
+
+  // Shift across a logical register mapping that looks like:
+  //
+  //   | A0 | A1 | A2 | A3 | SP+16 | SP+20 | SP+24 | ... | SP+n | SP+n+4 |
+  //
+  //   or one of the variants with floating-point registers (F12 and F14), for example
+  //
+  //   | F12     | F14 | A3 | SP+16 | SP+20 | SP+24 | ... | SP+n | SP+n+4 |
+  //
+  //   (where SP is the stack pointer at the start of called function).
+  //
+  // Any time there would normally be a long/double in an odd logical register,
+  // we have to push out the rest of the mappings by 4 bytes to maintain an 8-byte alignment.
+  //
+  // This works for both physical register pairs {A0, A1}, {A2, A3},
+  // floating-point registers F12, F14 and for when the value is on the stack.
+  //
+  // For example:
+  // (a) long would normally go into A1, but we shift it into A2
+  //  | INT | (PAD) | LONG    |
+  //  | A0  |  A1   | A2 | A3 |
+  //
+  // (b) long would normally go into A3, but we shift it into SP
+  //  | INT | INT | INT | (PAD) | LONG        |
+  //  | A0  | A1  | A2  |  A3   | SP+16 SP+20 |
+  //
+  // where INT is any <=4 byte arg, and LONG is any 8-byte arg.
+  for (; cur_arg < NumArgs(); cur_arg++) {
     if (IsParamALongOrDouble(cur_arg)) {
       if ((cur_reg & 1) != 0) {
         padding += 4;
-        cur_reg++;  // additional bump to ensure alignment
+        cur_reg++;   // Additional bump to ensure alignment.
       }
-      cur_reg++;  // additional bump to skip extra long word
+      cur_reg += 2;  // Bump the iterator twice for every long argument.
+    } else {
+      cur_reg++;     // Bump the iterator for every argument.
     }
-    cur_reg++;  // bump the iterator for every argument
   }
-  padding_ = padding;
+  if (cur_reg < kMaxIntLikeRegisterArguments) {
+    // As a special case, when shifting (or not shifting) leaves no arguments on the stack,
+    // we actually have 0 stack padding.
+    //
+    // For example with @CriticalNative and:
+    // (int, long) -> shifts the long but doesn't need to pad the stack
+    //
+    //          shift
+    //           \/
+    //  | INT | (PAD) | LONG      | (EMPTY) ...
+    //  | r0  |  r1   |  r2  | r3 |   SP    ...
+    //                                /\
+    //                          no stack padding
+    padding_ = 0;
+  } else {
+    padding_ = padding;
+  }
+
+  // Argument Passing (3-17):
+  //   "When the first argument is integral, the remaining arguments are passed in the integer
+  // registers."
+  //
+  //   "The rules that determine which arguments go into registers and which ones must be passed on
+  // the stack are most easily explained by considering the list of arguments as a structure,
+  // aligned according to normal structure rules. Mapping of this structure into the combination of
+  // stack and registers is as follows: up to two leading floating-point arguments can be passed in
+  // $f12 and $f14; everything else with a structure offset greater than or equal to 16 is passed on
+  // the stack. The remainder of the arguments are passed in $4..$7 based on their structure offset.
+  // Holes left in the structure for alignment are unused, whether in registers or in the stack."
+  //
+  // For example with @CriticalNative and:
+  // (a) first argument is not floating-point, so all go into integer registers
+  //  | INT | FLOAT | DOUBLE  |
+  //  | A0  |  A1   | A2 | A3 |
+  // (b) first argument is floating-point, but 2nd is integer
+  //  | FLOAT | INT | DOUBLE  |
+  //  |  F12  | A1  | A2 | A3 |
+  // (c) first two arguments are floating-point (float, double)
+  //  | FLOAT | (PAD) | DOUBLE |  INT  |
+  //  |  F12  |       |  F14   | SP+16 |
+  // (d) first two arguments are floating-point (double, float)
+  //  | DOUBLE | FLOAT | INT |
+  //  |  F12   |  F14  | A3  |
+  // (e) first three arguments are floating-point, but just first two will go into fp registers
+  //  | DOUBLE | FLOAT | FLOAT |
+  //  |  F12   |  F14  |  A3   |
+  //
+  // Find out if the first argument is floating-point. In that case, floating-point registers will
+  // be used for up to two leading floating-point arguments. Otherwise, all arguments will be passed
+  // using integer registers.
+  use_fp_arg_registers_ = false;
+  if (is_critical_native) {
+    if (NumArgs() > 0) {
+      if (IsParamAFloatOrDouble(0)) {
+        use_fp_arg_registers_ = true;
+      }
+    }
+  }
 }
 
 uint32_t MipsJniCallingConvention::CoreSpillMask() const {
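The padding computation commented above can be reproduced outside ART in a few lines. The following is a standalone sketch under the stated o32 rule (longs/doubles must start in an even 4-byte slot); ComputeO32Padding and its shorty walk are hypothetical helpers, not ART code, and the special case of zeroing the padding when no argument ends up on the stack is omitted for brevity.

#include <cstddef>
#include <cstdio>

// shorty: return type first, then one char per argument ('J' = long, 'D' = double).
// first_reg: 2 when JNIEnv*/jobject (or jclass) occupy A0/A1, 0 for @CriticalNative.
static size_t ComputeO32Padding(const char* shorty, size_t first_reg) {
  size_t padding = 0;
  size_t cur_reg = first_reg;
  for (const char* p = shorty + 1; *p != '\0'; ++p) {
    bool is_long_or_double = (*p == 'J' || *p == 'D');
    if (is_long_or_double) {
      if ((cur_reg & 1) != 0) {
        padding += 4;  // Pad so the 8-byte value starts in an even slot.
        ++cur_reg;
      }
      cur_reg += 2;    // A long/double consumes two 4-byte slots.
    } else {
      ++cur_reg;       // Everything else consumes one slot.
    }
  }
  return padding;
}

int main() {
  // void f(int, long) as a normal JNI method: the int lands in A2, the long would start
  // in A3 (odd), so it is pushed out to the stack -> 4 bytes of padding.
  printf("padding = %zu\n", ComputeO32Padding("VIJ", 2));  // Prints "padding = 4".
  return 0;
}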
@@ -202,74 +320,127 @@
 }
 
 size_t MipsJniCallingConvention::FrameSize() {
-  // ArtMethod*, RA and callee save area size, local reference segment state
-  size_t frame_data_size = static_cast<size_t>(kMipsPointerSize) +
-      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kMipsPointerSize, ReferenceCount());
-  // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+  // ArtMethod*, RA and callee save area size, local reference segment state.
+  const size_t method_ptr_size = static_cast<size_t>(kMipsPointerSize);
+  const size_t ra_return_addr_size = kFramePointerSize;
+  const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+
+  size_t frame_data_size = method_ptr_size + ra_return_addr_size + callee_save_area_size;
+
+  if (LIKELY(HasLocalReferenceSegmentState())) {
+    // Local reference segment state.
+    frame_data_size += kFramePointerSize;
+  }
+
+  // References plus 2 words for HandleScope header.
+  const size_t handle_scope_size = HandleScope::SizeOf(kMipsPointerSize, ReferenceCount());
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;    // Handle scope size.
+  }
+
+  // Plus return value spill area size.
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t MipsJniCallingConvention::OutArgSize() {
-  return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_, kStackAlignment);
+  // Argument Passing (3-17):
+  //   "Despite the fact that some or all of the arguments to a function are passed in registers,
+  // always allocate space on the stack for all arguments. This stack space should be a structure
+  // large enough to contain all the arguments, aligned according to normal structure rules (after
+  // promotion and structure return pointer insertion). The locations within the stack frame used
+  // for arguments are called the home locations."
+  //
+  // Allocate 16 bytes for home locations + space needed for stack arguments.
+  return RoundUp(
+      (kMaxIntLikeRegisterArguments + NumberOfOutgoingStackArgs()) * kFramePointerSize + padding_,
+      kStackAlignment);
 }
 
 ArrayRef<const ManagedRegister> MipsJniCallingConvention::CalleeSaveRegisters() const {
   return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
 }
 
-// JniCallingConvention ABI follows AAPCS where longs and doubles must occur
-// in even register numbers and stack slots
+// JniCallingConvention ABI follows o32 where longs and doubles must occur
+// in even register numbers and stack slots.
 void MipsJniCallingConvention::Next() {
   JniCallingConvention::Next();
-  size_t arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) &&
-      (arg_pos < NumArgs()) &&
-      IsParamALongOrDouble(arg_pos)) {
-    // itr_slots_ needs to be an even number, according to AAPCS.
-    if ((itr_slots_ & 0x1u) != 0) {
+
+  if (LIKELY(HasNext())) {  // Avoid a CHECK failure in the IsCurrentParam* helpers below.
+    // Ensure slot is 8-byte aligned for longs/doubles (o32).
+    if (IsCurrentParamALongOrDouble() && ((itr_slots_ & 0x1u) != 0)) {
+      // itr_slots_ needs to be an even number, according to o32.
       itr_slots_++;
     }
   }
 }
 
 bool MipsJniCallingConvention::IsCurrentParamInRegister() {
-  return itr_slots_ < 4;
+  // Argument Passing (3-17):
+  //   "The rules that determine which arguments go into registers and which ones must be passed on
+  // the stack are most easily explained by considering the list of arguments as a structure,
+  // aligned according to normal structure rules. Mapping of this structure into the combination of
+  // stack and registers is as follows: up to two leading floating-point arguments can be passed in
+  // $f12 and $f14; everything else with a structure offset greater than or equal to 16 is passed on
+  // the stack. The remainder of the arguments are passed in $4..$7 based on their structure offset.
+  // Holes left in the structure for alignment are unused, whether in registers or in the stack."
+  //
+  // Even when floating-point registers are used, there can be up to 4 arguments passed in
+  // registers.
+  return itr_slots_ < kMaxIntLikeRegisterArguments;
 }
 
 bool MipsJniCallingConvention::IsCurrentParamOnStack() {
   return !IsCurrentParamInRegister();
 }
 
-static const Register kJniArgumentRegisters[] = {
-  A0, A1, A2, A3
-};
 ManagedRegister MipsJniCallingConvention::CurrentParamRegister() {
-  CHECK_LT(itr_slots_, 4u);
-  int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) && IsParamALongOrDouble(arg_pos)) {
-    CHECK_EQ(itr_slots_, 2u);
-    return MipsManagedRegister::FromRegisterPair(A2_A3);
+  CHECK_LT(itr_slots_, kMaxIntLikeRegisterArguments);
+  // Up to two leading floating-point arguments can be passed in floating-point registers.
+  if (use_fp_arg_registers_ && (itr_args_ < kMaxFloatOrDoubleRegisterArguments)) {
+    if (IsCurrentParamAFloatOrDouble()) {
+      if (IsCurrentParamADouble()) {
+        return MipsManagedRegister::FromDRegister(kDArgumentRegisters[itr_args_]);
+      } else {
+        return MipsManagedRegister::FromFRegister(kFArgumentRegisters[itr_args_]);
+      }
+    }
+  }
+  // All other arguments (including other floating-point arguments) will be passed in integer
+  // registers.
+  if (IsCurrentParamALongOrDouble()) {
+    if (itr_slots_ == 0u) {
+      return MipsManagedRegister::FromRegisterPair(A0_A1);
+    } else {
+      CHECK_EQ(itr_slots_, 2u);
+      return MipsManagedRegister::FromRegisterPair(A2_A3);
+    }
   } else {
-    return
-      MipsManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_slots_]);
+    return MipsManagedRegister::FromCoreRegister(kCoreArgumentRegisters[itr_slots_]);
   }
 }
 
 FrameOffset MipsJniCallingConvention::CurrentParamStackOffset() {
-  CHECK_GE(itr_slots_, 4u);
+  CHECK_GE(itr_slots_, kMaxIntLikeRegisterArguments);
   size_t offset = displacement_.Int32Value() - OutArgSize() + (itr_slots_ * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
 
 size_t MipsJniCallingConvention::NumberOfOutgoingStackArgs() {
-  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
-  // regular argument parameters and this
-  size_t param_args = NumArgs() + NumLongOrDoubleArgs();
-  // count JNIEnv*
-  return static_args + param_args + 1;
+  size_t static_args = HasSelfClass() ? 1 : 0;            // Count jclass.
+  // Regular argument parameters and this.
+  size_t param_args = NumArgs() + NumLongOrDoubleArgs();  // Twice count 8-byte args.
+  // Count JNIEnv*; the register-passed arguments are subtracted below.
+  size_t internal_args = (HasJniEnv() ? 1 : 0);
+  size_t total_args = static_args + param_args + internal_args;
+
+  return total_args - std::min(kMaxIntLikeRegisterArguments, static_cast<size_t>(total_args));
 }
+
 }  // namespace mips
 }  // namespace art
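The OutArgSize() change above follows directly from the quoted home-location rule: four A0-A3 slots are always reserved even for register-passed arguments. A quick arithmetic sketch, assuming ART's 16-byte stack alignment and the 4-byte o32 frame pointer size (the helper names are made up for illustration):

#include <cstddef>
#include <cstdio>

static size_t RoundUp(size_t x, size_t n) { return ((x + n - 1) / n) * n; }

// Mirrors the shape of MipsJniCallingConvention::OutArgSize(): 4 home-location slots are
// always reserved for A0-A3, plus genuinely stack-passed slots and alignment padding.
static size_t O32OutArgSize(size_t outgoing_stack_slots, size_t padding) {
  constexpr size_t kMaxIntLikeRegisterArguments = 4u;  // A0-A3.
  constexpr size_t kFramePointerSize = 4u;             // 32-bit MIPS.
  constexpr size_t kStackAlignment = 16u;              // ART stack alignment (assumed 16 bytes).
  return RoundUp(
      (kMaxIntLikeRegisterArguments + outgoing_stack_slots) * kFramePointerSize + padding,
      kStackAlignment);
}

int main() {
  printf("%zu\n", O32OutArgSize(0, 0));  // No stack args: still 16 bytes of home locations.
  printf("%zu\n", O32OutArgSize(3, 4));  // 16 + 12 + 4 = 32 bytes.
  return 0;
}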
diff --git a/compiler/jni/quick/mips/calling_convention_mips.h b/compiler/jni/quick/mips/calling_convention_mips.h
index e95a738..ad3f118 100644
--- a/compiler/jni/quick/mips/calling_convention_mips.h
+++ b/compiler/jni/quick/mips/calling_convention_mips.h
@@ -54,14 +54,17 @@
 
 class MipsJniCallingConvention FINAL : public JniCallingConvention {
  public:
-  MipsJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  MipsJniCallingConvention(bool is_static,
+                           bool is_synchronized,
+                           bool is_critical_native,
+                           const char* shorty);
   ~MipsJniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
   ManagedRegister IntReturnRegister() OVERRIDE;
   ManagedRegister InterproceduralScratchRegister() OVERRIDE;
   // JNI calling convention
-  void Next() OVERRIDE;  // Override default behavior for AAPCS
+  void Next() OVERRIDE;  // Override default behavior for o32.
   size_t FrameSize() OVERRIDE;
   size_t OutArgSize() OVERRIDE;
   ArrayRef<const ManagedRegister> CalleeSaveRegisters() const OVERRIDE;
@@ -82,8 +85,9 @@
   size_t NumberOfOutgoingStackArgs() OVERRIDE;
 
  private:
-  // Padding to ensure longs and doubles are not split in AAPCS
+  // Padding to ensure longs and doubles are not split in o32.
   size_t padding_;
+  bool use_fp_arg_registers_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsJniCallingConvention);
 };
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.cc b/compiler/jni/quick/mips64/calling_convention_mips64.cc
index 8341e8e..afe6a76 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.cc
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.cc
@@ -23,6 +23,9 @@
 namespace art {
 namespace mips64 {
 
+// Up to how many args can be enregistered. The rest of the args must go on the stack.
+constexpr size_t kMaxRegisterArguments = 8u;
+
 static const GpuRegister kGpuArgumentRegisters[] = {
   A0, A1, A2, A3, A4, A5, A6, A7
 };
@@ -150,9 +153,15 @@
 
 // JNI calling convention
 
-Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static, bool is_synchronized,
+Mips64JniCallingConvention::Mips64JniCallingConvention(bool is_static,
+                                                       bool is_synchronized,
+                                                       bool is_critical_native,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kMips64PointerSize) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kMips64PointerSize) {
 }
 
 uint32_t Mips64JniCallingConvention::CoreSpillMask() const {
@@ -168,13 +177,28 @@
 }
 
 size_t Mips64JniCallingConvention::FrameSize() {
-  // ArtMethod*, RA and callee save area size, local reference segment state
-  size_t frame_data_size = kFramePointerSize +
-      (CalleeSaveRegisters().size() + 1) * kFramePointerSize + sizeof(uint32_t);
-  // References plus 2 words for HandleScope header
+  // ArtMethod*, RA and callee save area size, local reference segment state.
+  size_t method_ptr_size = static_cast<size_t>(kFramePointerSize);
+  size_t ra_and_callee_save_area_size = (CalleeSaveRegisters().size() + 1) * kFramePointerSize;
+
+  size_t frame_data_size = method_ptr_size + ra_and_callee_save_area_size;
+  if (LIKELY(HasLocalReferenceSegmentState())) {                     // Local ref. segment state.
+    // Local reference segment state is sometimes excluded.
+    frame_data_size += sizeof(uint32_t);
+  }
+  // References plus 2 words for HandleScope header.
   size_t handle_scope_size = HandleScope::SizeOf(kMips64PointerSize, ReferenceCount());
-  // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;                                 // Handle scope size.
+  }
+
+  // Plus return value spill area size.
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t Mips64JniCallingConvention::OutArgSize() {
@@ -186,7 +210,7 @@
 }
 
 bool Mips64JniCallingConvention::IsCurrentParamInRegister() {
-  return itr_args_ < 8;
+  return itr_args_ < kMaxRegisterArguments;
 }
 
 bool Mips64JniCallingConvention::IsCurrentParamOnStack() {
@@ -204,7 +228,8 @@
 
 FrameOffset Mips64JniCallingConvention::CurrentParamStackOffset() {
   CHECK(IsCurrentParamOnStack());
-  size_t offset = displacement_.Int32Value() - OutArgSize() + ((itr_args_ - 8) * kFramePointerSize);
+  size_t args_on_stack = itr_args_ - kMaxRegisterArguments;
+  size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
@@ -214,7 +239,7 @@
   size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
 
   // Nothing on the stack unless there are more than 8 arguments
-  return (all_args > 8) ? all_args - 8 : 0;
+  return (all_args > kMaxRegisterArguments) ? all_args - kMaxRegisterArguments : 0;
 }
 }  // namespace mips64
 }  // namespace art
diff --git a/compiler/jni/quick/mips64/calling_convention_mips64.h b/compiler/jni/quick/mips64/calling_convention_mips64.h
index a5fd111..faedaef 100644
--- a/compiler/jni/quick/mips64/calling_convention_mips64.h
+++ b/compiler/jni/quick/mips64/calling_convention_mips64.h
@@ -54,7 +54,10 @@
 
 class Mips64JniCallingConvention FINAL : public JniCallingConvention {
  public:
-  Mips64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  Mips64JniCallingConvention(bool is_static,
+                             bool is_synchronized,
+                             bool is_critical_native,
+                             const char* shorty);
   ~Mips64JniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index bc9ca6d..7c55659 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -350,7 +350,8 @@
   const uint32_t class_defs_size = header->GetCollections().ClassDefsSize();
   for (uint32_t class_index = 0; class_index < class_defs_size; class_index++) {
     dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(class_index);
-    if (profile_info_ != nullptr && !profile_info_->ContainsClass(*dex_file, class_index)) {
+    uint16_t type_idx = class_def->ClassType()->GetIndex();
+    if (profile_info_ != nullptr && !profile_info_->ContainsClass(*dex_file, type_idx)) {
       continue;
     }
     dumper->DumpAddressRange(class_def, class_index);
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index cb0bdbf..6a442a5 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -156,17 +156,24 @@
   // JNI
   qpoints->pJniMethodStart = JniMethodStart;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodStart), "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastStart = JniMethodFastStart;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastStart), "Non-direct C stub marked direct.");
   qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodStartSynchronized),
                 "Non-direct C stub marked direct.");
   qpoints->pJniMethodEnd = JniMethodEnd;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEnd), "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastEnd = JniMethodFastEnd;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastEnd), "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndSynchronized),
                 "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndWithReference),
                 "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastEndWithReference),
+                "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndWithReferenceSynchronized),
                 "Non-direct C stub marked direct.");
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index 077d2db..cb3dfec 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -17,6 +17,7 @@
 #include "context_x86.h"
 
 #include "base/bit_utils.h"
+#include "base/memory_tool.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
@@ -102,6 +103,7 @@
   uintptr_t esp = gprs[kNumberOfCpuRegisters - ESP - 1] - sizeof(intptr_t);
   gprs[kNumberOfCpuRegisters] = esp;
   *(reinterpret_cast<uintptr_t*>(esp)) = eip_;
+  MEMORY_TOOL_HANDLE_NO_RETURN;
   __asm__ __volatile__(
       "movl %1, %%ebx\n\t"          // Address base of FPRs.
       "movsd 0(%%ebx), %%xmm0\n\t"  // Load up XMM0-XMM7.
diff --git a/runtime/base/memory_tool.h b/runtime/base/memory_tool.h
index e1a2e07..42cbaa0 100644
--- a/runtime/base/memory_tool.h
+++ b/runtime/base/memory_tool.h
@@ -40,7 +40,10 @@
 constexpr bool kMemoryToolIsAvailable = false;
 #endif
 
+extern "C" void __asan_handle_no_return();
+
 #define ATTRIBUTE_NO_SANITIZE_ADDRESS __attribute__((no_sanitize_address))
+#define MEMORY_TOOL_HANDLE_NO_RETURN __asan_handle_no_return()
 #define RUNNING_ON_MEMORY_TOOL 1U
 constexpr bool kMemoryToolIsValgrind = false;
 constexpr bool kMemoryToolDetectsLeaks = true;
@@ -55,6 +58,7 @@
 #define MEMORY_TOOL_MAKE_UNDEFINED(p, s) VALGRIND_MAKE_MEM_UNDEFINED(p, s)
 #define MEMORY_TOOL_MAKE_DEFINED(p, s) VALGRIND_MAKE_MEM_DEFINED(p, s)
 #define ATTRIBUTE_NO_SANITIZE_ADDRESS
+#define MEMORY_TOOL_HANDLE_NO_RETURN do { } while (0)
 #define RUNNING_ON_MEMORY_TOOL RUNNING_ON_VALGRIND
 constexpr bool kMemoryToolIsAvailable = true;
 constexpr bool kMemoryToolIsValgrind = true;
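The new macro is intended for code that abandons the current C++ stack, as the context_x86.cc change above does before its hand-written long jump: under ASan the call lets the tool release its fake-stack frames, and under Valgrind it expands to a no-op. A hedged usage sketch follows; DoNeverReturningJump is a hypothetical function, and the include path is assumed from the ART source layout.

#include <csetjmp>
#include "base/memory_tool.h"  // Assumed include path for MEMORY_TOOL_HANDLE_NO_RETURN.

// Jumps back to a previously saved context and never returns to its caller.
[[noreturn]] void DoNeverReturningJump(std::jmp_buf saved_context) {
  MEMORY_TOOL_HANDLE_NO_RETURN;    // ASan: drop fake-stack frames below this point; Valgrind: no-op.
  std::longjmp(saved_context, 1);  // Control never comes back here.
}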
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 65e46c2..4905514 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -8366,17 +8366,16 @@
       }
       ObjPtr<mirror::DexCache> klass_dex_cache = klass->GetDexCache();
       if (klass_dex_cache == dex_cache) {
-        const size_t class_def_idx = klass->GetDexClassDefIndex();
         DCHECK(klass->IsResolved());
-        CHECK_LT(class_def_idx, num_class_defs);
-        class_set.insert(class_def_idx);
+        CHECK_LT(klass->GetDexClassDefIndex(), num_class_defs);
+        class_set.insert(klass->GetDexTypeIndex());
       }
     }
 
     if (!class_set.empty()) {
       auto it = ret.find(resolved_classes);
       if (it != ret.end()) {
-        // Already have the key, union the class def idxs.
+        // Already have the key, union the class type indexes.
         it->AddClasses(class_set.begin(), class_set.end());
       } else {
         resolved_classes.AddClasses(class_set.begin(), class_set.end());
@@ -8419,13 +8418,8 @@
       VLOG(profiler) << "Found opened dex file for " << dex_file->GetLocation() << " with "
                      << info.GetClasses().size() << " classes";
       DCHECK_EQ(dex_file->GetLocationChecksum(), info.GetLocationChecksum());
-      for (uint16_t class_def_idx : info.GetClasses()) {
-        if (class_def_idx >= dex_file->NumClassDefs()) {
-          LOG(WARNING) << "Class def index " << class_def_idx << " >= " << dex_file->NumClassDefs();
-          continue;
-        }
-        const DexFile::TypeId& type_id = dex_file->GetTypeId(
-            dex_file->GetClassDef(class_def_idx).class_idx_);
+      for (uint16_t type_idx : info.GetClasses()) {
+        const DexFile::TypeId& type_id = dex_file->GetTypeId(type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
         ret.insert(descriptor);
       }
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 23a5ddd..803e9d5 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -701,5 +701,24 @@
   }
 }
 
+ScopedJitSuspend::ScopedJitSuspend() {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  was_on_ = (jit != nullptr) && (jit->GetThreadPool() != nullptr);
+  if (was_on_) {
+    Thread* self = Thread::Current();
+    jit->WaitForCompilationToFinish(self);
+    jit->GetThreadPool()->StopWorkers(self);
+    jit->WaitForCompilationToFinish(self);
+  }
+}
+
+ScopedJitSuspend::~ScopedJitSuspend() {
+  if (was_on_) {
+    DCHECK(Runtime::Current()->GetJit() != nullptr);
+    DCHECK(Runtime::Current()->GetJit()->GetThreadPool() != nullptr);
+    Runtime::Current()->GetJit()->GetThreadPool()->StartWorkers(Thread::Current());
+  }
+}
+
 }  // namespace jit
 }  // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index a782437..a230c78 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -175,6 +175,10 @@
 
   static bool LoadCompilerLibrary(std::string* error_msg);
 
+  ThreadPool* GetThreadPool() const {
+    return thread_pool_.get();
+  }
+
  private:
   Jit();
 
@@ -278,6 +282,16 @@
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
 };
 
+// Helper class to stop the JIT for a given scope. This will wait for the JIT to quiesce.
+class ScopedJitSuspend {
+ public:
+  ScopedJitSuspend();
+  ~ScopedJitSuspend();
+
+ private:
+  bool was_on_;
+};
+
 }  // namespace jit
 }  // namespace art
 
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index f535151..b9f5981 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -37,7 +37,7 @@
 namespace art {
 
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '1', '\0' };
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '2', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -282,12 +282,12 @@
 
 bool ProfileCompilationInfo::AddClassIndex(const std::string& dex_location,
                                            uint32_t checksum,
-                                           uint16_t class_idx) {
+                                           uint16_t type_idx) {
   DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
   if (data == nullptr) {
     return false;
   }
-  data->class_set.insert(class_idx);
+  data->class_set.insert(type_idx);
   return true;
 }
 
@@ -304,8 +304,8 @@
   }
 
   for (uint16_t i = 0; i < class_set_size; i++) {
-    uint16_t class_def_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
-    if (!AddClassIndex(dex_location, checksum, class_def_idx)) {
+    uint16_t type_idx = line_buffer.ReadUintAndAdvance<uint16_t>();
+    if (!AddClassIndex(dex_location, checksum, type_idx)) {
       return false;
     }
   }
@@ -569,14 +569,14 @@
   return false;
 }
 
-bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const {
+bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, uint16_t type_idx) const {
   auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
   if (info_it != info_.end()) {
     if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
       return false;
     }
     const std::set<uint16_t>& classes = info_it->second.class_set;
-    return classes.find(class_def_idx) != classes.end();
+    return classes.find(type_idx) != classes.end();
   }
   return false;
 }
@@ -637,7 +637,7 @@
     os << "\n\tclasses: ";
     for (const auto class_it : dex_data.class_set) {
       if (dex_file != nullptr) {
-        os << "\n\t\t" << dex_file->GetClassDescriptor(dex_file->GetClassDef(class_it));
+        os << "\n\t\t" << dex_file->PrettyType(class_it);
       } else {
         os << class_it << ",";
       }
@@ -702,11 +702,11 @@
     }
 
     for (uint16_t c = 0; c < number_of_classes; c++) {
-      uint16_t class_idx = rand() % max_classes;
+      uint16_t type_idx = rand() % max_classes;
       if (c < (number_of_classes / kFavorSplit)) {
-        class_idx %= kFavorFirstN;
+        type_idx %= kFavorFirstN;
       }
-      info.AddClassIndex(profile_key, 0, class_idx);
+      info.AddClassIndex(profile_key, 0, type_idx);
     }
   }
   return info.Save(fd);
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index fdca078..f8ed573 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -65,8 +65,8 @@
   // Returns true if the method reference is present in the profiling info.
   bool ContainsMethod(const MethodReference& method_ref) const;
 
-  // Returns true if the class is present in the profiling info.
-  bool ContainsClass(const DexFile& dex_file, uint16_t class_def_idx) const;
+  // Returns true if the class's type is present in the profiling info.
+  bool ContainsClass(const DexFile& dex_file, uint16_t type_idx) const;
 
   // Dumps all the loaded profile info into a string and returns it.
   // If dex_files is not null then the method indices will be resolved to their
@@ -115,7 +115,7 @@
 
   DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
   bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
-  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, uint16_t class_idx);
+  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, uint16_t type_idx);
   bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
 
   // Parsing functionality.
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index b14f340..65fd999 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -177,7 +177,7 @@
     }
 
     ++waiting_count_;
-    if (waiting_count_ == GetThreadCount() && tasks_.empty()) {
+    if (waiting_count_ == GetThreadCount() && !HasOutstandingTasks()) {
       // We may be done, lets broadcast to the completion condition.
       completion_condition_.Broadcast(self);
     }
@@ -200,7 +200,7 @@
 }
 
 Task* ThreadPool::TryGetTaskLocked() {
-  if (started_ && !tasks_.empty()) {
+  if (HasOutstandingTasks()) {
     Task* task = tasks_.front();
     tasks_.pop_front();
     return task;
@@ -218,7 +218,7 @@
   }
   // Wait until each thread is waiting and the task list is empty.
   MutexLock mu(self, task_queue_lock_);
-  while (!shutting_down_ && (waiting_count_ != GetThreadCount() || !tasks_.empty())) {
+  while (!shutting_down_ && (waiting_count_ != GetThreadCount() || HasOutstandingTasks())) {
     if (!may_hold_locks) {
       completion_condition_.Wait(self);
     } else {
diff --git a/runtime/thread_pool.h b/runtime/thread_pool.h
index b6c6f02..2ff33a6 100644
--- a/runtime/thread_pool.h
+++ b/runtime/thread_pool.h
@@ -100,7 +100,8 @@
   ThreadPool(const char* name, size_t num_threads);
   virtual ~ThreadPool();
 
-  // Wait for all tasks currently on queue to get completed.
+  // Wait for all tasks currently on the queue to get completed. If the pool has been stopped,
+  // only wait until the already running tasks are done.
   void Wait(Thread* self, bool do_work, bool may_hold_locks) REQUIRES(!task_queue_lock_);
 
   size_t GetTaskCount(Thread* self) REQUIRES(!task_queue_lock_);
@@ -130,6 +131,10 @@
     return shutting_down_;
   }
 
+  bool HasOutstandingTasks() const REQUIRES(task_queue_lock_) {
+    return started_ && !tasks_.empty();
+  }
+
   const std::string name_;
   Mutex task_queue_lock_;
   ConditionVariable task_queue_condition_ GUARDED_BY(task_queue_lock_);
diff --git a/runtime/thread_pool_test.cc b/runtime/thread_pool_test.cc
index d5f17d1..2ae2ecf 100644
--- a/runtime/thread_pool_test.cc
+++ b/runtime/thread_pool_test.cc
@@ -98,6 +98,24 @@
   thread_pool.Wait(self, false, false);
 }
 
+TEST_F(ThreadPoolTest, StopWait) {
+  Thread* self = Thread::Current();
+  ThreadPool thread_pool("Thread pool test thread pool", num_threads);
+
+  AtomicInteger count(0);
+  static const int32_t num_tasks = num_threads * 100;
+  for (int32_t i = 0; i < num_tasks; ++i) {
+    thread_pool.AddTask(self, new CountTask(&count));
+  }
+
+  // Signal the threads to start processing tasks.
+  thread_pool.StartWorkers(self);
+  usleep(200);
+  thread_pool.StopWorkers(self);
+
+  thread_pool.Wait(self, false, false);  // We should not deadlock here.
+}
+
 class TreeTask : public Task {
  public:
   TreeTask(ThreadPool* const thread_pool, AtomicInteger* count, int depth)
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 0c627d6..871902e 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -185,20 +185,194 @@
         }
       }
 
-      lines_.push_back(
-          StringPrintf("%s --(%s)--> %" PRId64 "@%" PRId64 " [size=%" PRId64 ", length=%d]",
-                       referrer_str.c_str(),
-                       GetReferenceTypeStr(reference_kind, reference_info).c_str(),
-                       *tag_ptr,
-                       class_tag,
-                       adapted_size,
-                       length));
+      std::string referree_str = StringPrintf("%" PRId64 "@%" PRId64, *tag_ptr, class_tag);
+
+      lines_.push_back(CreateElem(referrer_str,
+                                  referree_str,
+                                  reference_kind,
+                                  reference_info,
+                                  adapted_size,
+                                  length));
 
       if (reference_kind == JVMTI_HEAP_REFERENCE_THREAD && *tag_ptr == 1000) {
         DumpStacks();
       }
     }
 
+    std::vector<std::string> GetLines() const {
+      std::vector<std::string> ret;
+      for (const std::unique_ptr<Elem>& e : lines_) {
+        ret.push_back(e->Print());
+      }
+      return ret;
+    }
+
+   private:
+    // We need to postpone some printing, as required functions are not callback-safe.
+    class Elem {
+     public:
+      Elem(const std::string& referrer, const std::string& referree, jlong size, jint length)
+          : referrer_(referrer), referree_(referree), size_(size), length_(length) {}
+      virtual ~Elem() {}
+
+      std::string Print() const {
+        return StringPrintf("%s --(%s)--> %s [size=%" PRId64 ", length=%d]",
+                            referrer_.c_str(),
+                            PrintArrowType().c_str(),
+                            referree_.c_str(),
+                            size_,
+                            length_);
+      }
+
+     protected:
+      virtual std::string PrintArrowType() const = 0;
+
+     private:
+      std::string referrer_;
+      std::string referree_;
+      jlong size_;
+      jint length_;
+    };
+
+    // For simple or unimplemented cases.
+    class StringElement : public Elem {
+     public:
+      StringElement(const std::string& referrer,
+                    const std::string& referree,
+                    jlong size,
+                    jint length,
+                    const std::string& string)
+          : Elem(referrer, referree, size, length), string_(string) {}
+
+     protected:
+      std::string PrintArrowType() const OVERRIDE {
+        return string_;
+      }
+
+     private:
+      const std::string string_;
+    };
+
+    static std::unique_ptr<Elem> CreateElem(const std::string& referrer,
+                                            const std::string& referree,
+                                            jvmtiHeapReferenceKind reference_kind,
+                                            const jvmtiHeapReferenceInfo* reference_info,
+                                            jlong size,
+                                            jint length) {
+      switch (reference_kind) {
+        case JVMTI_HEAP_REFERENCE_CLASS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "class"));
+        case JVMTI_HEAP_REFERENCE_FIELD: {
+          std::string tmp = StringPrintf("field@%d", reference_info->field.index);
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         tmp));
+        }
+        case JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT: {
+          std::string tmp = StringPrintf("array-element@%d", reference_info->array.index);
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         tmp));
+        }
+        case JVMTI_HEAP_REFERENCE_CLASS_LOADER:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "classloader"));
+        case JVMTI_HEAP_REFERENCE_SIGNERS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "signers"));
+        case JVMTI_HEAP_REFERENCE_PROTECTION_DOMAIN:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "protection-domain"));
+        case JVMTI_HEAP_REFERENCE_INTERFACE:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "interface"));
+        case JVMTI_HEAP_REFERENCE_STATIC_FIELD: {
+          std::string tmp = StringPrintf("array-element@%d", reference_info->array.index);
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         tmp));;
+        }
+        case JVMTI_HEAP_REFERENCE_CONSTANT_POOL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "constant-pool"));
+        case JVMTI_HEAP_REFERENCE_SUPERCLASS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "superclass"));
+        case JVMTI_HEAP_REFERENCE_JNI_GLOBAL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "jni-global"));
+        case JVMTI_HEAP_REFERENCE_SYSTEM_CLASS:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "system-class"));
+        case JVMTI_HEAP_REFERENCE_MONITOR:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "monitor"));
+        case JVMTI_HEAP_REFERENCE_STACK_LOCAL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "stack-local"));
+        case JVMTI_HEAP_REFERENCE_JNI_LOCAL:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "jni-local"));
+        case JVMTI_HEAP_REFERENCE_THREAD:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "thread"));
+        case JVMTI_HEAP_REFERENCE_OTHER:
+          return std::unique_ptr<Elem>(new StringElement(referrer,
+                                                         referree,
+                                                         size,
+                                                         length,
+                                                         "other"));
+      }
+      LOG(FATAL) << "Unknown kind";
+      UNREACHABLE();
+    }
+
     static void DumpStacks() NO_THREAD_SAFETY_ANALYSIS {
       auto dump_function = [](art::Thread* t, void* data ATTRIBUTE_UNUSED) {
         std::string name;
@@ -209,58 +383,15 @@
       art::Runtime::Current()->GetThreadList()->ForEach(dump_function, nullptr);
     }
 
-    static std::string GetReferenceTypeStr(jvmtiHeapReferenceKind reference_kind,
-                                           const jvmtiHeapReferenceInfo* reference_info) {
-      switch (reference_kind) {
-        case JVMTI_HEAP_REFERENCE_CLASS:
-          return "class";
-        case JVMTI_HEAP_REFERENCE_FIELD:
-          return StringPrintf("field@%d", reference_info->field.index);
-        case JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT:
-          return StringPrintf("array-element@%d", reference_info->array.index);
-        case JVMTI_HEAP_REFERENCE_CLASS_LOADER:
-          return "classloader";
-        case JVMTI_HEAP_REFERENCE_SIGNERS:
-          return "signers";
-        case JVMTI_HEAP_REFERENCE_PROTECTION_DOMAIN:
-          return "protection-domain";
-        case JVMTI_HEAP_REFERENCE_INTERFACE:
-          return "interface";
-        case JVMTI_HEAP_REFERENCE_STATIC_FIELD:
-          return StringPrintf("static-field@%d", reference_info->field.index);
-        case JVMTI_HEAP_REFERENCE_CONSTANT_POOL:
-          return "constant-pool";
-        case JVMTI_HEAP_REFERENCE_SUPERCLASS:
-          return "superclass";
-        case JVMTI_HEAP_REFERENCE_JNI_GLOBAL:
-          return "jni-global";
-        case JVMTI_HEAP_REFERENCE_SYSTEM_CLASS:
-          return "system-class";
-        case JVMTI_HEAP_REFERENCE_MONITOR:
-          return "monitor";
-        case JVMTI_HEAP_REFERENCE_STACK_LOCAL:
-          return "stack-local";
-        case JVMTI_HEAP_REFERENCE_JNI_LOCAL:
-          return "jni-local";
-        case JVMTI_HEAP_REFERENCE_THREAD:
-          return "thread";
-        case JVMTI_HEAP_REFERENCE_OTHER:
-          return "other";
-      }
-      return "unknown";
-    }
-
-    const std::vector<std::string>& GetLines() const {
-      return lines_;
-    }
-
-   private:
     jint counter_;
     const jint stop_after_;
     const jint follow_set_;
-    std::vector<std::string> lines_;
+
+    std::vector<std::unique_ptr<Elem>> lines_;
   };
 
+  jit::ScopedJitSuspend sjs;  // Wait to avoid JIT influence (e.g., JNI globals).
+
   // If jniRef isn't null, add a local and a global ref.
   ScopedLocalRef<jobject> jni_local_ref(env, nullptr);
   jobject jni_global_ref = nullptr;
@@ -272,7 +403,7 @@
   PrintIterationConfig config(stop_after, follow_set);
   Run(heap_filter, klass_filter, initial_object, &config);
 
-  const std::vector<std::string>& lines = config.GetLines();
+  std::vector<std::string> lines = config.GetLines();
   jobjectArray ret = CreateObjectArray(env,
                                        static_cast<jint>(lines.size()),
                                        "java/lang/String",
@@ -299,12 +430,5 @@
   return 0;
 }
 
-extern "C" JNIEXPORT void JNICALL Java_Main_waitForJitCompilation(JNIEnv*, jclass) {
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr) {
-    jit->WaitForCompilationToFinish(Thread::Current());
-  }
-}
-
 }  // namespace Test913Heaps
 }  // namespace art
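The Elem/StringElement refactoring above implements a deferred-formatting pattern: the JVMTI heap callback only records raw data, and the string building that is not callback-safe happens later through a virtual Print(). Below is a self-contained miniature of the same pattern; class and variable names are illustrative, not the test's.

#include <cstdio>
#include <memory>
#include <string>
#include <vector>

// Base element: stores what the callback saw; formatting is deferred to Print().
class Edge {
 public:
  Edge(const std::string& from, const std::string& to) : from_(from), to_(to) {}
  virtual ~Edge() {}
  std::string Print() const { return from_ + " --(" + Label() + ")--> " + to_; }
 protected:
  virtual std::string Label() const = 0;
 private:
  std::string from_;
  std::string to_;
};

// One concrete kind of reference, analogous to the JVMTI_HEAP_REFERENCE_FIELD case.
class FieldEdge : public Edge {
 public:
  FieldEdge(const std::string& from, const std::string& to, int index)
      : Edge(from, to), index_(index) {}
 protected:
  std::string Label() const override { return "field@" + std::to_string(index_); }
 private:
  int index_;
};

int main() {
  std::vector<std::unique_ptr<Edge>> lines;                   // Filled from inside the callback.
  lines.emplace_back(new FieldEdge("1000@0", "1001@1000", 3));
  for (const std::unique_ptr<Edge>& e : lines) {              // Formatted only after the callback.
    printf("%s\n", e->Print().c_str());
  }
  return 0;
}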
diff --git a/test/913-heaps/src/Main.java b/test/913-heaps/src/Main.java
index fc00ada..a6ace9a 100644
--- a/test/913-heaps/src/Main.java
+++ b/test/913-heaps/src/Main.java
@@ -101,8 +101,6 @@
 
   private static void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
       Object asRoot, Verifier v, String additionalEnabled) {
-    waitForJitCompilation();  // Wait to avoid JIT influence (e.g., JNI globals).
-
     String[] lines =
         followReferences(0, null, root, stopAfter, followSet, asRoot);
 
@@ -388,6 +386,4 @@
 
   private static native String[] followReferences(int heapFilter, Class<?> klassFilter,
       Object initialObject, int stopAfter, int followSet, Object jniRef);
-
-  private static native void waitForJitCompilation();
 }
diff --git a/test/Android.bp b/test/Android.bp
index bdb7f80..fe20f29 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -266,10 +266,7 @@
 art_cc_test_library {
     name: "libtiagent",
     defaults: ["libtiagent-defaults"],
-    shared_libs: [
-        "libart",
-        "libopenjdkjvmti",
-    ],
+    shared_libs: ["libart"],
 }
 
 art_cc_test_library {
@@ -278,10 +275,7 @@
         "libtiagent-defaults",
         "art_debug_defaults",
     ],
-    shared_libs: [
-        "libartd",
-        "libopenjdkjvmtid",
-    ],
+    shared_libs: ["libartd"],
 }
 
 cc_defaults {
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 60318a4..e92ba1a 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -804,7 +804,9 @@
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libnativebridgetest) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
-  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmti$(ART_HOST_SHLIB_EXTENSION) \
+  $(ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmtid$(ART_HOST_SHLIB_EXTENSION) \
 
 ifneq ($(HOST_PREFER_32_BIT),true)
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES += \
@@ -817,7 +819,10 @@
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(2ND_ART_HOST_ARCH)_libnativebridgetest) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libjavacore$(ART_HOST_SHLIB_EXTENSION) \
   $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdk$(ART_HOST_SHLIB_EXTENSION) \
-  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION)
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkd$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmti$(ART_HOST_SHLIB_EXTENSION) \
+  $(2ND_ART_HOST_OUT_SHARED_LIBRARIES)/libopenjdkjvmtid$(ART_HOST_SHLIB_EXTENSION) \
+
 endif
 
 # Create a rule to build and run a tests following the form:
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 3cb1f23..c601e3e 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -139,8 +139,8 @@
         float f9, int i10, float f10);
 
     // Normal native
-    native static void stackArgsSignExtendedMips64(int i1, int i2, int i3, int i4, int i5, int i6,
-        int i7, int i8);
+    native static long getStackArgSignExtendedMips64(int i1, int i2, int i3, int i4, int i5, int i6,
+        int stack_arg);
 
     // Normal native
     static native double logD(double d);
@@ -273,8 +273,8 @@
         float f9, int i10, float f10);
 
     @FastNative
-    native static void stackArgsSignExtendedMips64_Fast(int i1, int i2, int i3, int i4, int i5, int i6,
-        int i7, int i8);
+    native static long getStackArgSignExtendedMips64_Fast(int i1, int i2, int i3, int i4, int i5, int i6,
+        int stack_arg);
 
     @FastNative
     static native double logD_Fast(double d);
@@ -316,10 +316,6 @@
         float f9, int i10, float f10);
 
     @CriticalNative
-    native static void stackArgsSignExtendedMips64_Critical(int i1, int i2, int i3, int i4, int i5, int i6,
-        int i7, int i8);
-
-    @CriticalNative
     static native double logD_Critical(double d);
     @CriticalNative
     static native float logF_Critical(float f);