diff --git a/Android.mk b/Android.mk
index 0ed5d87..5a17379 100644
--- a/Android.mk
+++ b/Android.mk
@@ -77,6 +77,7 @@
 # product rules
 
 include $(art_path)/dex2oat/Android.mk
+include $(art_path)/oatdump/Android.mk
 include $(art_path)/imgdiag/Android.mk
 include $(art_path)/patchoat/Android.mk
 include $(art_path)/profman/Android.mk
diff --git a/build/Android.bp b/build/Android.bp
index 630cf3c..4be43ec 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -140,7 +140,6 @@
     },
 
     include_dirs: [
-        "external/gtest/include",
         "external/icu/icu4c/source/common",
         "external/lz4/lib",
         "external/valgrind/include",
diff --git a/build/Android.common_build.mk b/build/Android.common_build.mk
index 845b272..7edc1cc 100644
--- a/build/Android.common_build.mk
+++ b/build/Android.common_build.mk
@@ -87,7 +87,6 @@
 ART_CPP_EXTENSION := .cc
 
 ART_C_INCLUDES := \
-  external/gtest/include \
   external/icu/icu4c/source/common \
   external/lz4/lib \
   external/valgrind/include \
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index c09241a..8a6d52d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -37,6 +37,7 @@
   MyClassNatives \
   Nested \
   NonStaticLeafMethods \
+  Packages \
   ProtoCompare \
   ProtoCompare2 \
   ProfileTestMultiDex \
@@ -69,7 +70,7 @@
 
 ART_GTEST_class_linker_test_DEX_DEPS := Interfaces MultiDex MyClass Nested Statics StaticsFromCode
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
-ART_GTEST_dex_cache_test_DEX_DEPS := Main
+ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested
 ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
@@ -422,7 +423,7 @@
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
 LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler libdl
-LOCAL_STATIC_LIBRARIES += libgtest
+LOCAL_WHOLE_STATIC_LIBRARIES += libgtest
 LOCAL_ADDITIONAL_DEPENDENCIES := art/build/Android.common_build.mk
 LOCAL_ADDITIONAL_DEPENDENCIES += art/build/Android.gtest.mk
 $(eval LOCAL_CLANG := $(ART_TARGET_CLANG))
@@ -439,7 +440,7 @@
 LOCAL_SRC_FILES := runtime/common_runtime_test.cc compiler/common_compiler_test.cc
 LOCAL_C_INCLUDES := $(ART_C_INCLUDES) art/runtime art/cmdline art/compiler
 LOCAL_SHARED_LIBRARIES := libartd libartd-compiler
-LOCAL_STATIC_LIBRARIES := libgtest_host
+LOCAL_WHOLE_STATIC_LIBRARIES := libgtest
 LOCAL_LDLIBS += -ldl -lpthread
 LOCAL_MULTILIB := both
 LOCAL_CLANG := $(ART_HOST_CLANG)
diff --git a/build/codegen.go b/build/codegen.go
index eb2c37d..fde9420 100644
--- a/build/codegen.go
+++ b/build/codegen.go
@@ -112,7 +112,15 @@
 func defaultDeviceCodegenArches(ctx android.CustomizePropertiesContext) []string {
 	arches := make(map[string]bool)
 	for _, a := range ctx.DeviceConfig().Arches() {
-		arches[a.ArchType.String()] = true
+		s := a.ArchType.String()
+		arches[s] = true
+		if s == "arm64" {
+			arches["arm"] = true
+		} else if s == "mips64" {
+			arches["mips"] = true
+		} else if s == "x86_64" {
+			arches["x86"] = true
+		}
 	}
 	ret := make([]string, 0, len(arches))
 	for a := range arches {
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 9e5fb83..ed42958 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -39,8 +39,9 @@
   };
 
   enum JniOptimizationFlags {
-    kNone,
-    kFastNative,
+    kNone                       = 0x0,
+    kFastNative                 = 0x1,
+    kCriticalNative             = 0x2,
   };
 
   static Compiler* Create(CompilerDriver* driver, Kind kind);
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index b5bc2fb..daac7fb 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -616,17 +616,22 @@
               /* referrer */ nullptr,
               invoke_type);
 
-      bool fast_native = false;
-      if (LIKELY(method != nullptr)) {
-        fast_native = method->IsAnnotatedWithFastNative();
-      } else {
+      // Query any JNI optimization annotations such as @FastNative or @CriticalNative.
+      Compiler::JniOptimizationFlags optimization_flags = Compiler::kNone;
+      if (UNLIKELY(method == nullptr)) {
         // Failed method resolutions happen very rarely, e.g. ancestor class cannot be resolved.
         DCHECK(self->IsExceptionPending());
         self->ClearException();
+      } else if (method->IsAnnotatedWithFastNative()) {
+        // TODO: Will no longer need this CHECK once we have verifier checking this.
+        CHECK(!method->IsAnnotatedWithCriticalNative());
+        optimization_flags = Compiler::kFastNative;
+      } else if (method->IsAnnotatedWithCriticalNative()) {
+        // TODO: Will no longer need this CHECK once we have verifier checking this.
+        CHECK(!method->IsAnnotatedWithFastNative());
+        optimization_flags = Compiler::kCriticalNative;
       }
 
-      Compiler::JniOptimizationFlags optimization_flags =
-          fast_native ? Compiler::kFastNative : Compiler::kNone;
       compiled_method = driver->GetCompiler()->JniCompile(access_flags,
                                                           method_idx,
                                                           dex_file,
diff --git a/compiler/jni/jni_cfi_test.cc b/compiler/jni/jni_cfi_test.cc
index 4b056f5..28b7290 100644
--- a/compiler/jni/jni_cfi_test.cc
+++ b/compiler/jni/jni_cfi_test.cc
@@ -64,7 +64,12 @@
     ArenaAllocator arena(&pool);
 
     std::unique_ptr<JniCallingConvention> jni_conv(
-        JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
+        JniCallingConvention::Create(&arena,
+                                     is_static,
+                                     is_synchronized,
+                                     /*is_critical_native*/false,
+                                     shorty,
+                                     isa));
     std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
         ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, isa));
     const int frame_size(jni_conv->FrameSize());
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index b83985a..cdd4c68 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -15,12 +15,14 @@
  */
 
 #include <memory>
+#include <type_traits>
 
 #include <math.h>
 
 #include "art_method-inl.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
+#include "compiler.h"
 #include "dex_file.h"
 #include "gtest/gtest.h"
 #include "indirect_reference_table.h"
@@ -47,6 +49,171 @@
 
 namespace art {
 
+enum class JniKind {
+  kNormal   = Compiler::kNone,               // Regular kind of un-annotated natives.
+  kFast     = Compiler::kFastNative,         // Native method annotated with @FastNative.
+  kCritical = Compiler::kCriticalNative,     // Native method annotated with @CriticalNative.
+  kCount    = Compiler::kCriticalNative + 1  // How many different types of JNIs we can have.
+};
+
+// Used to initialize array sizes that want to have different state per current jni.
+static constexpr size_t kJniKindCount = static_cast<size_t>(JniKind::kCount);
+// Do not use directly, use the helpers instead.
+uint32_t gCurrentJni = static_cast<uint32_t>(JniKind::kNormal);
+
+// Is the current native method under test @CriticalNative?
+static bool IsCurrentJniCritical() {
+  return gCurrentJni == static_cast<uint32_t>(JniKind::kCritical);
+}
+
+// Is the current native method a plain-old non-annotated native?
+static bool IsCurrentJniNormal() {
+  return gCurrentJni == static_cast<uint32_t>(JniKind::kNormal);
+}
+
+// Signify that a different kind of JNI is about to be tested.
+static void UpdateCurrentJni(JniKind kind) {
+  gCurrentJni = static_cast<uint32_t>(kind);
+}
+
+// (Match the name suffixes of native methods in MyClassNatives.java)
+static std::string CurrentJniStringSuffix() {
+  switch (gCurrentJni) {
+    case static_cast<uint32_t>(JniKind::kNormal): {
+      return "";
+    }
+    case static_cast<uint32_t>(JniKind::kFast): {
+      return "_Fast";
+    }
+    case static_cast<uint32_t>(JniKind::kCritical): {
+      return "_Critical";
+    }
+    default:
+      LOG(FATAL) << "Invalid current JNI value: " << gCurrentJni;
+      UNREACHABLE();
+  }
+}
+
+// Dummy values passed to our JNI handlers when we enter @CriticalNative.
+// Normally @CriticalNative calling convention strips out the "JNIEnv*, jclass" parameters.
+// However to avoid duplicating every single test method we have a templated handler
+// that inserts dummy values for params #0/#1 to make it compatible with a regular JNI handler.
+static JNIEnv* const kCriticalDummyJniEnv = reinterpret_cast<JNIEnv*>(0xDEADFEAD);
+static jclass const kCriticalDummyJniClass = reinterpret_cast<jclass>(0xBEAFBEEF);
+
+// Type trait. Returns true if "T" is the same type as one of the types in Args...
+//
+// Logically equal to OR(std::is_same<T, U> for all U in Args).
+template <typename T, typename ... Args>
+struct is_any_of;
+
+template <typename T, typename U, typename ... Args>
+struct is_any_of<T, U, Args ...> {
+  using value_type = bool;
+  static constexpr const bool value = std::is_same<T, U>::value || is_any_of<T, Args ...>::value;
+};
+
+template <typename T, typename U>
+struct is_any_of<T, U> {
+  using value_type = bool;
+  static constexpr const bool value = std::is_same<T, U>::value;
+};
+
+// Type traits for JNI types.
+template <typename T>
+struct jni_type_traits {
+  // True iff T is a JNI reference type; false otherwise (including for non-JNI types).
+  static constexpr const bool is_ref =
+      is_any_of<T, jobject, jclass, jstring, jthrowable, jarray, jobjectArray, jbooleanArray,
+                jbyteArray, jcharArray, jshortArray, jintArray, jlongArray, jfloatArray,
+                jdoubleArray>::value;
+};
+
+template <typename ... Args>
+struct count_refs_helper {
+  using value_type = size_t;
+  static constexpr const size_t value = 0;
+};
+
+template <typename Arg, typename ... Args>
+struct count_refs_helper<Arg, Args ...> {
+  using value_type = size_t;
+  static constexpr size_t value =
+      (jni_type_traits<Arg>::is_ref ? 1 : 0) + count_refs_helper<Args ...>::value;
+};
+
+template <typename T, T fn>
+struct count_refs_fn_helper;
+
+template <typename R, typename ... Args, R fn(Args...)>
+struct count_refs_fn_helper<R(Args...), fn> : public count_refs_helper<Args...> {};
+
+// Given a function type 'T' figure out how many of the parameter types are a reference.
+// -- The implicit jclass and thisObject also count as 1 reference.
+//
+// Fields:
+// * value - the result counting # of refs
+// * value_type - the type of value (size_t)
+template <typename T, T fn>
+struct count_refs : public count_refs_fn_helper<T, fn> {};
+
+// Base case: No parameters = 0 refs.
+size_t count_nonnull_refs_helper() {
+  return 0;
+}
+
+// SFINAE overload for reference types: contributes 1 when the reference is non-null, else 0.
+template <typename T>
+size_t count_nonnull_refs_single_helper(T arg,
+                                        typename std::enable_if<jni_type_traits<T>::is_ref>::type*
+                                            = nullptr) {
+  return ((arg == nullptr) ? 0 : 1);
+}
+
+// SFINAE for non-ref-types. Always 0.
+template <typename T>
+size_t count_nonnull_refs_single_helper(T arg ATTRIBUTE_UNUSED,
+                                        typename std::enable_if<!jni_type_traits<T>::is_ref>::type*
+                                            = nullptr) {
+  return 0;
+}
+
+// Recursive case.
+template <typename T, typename ... Args>
+size_t count_nonnull_refs_helper(T arg, Args ... args) {
+  return count_nonnull_refs_single_helper(arg) + count_nonnull_refs_helper(args...);
+}
+
+// Given any list of parameters, check how many object refs there are and only count
+// them if their runtime value is non-null.
+//
+// For example given (jobject, jint, jclass) we can get (2) if both #0/#2 are non-null,
+// (1) if either #0/#2 are null but not both, and (0) if both #0/#2 are null.
+// Primitive parameters (including JNIEnv*, if present) are ignored.
+template <typename ... Args>
+size_t count_nonnull_refs(Args ... args) {
+  return count_nonnull_refs_helper(args...);
+}
+
+template <typename T, T fn>
+struct remove_extra_parameters_helper;
+
+template <typename R, typename Arg1, typename Arg2, typename ... Args, R fn(Arg1, Arg2, Args...)>
+struct remove_extra_parameters_helper<R(Arg1, Arg2, Args...), fn> {
+  // Note: Do not use Args&& here to maintain C-style parameter types.
+  static R apply(Args... args) {
+    JNIEnv* env = kCriticalDummyJniEnv;
+    jclass kls = kCriticalDummyJniClass;
+    return fn(env, kls, args...);
+  }
+};
+
+// Given a function 'fn' create a function 'apply' which will omit the JNIEnv/jclass parameters
+//
+// i.e. if fn(JNIEnv*,jclass,a,b,c,d,e...) then apply(a,b,c,d,e,...)
+template <typename T, T fn>
+struct jni_remove_extra_parameters : public remove_extra_parameters_helper<T, fn> {};
+
 class JniCompilerTest : public CommonCompilerTest {
  protected:
   void SetUp() OVERRIDE {
@@ -63,8 +230,11 @@
     check_generic_jni_ = generic;
   }
 
-  void CompileForTest(jobject class_loader, bool direct,
-                      const char* method_name, const char* method_sig) {
+ private:
+  void CompileForTest(jobject class_loader,
+                      bool direct,
+                      const char* method_name,
+                      const char* method_sig) {
     ScopedObjectAccess soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
     Handle<mirror::ClassLoader> loader(
@@ -87,8 +257,28 @@
     }
   }
 
-  void SetUpForTest(bool direct, const char* method_name, const char* method_sig,
+ protected:
+  void CompileForTestWithCurrentJni(jobject class_loader,
+                                    bool direct,
+                                    const char* method_name_orig,
+                                    const char* method_sig) {
+    // Append the JNI kind to the method name, so that we automatically get the
+    // fast or critical versions of the same method.
+    std::string method_name_str = std::string(method_name_orig) + CurrentJniStringSuffix();
+    const char* method_name = method_name_str.c_str();
+
+    CompileForTest(class_loader, direct, method_name, method_sig);
+  }
+
+  void SetUpForTest(bool direct,
+                    const char* method_name_orig,
+                    const char* method_sig,
                     void* native_fnptr) {
+    // Append the JNI kind to the method name, so that we automatically get the
+    // fast or critical versions of the same method.
+    std::string method_name_str = std::string(method_name_orig) + CurrentJniStringSuffix();
+    const char* method_name = method_name_str.c_str();
+
     // Initialize class loader and compile method when runtime not started.
     if (!runtime_->IsStarted()) {
       {
@@ -129,6 +319,7 @@
   }
 
  public:
+  // Available as statics so our JNI handlers can access these.
   static jclass jklass_;
   static jobject jobj_;
   static jobject class_loader_;
@@ -151,6 +342,8 @@
   void RunStaticReturnTrueImpl();
   void RunStaticReturnFalseImpl();
   void RunGenericStaticReturnIntImpl();
+  void RunGenericStaticReturnDoubleImpl();
+  void RunGenericStaticReturnLongImpl();
   void CompileAndRunStaticIntObjectObjectMethodImpl();
   void CompileAndRunStaticSynchronizedIntObjectObjectMethodImpl();
   void ExceptionHandlingImpl();
@@ -177,10 +370,13 @@
 
   void NormalNativeImpl();
   void FastNativeImpl();
+  void CriticalNativeImpl();
 
   JNIEnv* env_;
   jstring library_search_path_;
   jmethodID jmethod_;
+
+ private:
   bool check_generic_jni_;
 };
 
@@ -188,46 +384,238 @@
 jobject JniCompilerTest::jobj_;
 jobject JniCompilerTest::class_loader_;
 
-#define JNI_TEST(TestName) \
+// Defines <TestName>Default (compiler) and <TestName>Generic (generic JNI); both run
+// TestName##Impl() with the current JNI kind set to kNormal.
+// Use it for features that are unsupported in @FastNative:
+// 1) JNI stubs (lookup via dlsym) when methods aren't explicitly registered
+// 2) Returning objects from the JNI function, 3) synchronized keyword
+// -- TODO: We can support (1) if we remove the mutator lock assert during stub lookup.
+#define JNI_TEST_NORMAL_ONLY(TestName)           \
   TEST_F(JniCompilerTest, TestName ## Default) { \
+    SCOPED_TRACE("Normal JNI with compiler");    \
+    UpdateCurrentJni(JniKind::kNormal);          \
     TestName ## Impl();                          \
   }                                              \
-                                                 \
   TEST_F(JniCompilerTest, TestName ## Generic) { \
+    SCOPED_TRACE("Normal JNI with generic");     \
+    UpdateCurrentJni(JniKind::kNormal);          \
     TEST_DISABLED_FOR_MIPS();                    \
     SetCheckGenericJni(true);                    \
     TestName ## Impl();                          \
   }
 
-int gJava_MyClassNatives_foo_calls = 0;
-void Java_MyClassNatives_foo(JNIEnv* env, jobject thisObj) {
-  // 1 = thisObj
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  Locks::mutator_lock_->AssertNotHeld(Thread::Current());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_foo_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+// Tests: normal compiler, normal generic JNI, @FastNative compiler (@FastNative generic is TODO).
+#define JNI_TEST(TestName) \
+  JNI_TEST_NORMAL_ONLY(TestName)                 \
+  TEST_F(JniCompilerTest, TestName ## Fast) {    \
+    SCOPED_TRACE("@FastNative JNI with compiler");  \
+    UpdateCurrentJni(JniKind::kFast);            \
+    TestName ## Impl();                          \
+  }                                              \
+                                          \
+
+// TODO: maybe. @FastNative generic JNI support?
+#if 0
+  TEST_F(JniCompilerTest, TestName ## FastGeneric) { \
+    gCurrentJni = static_cast<uint32_t>(JniKind::kFast); \
+    TEST_DISABLED_FOR_MIPS();                    \
+    SetCheckGenericJni(true);                    \
+    TestName ## Impl();                          \
+  }
+#endif
+// Defines <TestName>DefaultCritical: runs TestName##Impl() with the JNI kind set to kCritical.
+#define JNI_TEST_CRITICAL_ONLY(TestName) \
+  TEST_F(JniCompilerTest, TestName ## DefaultCritical) { \
+    SCOPED_TRACE("@CriticalNative JNI with compiler");  \
+    UpdateCurrentJni(JniKind::kCritical);        \
+    TestName ## Impl();                          \
+  }
+
+// Test everything above plus the @CriticalNative compiler (@CriticalNative generic JNI is TODO).
+#define JNI_TEST_CRITICAL(TestName)              \
+  JNI_TEST(TestName)                             \
+  JNI_TEST_CRITICAL_ONLY(TestName)               \
+
+// TODO: maybe, more likely since calling convention changed. @CriticalNative generic JNI support?
+#if 0
+  TEST_F(JniCompilerTest, TestName ## GenericCritical) { \
+    gCurrentJni = static_cast<uint32_t>(JniKind::kCritical); \
+    TestName ## Impl();                          \
+  }
+#endif
+
+static void expectValidThreadState() {
+  // Normal JNI always transitions to "Native". Other JNIs stay in the "Runnable" state.
+  if (IsCurrentJniNormal()) {
+    EXPECT_EQ(kNative, Thread::Current()->GetState());
+  } else {
+    EXPECT_EQ(kRunnable, Thread::Current()->GetState());
+  }
+}
+
+#define EXPECT_THREAD_STATE_FOR_CURRENT_JNI() expectValidThreadState()
+
+static void expectValidMutatorLockHeld() {
+  if (IsCurrentJniNormal()) {
+    Locks::mutator_lock_->AssertNotHeld(Thread::Current());
+  } else {
+    Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+  }
+}
+
+#define EXPECT_MUTATOR_LOCK_FOR_CURRENT_JNI() expectValidMutatorLockHeld()
+
+static void expectValidJniEnvAndObject(JNIEnv* env, jobject thisObj) {
+  if (!IsCurrentJniCritical()) {
+    EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
+    ASSERT_TRUE(thisObj != nullptr);
+    EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
+  } else {
+    LOG(FATAL) << "Objects are not supported for @CriticalNative, why is this being tested?";
+    UNREACHABLE();
+  }
+}
+
+// Validates the JNIEnv to be the same as the current thread's JNIEnv, and makes sure
+// that the object here is an instance of the class we registered the method with.
+//
+// Hard-fails if this somehow gets invoked for @CriticalNative since objects are unsupported.
+#define EXPECT_JNI_ENV_AND_OBJECT_FOR_CURRENT_JNI(env, thisObj) \
+    expectValidJniEnvAndObject(env, thisObj)
+
+static void expectValidJniEnvAndClass(JNIEnv* env, jclass kls) {
+  if (!IsCurrentJniCritical()) {
+    EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
+    ASSERT_TRUE(kls != nullptr);
+    EXPECT_TRUE(env->IsSameObject(static_cast<jobject>(JniCompilerTest::jklass_),
+                                  static_cast<jobject>(kls)));
+  } else {
+    // This is pretty much vacuously true but catch any testing setup mistakes.
+    EXPECT_EQ(env, kCriticalDummyJniEnv);
+    EXPECT_EQ(kls, kCriticalDummyJniClass);
+  }
+}
+
+// Validates the JNIEnv is the same as the current thread's JNIenv, and makes sure
+// that the jclass we got in the JNI handler is the same one as the class the method was looked
+// up for.
+//
+// (For @CriticalNative the two values are dummies and are only compared to the dummy constants).
+#define EXPECT_JNI_ENV_AND_CLASS_FOR_CURRENT_JNI(env, kls) expectValidJniEnvAndClass(env, kls)
+
+// Temporarily disable the EXPECT_NUM_STACK_REFERENCES check (for a single test).
+struct ScopedDisableCheckNumStackReferences {
+  ScopedDisableCheckNumStackReferences() {
+    sCheckNumStackReferences = false;
+  }
+
+  ~ScopedDisableCheckNumStackReferences() {
+    sCheckNumStackReferences = true;
+  }
+
+  static bool sCheckNumStackReferences;
+};
+
+bool ScopedDisableCheckNumStackReferences::sCheckNumStackReferences = true;
+
+static void expectNumStackReferences(size_t val1, size_t val2) {
+  // Skip the check for @CriticalNative, and whenever a ScopedDisableCheckNumStackReferences
+  // has switched it off (rare: JNI functions calling themselves recursively give a false negative).
+  if (!IsCurrentJniCritical() && ScopedDisableCheckNumStackReferences::sCheckNumStackReferences) {
+    /* @CriticalNative doesn't build a HandleScope, so this test is meaningless then. */
+    ScopedObjectAccess soa(Thread::Current());
+
+    size_t actual_num = Thread::Current()->NumStackReferences();
+    // XX: Either value is accepted because it is unclear when null references get placed;
+    // the test decorators pass the non-null ref count and the total ref-typed parameter count.
+    EXPECT_TRUE(val1 == actual_num || val2 == actual_num)
+      << "expected either " << val1 << " or " << val2
+      << " number of stack references, but got: " << actual_num;
+  }
+}
+
+#define EXPECT_NUM_STACK_REFERENCES(val1, val2) expectNumStackReferences(val1, val2)
+
+template <typename T, T fn>
+struct make_jni_test_decorator;
+
+// Decorator for "static" JNI callbacks.
+template <typename R, typename ... Args, R fn(JNIEnv*, jclass, Args...)>
+struct make_jni_test_decorator<R(JNIEnv*, jclass kls, Args...), fn> {
+  static R apply(JNIEnv* env, jclass kls, Args ... args) {
+    EXPECT_THREAD_STATE_FOR_CURRENT_JNI();
+    EXPECT_MUTATOR_LOCK_FOR_CURRENT_JNI();
+    EXPECT_JNI_ENV_AND_CLASS_FOR_CURRENT_JNI(env, kls);
+    // All incoming parameters + the jclass get put into the transition's StackHandleScope.
+    EXPECT_NUM_STACK_REFERENCES(count_nonnull_refs(kls, args...),
+                                (count_refs_helper<jclass, Args...>::value));
+
+    return fn(env, kls, args...);
+  }
+};
+
+// Decorator for instance JNI callbacks.
+template <typename R, typename ... Args, R fn(JNIEnv*, jobject, Args...)>
+struct make_jni_test_decorator<R(JNIEnv*, jobject, Args...), fn> {
+  static R apply(JNIEnv* env, jobject thisObj, Args ... args) {
+    EXPECT_THREAD_STATE_FOR_CURRENT_JNI();
+    EXPECT_MUTATOR_LOCK_FOR_CURRENT_JNI();
+    EXPECT_JNI_ENV_AND_OBJECT_FOR_CURRENT_JNI(env, thisObj);
+    // All incoming parameters + the implicit 'this' get put into the transition's StackHandleScope.
+    EXPECT_NUM_STACK_REFERENCES(count_nonnull_refs(thisObj, args...),
+                                (count_refs_helper<jobject, Args...>::value));
+
+    return fn(env, thisObj, args...);
+  }
+};
+
+// Decorate the regular JNI callee with the extra gtest checks.
+// This way we can have common test logic for everything generic like checking if a lock is held,
+// checking handle scope state, etc.
+#define MAKE_JNI_TEST_DECORATOR(fn) make_jni_test_decorator<decltype(fn), (fn)>::apply
+
+// Convert function f(JNIEnv*,jclass,a,b,c,d...) into f2(a,b,c,d...)
+// -- This way we don't have to write out each implementation twice for @CriticalNative.
+#define JNI_CRITICAL_WRAPPER(func) jni_remove_extra_parameters<decltype(func), (func)>::apply
+// Get a function pointer whose calling convention either matches a regular native
+// or a critical native depending on which kind of jni is currently under test.
+// -- This also has the benefit of generating a compile time error if the 'func' doesn't properly
+//    have JNIEnv and jclass parameters first.
+#define CURRENT_JNI_WRAPPER(func)                                                         \
+    (IsCurrentJniCritical()                                                               \
+         ? reinterpret_cast<void*>(&JNI_CRITICAL_WRAPPER(MAKE_JNI_TEST_DECORATOR(func)))  \
+         : reinterpret_cast<void*>(&MAKE_JNI_TEST_DECORATOR(func)))
+
+// Do the opposite of the above. Do *not* wrap the function, instead just cast it to a void*.
+// Only for "JNI_TEST_NORMAL_ONLY" configs, and it inserts a test assert to ensure this is the case.
+#define NORMAL_JNI_ONLY_NOWRAP(func) \
+    ({ ASSERT_TRUE(IsCurrentJniNormal()); reinterpret_cast<void*>(&(func)); })
+// Same as above, but with nullptr. When we want to test the stub functionality.
+#define NORMAL_JNI_ONLY_NULLPTR \
+    ({ ASSERT_TRUE(IsCurrentJniNormal()); nullptr; })
+
+
+int gJava_MyClassNatives_foo_calls[kJniKindCount] = {};
+void Java_MyClassNatives_foo(JNIEnv*, jobject) {
+  gJava_MyClassNatives_foo_calls[gCurrentJni]++;
 }
 
 void JniCompilerTest::CompileAndRunNoArgMethodImpl() {
-  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
+  SetUpForTest(false, "foo", "()V", CURRENT_JNI_WRAPPER(Java_MyClassNatives_foo));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_foo_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_foo_calls[gCurrentJni]);
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
-  EXPECT_EQ(1, gJava_MyClassNatives_foo_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_foo_calls[gCurrentJni]);
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
-  EXPECT_EQ(2, gJava_MyClassNatives_foo_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_foo_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_foo_calls = 0;
+  gJava_MyClassNatives_foo_calls[gCurrentJni] = 0;
 }
 
 JNI_TEST(CompileAndRunNoArgMethod)
 
 void JniCompilerTest::CompileAndRunIntMethodThroughStubImpl() {
-  SetUpForTest(false, "bar", "(I)I", nullptr);
+  SetUpForTest(false, "bar", "(I)I", NORMAL_JNI_ONLY_NULLPTR);
   // calling through stub will link with &Java_MyClassNatives_bar
 
   std::string reason;
@@ -239,10 +627,11 @@
   EXPECT_EQ(25, result);
 }
 
-JNI_TEST(CompileAndRunIntMethodThroughStub)
+// TODO: Support @FastNative and @CriticalNative through stubs.
+JNI_TEST_NORMAL_ONLY(CompileAndRunIntMethodThroughStub)
 
 void JniCompilerTest::CompileAndRunStaticIntMethodThroughStubImpl() {
-  SetUpForTest(true, "sbar", "(I)I", nullptr);
+  SetUpForTest(true, "sbar", "(I)I", NORMAL_JNI_ONLY_NULLPTR);
   // calling through stub will link with &Java_MyClassNatives_sbar
 
   std::string reason;
@@ -254,174 +643,131 @@
   EXPECT_EQ(43, result);
 }
 
-JNI_TEST(CompileAndRunStaticIntMethodThroughStub)
+// TODO: Support @FastNative and @CriticalNative through stubs.
+JNI_TEST_NORMAL_ONLY(CompileAndRunStaticIntMethodThroughStub)
 
-int gJava_MyClassNatives_fooI_calls = 0;
-jint Java_MyClassNatives_fooI(JNIEnv* env, jobject thisObj, jint x) {
-  // 1 = thisObj
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_fooI_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooI_calls[kJniKindCount] = {};
+jint Java_MyClassNatives_fooI(JNIEnv*, jobject, jint x) {
+  gJava_MyClassNatives_fooI_calls[gCurrentJni]++;
   return x;
 }
 
 void JniCompilerTest::CompileAndRunIntMethodImpl() {
   SetUpForTest(false, "fooI", "(I)I",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooI));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooI));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooI_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooI_calls[gCurrentJni]);
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 42);
   EXPECT_EQ(42, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooI_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooI_calls[gCurrentJni]);
   result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 0xCAFED00D);
   EXPECT_EQ(static_cast<jint>(0xCAFED00D), result);
-  EXPECT_EQ(2, gJava_MyClassNatives_fooI_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooI_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooI_calls = 0;
+  gJava_MyClassNatives_fooI_calls[gCurrentJni] = 0;
 }
 
 JNI_TEST(CompileAndRunIntMethod)
 
-int gJava_MyClassNatives_fooII_calls = 0;
-jint Java_MyClassNatives_fooII(JNIEnv* env, jobject thisObj, jint x, jint y) {
-  // 1 = thisObj
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_fooII_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooII_calls[kJniKindCount] = {};
+jint Java_MyClassNatives_fooII(JNIEnv*, jobject, jint x, jint y) {
+  gJava_MyClassNatives_fooII_calls[gCurrentJni]++;
   return x - y;  // non-commutative operator
 }
 
 void JniCompilerTest::CompileAndRunIntIntMethodImpl() {
   SetUpForTest(false, "fooII", "(II)I",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooII));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooII));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooII_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooII_calls[gCurrentJni]);
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 99, 10);
   EXPECT_EQ(99 - 10, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooII_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooII_calls[gCurrentJni]);
   result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 0xCAFEBABE,
                                          0xCAFED00D);
   EXPECT_EQ(static_cast<jint>(0xCAFEBABE - 0xCAFED00D), result);
-  EXPECT_EQ(2, gJava_MyClassNatives_fooII_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooII_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooII_calls = 0;
+  gJava_MyClassNatives_fooII_calls[gCurrentJni] = 0;
 }
 
 JNI_TEST(CompileAndRunIntIntMethod)
 
-int gJava_MyClassNatives_fooJJ_calls = 0;
-jlong Java_MyClassNatives_fooJJ(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
-  // 1 = thisObj
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_fooJJ_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooJJ_calls[kJniKindCount] = {};
+jlong Java_MyClassNatives_fooJJ(JNIEnv*, jobject, jlong x, jlong y) {
+  gJava_MyClassNatives_fooJJ_calls[gCurrentJni]++;
   return x - y;  // non-commutative operator
 }
 
 void JniCompilerTest::CompileAndRunLongLongMethodImpl() {
   SetUpForTest(false, "fooJJ", "(JJ)J",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooJJ));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooJJ));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_calls[gCurrentJni]);
   jlong a = INT64_C(0x1234567890ABCDEF);
   jlong b = INT64_C(0xFEDCBA0987654321);
   jlong result = env_->CallNonvirtualLongMethod(jobj_, jklass_, jmethod_, a, b);
   EXPECT_EQ(a - b, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooJJ_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooJJ_calls[gCurrentJni]);
   result = env_->CallNonvirtualLongMethod(jobj_, jklass_, jmethod_, b, a);
   EXPECT_EQ(b - a, result);
-  EXPECT_EQ(2, gJava_MyClassNatives_fooJJ_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooJJ_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooJJ_calls = 0;
+  gJava_MyClassNatives_fooJJ_calls[gCurrentJni] = 0;
 }
 
 JNI_TEST(CompileAndRunLongLongMethod)
 
-int gJava_MyClassNatives_fooDD_calls = 0;
-jdouble Java_MyClassNatives_fooDD(JNIEnv* env, jobject thisObj, jdouble x, jdouble y) {
-  // 1 = thisObj
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_fooDD_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooDD_calls[kJniKindCount] = {};
+jdouble Java_MyClassNatives_fooDD(JNIEnv*, jobject, jdouble x, jdouble y) {
+  gJava_MyClassNatives_fooDD_calls[gCurrentJni]++;
   return x - y;  // non-commutative operator
 }
 
 void JniCompilerTest::CompileAndRunDoubleDoubleMethodImpl() {
   SetUpForTest(false, "fooDD", "(DD)D",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooDD));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooDD));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooDD_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooDD_calls[gCurrentJni]);
   jdouble result = env_->CallNonvirtualDoubleMethod(jobj_, jklass_, jmethod_,
                                                     99.0, 10.0);
   EXPECT_DOUBLE_EQ(99.0 - 10.0, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooDD_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooDD_calls[gCurrentJni]);
   jdouble a = 3.14159265358979323846;
   jdouble b = 0.69314718055994530942;
   result = env_->CallNonvirtualDoubleMethod(jobj_, jklass_, jmethod_, a, b);
   EXPECT_DOUBLE_EQ(a - b, result);
-  EXPECT_EQ(2, gJava_MyClassNatives_fooDD_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooDD_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooDD_calls = 0;
+  gJava_MyClassNatives_fooDD_calls[gCurrentJni] = 0;
 }
 
-int gJava_MyClassNatives_fooJJ_synchronized_calls = 0;
-jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv* env, jobject thisObj, jlong x, jlong y) {
-  // 1 = thisObj
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_fooJJ_synchronized_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooJJ_synchronized_calls[kJniKindCount] = {};
+jlong Java_MyClassNatives_fooJJ_synchronized(JNIEnv*, jobject, jlong x, jlong y) {
+  gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]++;
   return x | y;
 }
 
 void JniCompilerTest::CompileAndRun_fooJJ_synchronizedImpl() {
   SetUpForTest(false, "fooJJ_synchronized", "(JJ)J",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooJJ_synchronized));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooJJ_synchronized));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_synchronized_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]);
   jlong a = 0x1000000020000000ULL;
   jlong b = 0x00ff000000aa0000ULL;
   jlong result = env_->CallNonvirtualLongMethod(jobj_, jklass_, jmethod_, a, b);
   EXPECT_EQ(a | b, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooJJ_synchronized_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooJJ_synchronized_calls = 0;
+  gJava_MyClassNatives_fooJJ_synchronized_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(CompileAndRun_fooJJ_synchronized)
+JNI_TEST_NORMAL_ONLY(CompileAndRun_fooJJ_synchronized)
 
-int gJava_MyClassNatives_fooIOO_calls = 0;
-jobject Java_MyClassNatives_fooIOO(JNIEnv* env, jobject thisObj, jint x, jobject y,
+int gJava_MyClassNatives_fooIOO_calls[kJniKindCount] = {};
+jobject Java_MyClassNatives_fooIOO(JNIEnv*, jobject thisObj, jint x, jobject y,
                             jobject z) {
-  // 3 = this + y + z
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  gJava_MyClassNatives_fooIOO_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  size_t null_args = (y == nullptr ? 1 : 0) + (z == nullptr ? 1 : 0);
-  EXPECT_TRUE(3U == Thread::Current()->NumStackReferences() ||
-              (3U - null_args) == Thread::Current()->NumStackReferences());
+  gJava_MyClassNatives_fooIOO_calls[gCurrentJni]++;
   switch (x) {
     case 1:
       return y;
@@ -435,96 +781,89 @@
 void JniCompilerTest::CompileAndRunIntObjectObjectMethodImpl() {
   SetUpForTest(false, "fooIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooIOO));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooIOO));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
   jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(1, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
 
   result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, nullptr, jklass_);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(2, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
   result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, nullptr, jklass_);
   EXPECT_TRUE(env_->IsSameObject(nullptr, result));
-  EXPECT_EQ(3, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(3, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
   result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, nullptr, jklass_);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(4, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(4, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
 
   result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 0, jklass_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(5, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(5, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
   result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 1, jklass_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(6, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(6, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
   result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, 2, jklass_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(nullptr, result));
-  EXPECT_EQ(7, gJava_MyClassNatives_fooIOO_calls);
+  EXPECT_EQ(7, gJava_MyClassNatives_fooIOO_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooIOO_calls = 0;
+  gJava_MyClassNatives_fooIOO_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(CompileAndRunIntObjectObjectMethod)
+// TODO: Maybe. @FastNative support for returning Objects?
+JNI_TEST_NORMAL_ONLY(CompileAndRunIntObjectObjectMethod)
 
-int gJava_MyClassNatives_fooSII_calls = 0;
-jint Java_MyClassNatives_fooSII(JNIEnv* env, jclass klass, jint x, jint y) {
-  // 1 = klass
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
-  gJava_MyClassNatives_fooSII_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {};
+jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED,
+                                jclass klass ATTRIBUTE_UNUSED,
+                                jint x,
+                                jint y) {
+  gJava_MyClassNatives_fooSII_calls[gCurrentJni]++;
   return x + y;
 }
 
 void JniCompilerTest::CompileAndRunStaticIntIntMethodImpl() {
   SetUpForTest(true, "fooSII", "(II)I",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooSII));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooSII));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooSII_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooSII_calls[gCurrentJni]);
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 20, 30);
   EXPECT_EQ(50, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooSII_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooSII_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooSII_calls = 0;
+  gJava_MyClassNatives_fooSII_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(CompileAndRunStaticIntIntMethod)
+JNI_TEST_CRITICAL(CompileAndRunStaticIntIntMethod)
 
-int gJava_MyClassNatives_fooSDD_calls = 0;
-jdouble Java_MyClassNatives_fooSDD(JNIEnv* env, jclass klass, jdouble x, jdouble y) {
-  // 1 = klass
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
-  gJava_MyClassNatives_fooSDD_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooSDD_calls[kJniKindCount] = {};
+jdouble Java_MyClassNatives_fooSDD(JNIEnv* env ATTRIBUTE_UNUSED,
+                                   jclass klass ATTRIBUTE_UNUSED,
+                                   jdouble x,
+                                   jdouble y) {
+  gJava_MyClassNatives_fooSDD_calls[gCurrentJni]++;
   return x - y;  // non-commutative operator
 }
 
 void JniCompilerTest::CompileAndRunStaticDoubleDoubleMethodImpl() {
   SetUpForTest(true, "fooSDD", "(DD)D",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooSDD));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooSDD));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooSDD_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooSDD_calls[gCurrentJni]);
   jdouble result = env_->CallStaticDoubleMethod(jklass_, jmethod_, 99.0, 10.0);
   EXPECT_DOUBLE_EQ(99.0 - 10.0, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_fooSDD_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooSDD_calls[gCurrentJni]);
   jdouble a = 3.14159265358979323846;
   jdouble b = 0.69314718055994530942;
   result = env_->CallStaticDoubleMethod(jklass_, jmethod_, a, b);
   EXPECT_DOUBLE_EQ(a - b, result);
-  EXPECT_DOUBLE_EQ(2, gJava_MyClassNatives_fooSDD_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooSDD_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooSDD_calls = 0;
+  gJava_MyClassNatives_fooSDD_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(CompileAndRunStaticDoubleDoubleMethod)
+JNI_TEST_CRITICAL(CompileAndRunStaticDoubleDoubleMethod)
 
 // The x86 generic JNI code had a bug where it assumed a floating
 // point return value would be in xmm0. We use log, to somehow ensure
@@ -534,27 +873,47 @@
   return log(x);
 }
 
+jdouble Java_MyClassNatives_logD_notNormal(JNIEnv*, jclass, jdouble x) {
+  EXPECT_DOUBLE_EQ(2.0, x);
+  return log(x);
+}
+
 void JniCompilerTest::RunStaticLogDoubleMethodImpl() {
-  SetUpForTest(true, "logD", "(D)D", reinterpret_cast<void*>(&Java_MyClassNatives_logD));
+  void* jni_handler;
+  if (IsCurrentJniNormal()) {
+    // This test seems a bit special, don't use a JNI wrapper here.
+    jni_handler = NORMAL_JNI_ONLY_NOWRAP(Java_MyClassNatives_logD);
+  } else {
+    jni_handler = CURRENT_JNI_WRAPPER(Java_MyClassNatives_logD_notNormal);
+  }
+  SetUpForTest(true, "logD", "(D)D", jni_handler);
 
   jdouble result = env_->CallStaticDoubleMethod(jklass_, jmethod_, 2.0);
   EXPECT_DOUBLE_EQ(log(2.0), result);
 }
 
-JNI_TEST(RunStaticLogDoubleMethod)
+JNI_TEST_CRITICAL(RunStaticLogDoubleMethod)
 
 jfloat Java_MyClassNatives_logF(JNIEnv*, jclass, jfloat x) {
   return logf(x);
 }
 
 void JniCompilerTest::RunStaticLogFloatMethodImpl() {
-  SetUpForTest(true, "logF", "(F)F", reinterpret_cast<void*>(&Java_MyClassNatives_logF));
+  void* jni_handler;
+  if (IsCurrentJniNormal()) {
+    // This test seems a bit special, don't use a JNI wrapper here.
+    jni_handler = NORMAL_JNI_ONLY_NOWRAP(Java_MyClassNatives_logF);
+  } else {
+    jni_handler = CURRENT_JNI_WRAPPER(Java_MyClassNatives_logF);
+  }
+
+  SetUpForTest(true, "logF", "(F)F", jni_handler);
 
   jfloat result = env_->CallStaticFloatMethod(jklass_, jmethod_, 2.0);
   EXPECT_FLOAT_EQ(logf(2.0), result);
 }
 
-JNI_TEST(RunStaticLogFloatMethod)
+JNI_TEST_CRITICAL(RunStaticLogFloatMethod)
 
 jboolean Java_MyClassNatives_returnTrue(JNIEnv*, jclass) {
   return JNI_TRUE;
@@ -569,46 +928,67 @@
 }
 
 void JniCompilerTest::RunStaticReturnTrueImpl() {
-  SetUpForTest(true, "returnTrue", "()Z", reinterpret_cast<void*>(&Java_MyClassNatives_returnTrue));
+  SetUpForTest(true, "returnTrue", "()Z", CURRENT_JNI_WRAPPER(Java_MyClassNatives_returnTrue));
 
   jboolean result = env_->CallStaticBooleanMethod(jklass_, jmethod_);
   EXPECT_TRUE(result);
 }
 
-JNI_TEST(RunStaticReturnTrue)
+JNI_TEST_CRITICAL(RunStaticReturnTrue)
 
 void JniCompilerTest::RunStaticReturnFalseImpl() {
   SetUpForTest(true, "returnFalse", "()Z",
-               reinterpret_cast<void*>(&Java_MyClassNatives_returnFalse));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_returnFalse));
 
   jboolean result = env_->CallStaticBooleanMethod(jklass_, jmethod_);
   EXPECT_FALSE(result);
 }
 
-JNI_TEST(RunStaticReturnFalse)
+JNI_TEST_CRITICAL(RunStaticReturnFalse)
 
 void JniCompilerTest::RunGenericStaticReturnIntImpl() {
-  SetUpForTest(true, "returnInt", "()I", reinterpret_cast<void*>(&Java_MyClassNatives_returnInt));
+  SetUpForTest(true, "returnInt", "()I", CURRENT_JNI_WRAPPER(Java_MyClassNatives_returnInt));
 
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_);
   EXPECT_EQ(42, result);
 }
 
-JNI_TEST(RunGenericStaticReturnInt)
+JNI_TEST_CRITICAL(RunGenericStaticReturnInt)
 
-int gJava_MyClassNatives_fooSIOO_calls = 0;
-jobject Java_MyClassNatives_fooSIOO(JNIEnv* env, jclass klass, jint x, jobject y,
-                             jobject z) {
-  // 3 = klass + y + z
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
-  gJava_MyClassNatives_fooSIOO_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  size_t null_args = (y == nullptr ? 1 : 0) + (z == nullptr ? 1 : 0);
-  EXPECT_TRUE(3U == Thread::Current()->NumStackReferences() ||
-              (3U - null_args) == Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_returnDouble_calls[kJniKindCount] = {};
+jdouble Java_MyClassNatives_returnDouble(JNIEnv*, jclass) {
+  gJava_MyClassNatives_returnDouble_calls[gCurrentJni]++;
+  return 4.0;
+}
+
+void JniCompilerTest::RunGenericStaticReturnDoubleImpl() {
+  SetUpForTest(true, "returnDouble", "()D", CURRENT_JNI_WRAPPER(Java_MyClassNatives_returnDouble));
+
+  jdouble result = env_->CallStaticDoubleMethod(jklass_, jmethod_);
+  EXPECT_DOUBLE_EQ(4.0, result);
+  EXPECT_EQ(1, gJava_MyClassNatives_returnDouble_calls[gCurrentJni]);
+
+  gJava_MyClassNatives_returnDouble_calls[gCurrentJni] = 0;
+}
+
+JNI_TEST_CRITICAL(RunGenericStaticReturnDouble)
+
+jlong Java_MyClassNatives_returnLong(JNIEnv*, jclass) {
+  return 0xFEEDDEADFEEDL;
+}
+
+void JniCompilerTest::RunGenericStaticReturnLongImpl() {
+  SetUpForTest(true, "returnLong", "()J", CURRENT_JNI_WRAPPER(Java_MyClassNatives_returnLong));
+
+  jlong result = env_->CallStaticLongMethod(jklass_, jmethod_);
+  EXPECT_EQ(0xFEEDDEADFEEDL, result);
+}
+
+JNI_TEST_CRITICAL(RunGenericStaticReturnLong)
+
+int gJava_MyClassNatives_fooSIOO_calls[kJniKindCount] = {};
+jobject Java_MyClassNatives_fooSIOO(JNIEnv*, jclass klass, jint x, jobject y, jobject z) {
+  gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]++;
   switch (x) {
     case 1:
       return y;
@@ -619,54 +999,45 @@
   }
 }
 
-
 void JniCompilerTest::CompileAndRunStaticIntObjectObjectMethodImpl() {
   SetUpForTest(true, "fooSIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooSIOO));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooSIOO));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
   jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(1, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
 
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(2, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(nullptr, result));
-  EXPECT_EQ(3, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(3, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(4, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(4, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
 
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(5, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(5, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(6, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(6, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(nullptr, result));
-  EXPECT_EQ(7, gJava_MyClassNatives_fooSIOO_calls);
+  EXPECT_EQ(7, gJava_MyClassNatives_fooSIOO_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooSIOO_calls = 0;
+  gJava_MyClassNatives_fooSIOO_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(CompileAndRunStaticIntObjectObjectMethod)
+// TODO: Maybe. @FastNative support for returning Objects?
+JNI_TEST_NORMAL_ONLY(CompileAndRunStaticIntObjectObjectMethod)
 
-int gJava_MyClassNatives_fooSSIOO_calls = 0;
-jobject Java_MyClassNatives_fooSSIOO(JNIEnv* env, jclass klass, jint x, jobject y, jobject z) {
-  // 3 = klass + y + z
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(klass != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(JniCompilerTest::jobj_, klass));
-  gJava_MyClassNatives_fooSSIOO_calls++;
-  ScopedObjectAccess soa(Thread::Current());
-  size_t null_args = (y == nullptr ? 1 : 0) + (z == nullptr ? 1 : 0);
-  EXPECT_TRUE(3U == Thread::Current()->NumStackReferences() ||
-              (3U - null_args) == Thread::Current()->NumStackReferences());
+int gJava_MyClassNatives_fooSSIOO_calls[kJniKindCount] = {};
+jobject Java_MyClassNatives_fooSSIOO(JNIEnv*, jclass klass, jint x, jobject y, jobject z) {
+  gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]++;
   switch (x) {
     case 1:
       return y;
@@ -680,37 +1051,38 @@
 void JniCompilerTest::CompileAndRunStaticSynchronizedIntObjectObjectMethodImpl() {
   SetUpForTest(true, "fooSSIOO",
                "(ILjava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooSSIOO));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooSSIOO));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
   jobject result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(1, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
 
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(2, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(nullptr, result));
-  EXPECT_EQ(3, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(3, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, nullptr, jobj_);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(4, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(4, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
 
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 0, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jklass_, result));
-  EXPECT_EQ(5, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(5, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 1, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(jobj_, result));
-  EXPECT_EQ(6, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(6, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
   result = env_->CallStaticObjectMethod(jklass_, jmethod_, 2, jobj_, nullptr);
   EXPECT_TRUE(env_->IsSameObject(nullptr, result));
-  EXPECT_EQ(7, gJava_MyClassNatives_fooSSIOO_calls);
+  EXPECT_EQ(7, gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_fooSSIOO_calls = 0;
+  gJava_MyClassNatives_fooSSIOO_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(CompileAndRunStaticSynchronizedIntObjectObjectMethod)
+// TODO: Maybe. @FastNative support for returning Objects?
+JNI_TEST_NORMAL_ONLY(CompileAndRunStaticSynchronizedIntObjectObjectMethod)
 
 void Java_MyClassNatives_throwException(JNIEnv* env, jobject) {
   jclass c = env->FindClass("java/lang/RuntimeException");
@@ -724,30 +1096,30 @@
     class_loader_ = LoadDex("MyClassNatives");
 
     // all compilation needs to happen before Runtime::Start
-    CompileForTest(class_loader_, false, "foo", "()V");
-    CompileForTest(class_loader_, false, "throwException", "()V");
-    CompileForTest(class_loader_, false, "foo", "()V");
+    CompileForTestWithCurrentJni(class_loader_, false, "foo", "()V");
+    CompileForTestWithCurrentJni(class_loader_, false, "throwException", "()V");
+    CompileForTestWithCurrentJni(class_loader_, false, "foo", "()V");
   }
   // Start runtime to avoid re-initialization in SetupForTest.
   Thread::Current()->TransitionFromSuspendedToRunnable();
   bool started = runtime_->Start();
   CHECK(started);
 
-  gJava_MyClassNatives_foo_calls = 0;
+  gJava_MyClassNatives_foo_calls[gCurrentJni] = 0;
 
   // Check a single call of a JNI method is ok
-  SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
+  SetUpForTest(false, "foo", "()V", CURRENT_JNI_WRAPPER(Java_MyClassNatives_foo));
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
-  EXPECT_EQ(1, gJava_MyClassNatives_foo_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_foo_calls[gCurrentJni]);
   EXPECT_FALSE(Thread::Current()->IsExceptionPending());
 
   // Get class for exception we expect to be thrown
   ScopedLocalRef<jclass> jlre(env_, env_->FindClass("java/lang/RuntimeException"));
   SetUpForTest(false, "throwException", "()V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_throwException));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_throwException));
   // Call Java_MyClassNatives_throwException (JNI method that throws exception)
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
-  EXPECT_EQ(1, gJava_MyClassNatives_foo_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_foo_calls[gCurrentJni]);
   EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
   ScopedLocalRef<jthrowable> exception(env_, env_->ExceptionOccurred());
   env_->ExceptionClear();
@@ -756,9 +1128,9 @@
   // Check a single call of a JNI method is ok
   SetUpForTest(false, "foo", "()V", reinterpret_cast<void*>(&Java_MyClassNatives_foo));
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_);
-  EXPECT_EQ(2, gJava_MyClassNatives_foo_calls);
+  EXPECT_EQ(2, gJava_MyClassNatives_foo_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_foo_calls = 0;
+  gJava_MyClassNatives_foo_calls[gCurrentJni] = 0;
 }
 
 JNI_TEST(ExceptionHandling)
@@ -782,7 +1154,7 @@
       mirror::StackTraceElement* ste = trace_array->Get(j);
       EXPECT_STREQ("MyClassNatives.java", ste->GetFileName()->ToModifiedUtf8().c_str());
       EXPECT_STREQ("MyClassNatives", ste->GetDeclaringClass()->ToModifiedUtf8().c_str());
-      EXPECT_STREQ("fooI", ste->GetMethodName()->ToModifiedUtf8().c_str());
+      EXPECT_EQ(("fooI" + CurrentJniStringSuffix()), ste->GetMethodName()->ToModifiedUtf8());
     }
 
     // end recursion
@@ -790,7 +1162,9 @@
   } else {
     jclass jklass = env->FindClass("MyClassNatives");
     EXPECT_TRUE(jklass != nullptr);
-    jmethodID jmethod = env->GetMethodID(jklass, "fooI", "(I)I");
+    jmethodID jmethod = env->GetMethodID(jklass,
+                                         ("fooI" + CurrentJniStringSuffix()).c_str(),
+                                         "(I)I");
     EXPECT_TRUE(jmethod != nullptr);
 
     // Recurse with i - 1
@@ -803,8 +1177,13 @@
 
 void JniCompilerTest::NativeStackTraceElementImpl() {
   SetUpForTest(false, "fooI", "(I)I",
-               reinterpret_cast<void*>(&Java_MyClassNatives_nativeUpCall));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_nativeUpCall));
+
+  // The usual check of the number of local references on the stack fails here because
+  // nativeUpCall calls itself recursively, so that number goes up with each call.
+  ScopedDisableCheckNumStackReferences disable_num_stack_check;
   jint result = env_->CallNonvirtualIntMethod(jobj_, jklass_, jmethod_, 10);
+
   EXPECT_EQ(10+9+8+7+6+5+4+3+2+1, result);
 }
 
@@ -816,13 +1195,14 @@
 
 void JniCompilerTest::ReturnGlobalRefImpl() {
   SetUpForTest(false, "fooO", "(Ljava/lang/Object;)Ljava/lang/Object;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fooO));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fooO));
   jobject result = env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, jobj_);
   EXPECT_EQ(JNILocalRefType, env_->GetObjectRefType(result));
   EXPECT_TRUE(env_->IsSameObject(result, jobj_));
 }
 
-JNI_TEST(ReturnGlobalRef)
+// TODO: Maybe. @FastNative support for returning objects?
+JNI_TEST_NORMAL_ONLY(ReturnGlobalRef)
 
 jint local_ref_test(JNIEnv* env, jobject thisObj, jint x) {
   // Add 10 local references
@@ -834,7 +1214,7 @@
 }
 
 void JniCompilerTest::LocalReferenceTableClearingTestImpl() {
-  SetUpForTest(false, "fooI", "(I)I", reinterpret_cast<void*>(&local_ref_test));
+  SetUpForTest(false, "fooI", "(I)I", CURRENT_JNI_WRAPPER(local_ref_test));
   // 1000 invocations of a method that adds 10 local references
   for (int i = 0; i < 1000; i++) {
     jint result = env_->CallIntMethod(jobj_, jmethod_, i);
@@ -855,7 +1235,7 @@
 
 void JniCompilerTest::JavaLangSystemArrayCopyImpl() {
   SetUpForTest(true, "arraycopy", "(Ljava/lang/Object;ILjava/lang/Object;II)V",
-               reinterpret_cast<void*>(&my_arraycopy));
+               CURRENT_JNI_WRAPPER(my_arraycopy));
   env_->CallStaticVoidMethod(jklass_, jmethod_, jobj_, 1234, jklass_, 5678, 9876);
 }
 
@@ -872,7 +1252,7 @@
 
 void JniCompilerTest::CompareAndSwapIntImpl() {
   SetUpForTest(false, "compareAndSwapInt", "(Ljava/lang/Object;JII)Z",
-               reinterpret_cast<void*>(&my_casi));
+               CURRENT_JNI_WRAPPER(my_casi));
   jboolean result = env_->CallBooleanMethod(jobj_, jmethod_, jobj_, INT64_C(0x12345678ABCDEF88),
                                             0xCAFEF00D, 0xEBADF00D);
   EXPECT_EQ(result, JNI_TRUE);
@@ -891,7 +1271,7 @@
 
 void JniCompilerTest::GetTextImpl() {
   SetUpForTest(true, "getText", "(JLjava/lang/Object;JLjava/lang/Object;)I",
-               reinterpret_cast<void*>(&my_gettext));
+               CURRENT_JNI_WRAPPER(my_gettext));
   jint result = env_->CallStaticIntMethod(jklass_, jmethod_, 0x12345678ABCDEF88ll, jobj_,
                                           INT64_C(0x7FEDCBA987654321), jobj_);
   EXPECT_EQ(result, 42);
@@ -899,37 +1279,33 @@
 
 JNI_TEST(GetText)
 
-int gJava_MyClassNatives_GetSinkProperties_calls = 0;
-jarray Java_MyClassNatives_GetSinkProperties(JNIEnv* env, jobject thisObj, jstring s) {
-  // 1 = thisObj
-  Thread* self = Thread::Current();
-  EXPECT_EQ(kNative, self->GetState());
-  Locks::mutator_lock_->AssertNotHeld(self);
-  EXPECT_EQ(self->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
+int gJava_MyClassNatives_GetSinkProperties_calls[kJniKindCount] = {};
+jarray Java_MyClassNatives_GetSinkProperties(JNIEnv*, jobject thisObj, jstring s) {
   EXPECT_EQ(s, nullptr);
-  gJava_MyClassNatives_GetSinkProperties_calls++;
+  gJava_MyClassNatives_GetSinkProperties_calls[gCurrentJni]++;
+
+  Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
-  EXPECT_EQ(2U, self->NumStackReferences());
   EXPECT_TRUE(self->HoldsLock(soa.Decode<mirror::Object*>(thisObj)));
   return nullptr;
 }
 
 void JniCompilerTest::GetSinkPropertiesNativeImpl() {
   SetUpForTest(false, "getSinkPropertiesNative", "(Ljava/lang/String;)[Ljava/lang/Object;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_GetSinkProperties));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_GetSinkProperties));
 
-  EXPECT_EQ(0, gJava_MyClassNatives_GetSinkProperties_calls);
+  EXPECT_EQ(0, gJava_MyClassNatives_GetSinkProperties_calls[gCurrentJni]);
   jarray result = down_cast<jarray>(
       env_->CallNonvirtualObjectMethod(jobj_, jklass_, jmethod_, nullptr));
   EXPECT_EQ(nullptr, result);
-  EXPECT_EQ(1, gJava_MyClassNatives_GetSinkProperties_calls);
+  EXPECT_EQ(1, gJava_MyClassNatives_GetSinkProperties_calls[gCurrentJni]);
 
-  gJava_MyClassNatives_GetSinkProperties_calls = 0;
+  gJava_MyClassNatives_GetSinkProperties_calls[gCurrentJni] = 0;
 }
 
-JNI_TEST(GetSinkPropertiesNative)
+// @FastNative doesn't support 'synchronized' keyword and
+// never will -- locking functions aren't fast.
+JNI_TEST_NORMAL_ONLY(GetSinkPropertiesNative)
 
 // This should return jclass, but we're imitating a bug pattern.
 jobject Java_MyClassNatives_instanceMethodThatShouldReturnClass(JNIEnv* env, jobject) {
@@ -943,39 +1319,59 @@
 
 void JniCompilerTest::UpcallReturnTypeChecking_InstanceImpl() {
   SetUpForTest(false, "instanceMethodThatShouldReturnClass", "()Ljava/lang/Class;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldReturnClass));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_instanceMethodThatShouldReturnClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
   // This native method is bad, and tries to return a jstring as a jclass.
   env_->CallObjectMethod(jobj_, jmethod_);
-  check_jni_abort_catcher.Check("attempt to return an instance of java.lang.String from java.lang.Class MyClassNatives.instanceMethodThatShouldReturnClass()");
+  check_jni_abort_catcher.Check(std::string() + "attempt to return an instance " +
+                                    "of java.lang.String from java.lang.Class " +
+                                    "MyClassNatives.instanceMethodThatShouldReturnClass" +
+                                    CurrentJniStringSuffix() + "()");
 
   // Here, we just call the method incorrectly; we should catch that too.
   env_->CallObjectMethod(jobj_, jmethod_);
-  check_jni_abort_catcher.Check("attempt to return an instance of java.lang.String from java.lang.Class MyClassNatives.instanceMethodThatShouldReturnClass()");
+  check_jni_abort_catcher.Check(std::string() + "attempt to return an instance " +
+                                    "of java.lang.String from java.lang.Class " +
+                                    "MyClassNatives.instanceMethodThatShouldReturnClass" +
+                                    CurrentJniStringSuffix() + "()");
   env_->CallStaticObjectMethod(jklass_, jmethod_);
-  check_jni_abort_catcher.Check("calling non-static method java.lang.Class MyClassNatives.instanceMethodThatShouldReturnClass() with CallStaticObjectMethodV");
+  check_jni_abort_catcher.Check(std::string() + "calling non-static method " +
+                                    "java.lang.Class " +
+                                    "MyClassNatives.instanceMethodThatShouldReturnClass" +
+                                    CurrentJniStringSuffix() + "() with CallStaticObjectMethodV");
 }
 
-JNI_TEST(UpcallReturnTypeChecking_Instance)
+// TODO: Maybe support returning objects for @FastNative?
+JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Instance)
 
 void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() {
   SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;",
-               reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldReturnClass));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_staticMethodThatShouldReturnClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
   // This native method is bad, and tries to return a jstring as a jclass.
   env_->CallStaticObjectMethod(jklass_, jmethod_);
-  check_jni_abort_catcher.Check("attempt to return an instance of java.lang.String from java.lang.Class MyClassNatives.staticMethodThatShouldReturnClass()");
+  check_jni_abort_catcher.Check(std::string() + "attempt to return an instance " +
+                                    "of java.lang.String from java.lang.Class " +
+                                    "MyClassNatives.staticMethodThatShouldReturnClass" +
+                                    CurrentJniStringSuffix() + "()");
 
   // Here, we just call the method incorrectly; we should catch that too.
   env_->CallStaticObjectMethod(jklass_, jmethod_);
-  check_jni_abort_catcher.Check("attempt to return an instance of java.lang.String from java.lang.Class MyClassNatives.staticMethodThatShouldReturnClass()");
+  check_jni_abort_catcher.Check(std::string() + "attempt to return an instance " +
+                                    "of java.lang.String from java.lang.Class " +
+                                    "MyClassNatives.staticMethodThatShouldReturnClass" +
+                                    CurrentJniStringSuffix() + "()");
   env_->CallObjectMethod(jobj_, jmethod_);
-  check_jni_abort_catcher.Check("calling static method java.lang.Class MyClassNatives.staticMethodThatShouldReturnClass() with CallObjectMethodV");
+  check_jni_abort_catcher.Check(std::string() + "calling static method " +
+                                    "java.lang.Class " +
+                                    "MyClassNatives.staticMethodThatShouldReturnClass" +
+                                    CurrentJniStringSuffix() + "() with CallObjectMethodV");
 }
 
-JNI_TEST(UpcallReturnTypeChecking_Static)
+// TODO: Maybe support returning objects for @FastNative?
+JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Static)
 
 // This should take jclass, but we're imitating a bug pattern.
 void Java_MyClassNatives_instanceMethodThatShouldTakeClass(JNIEnv*, jobject, jclass) {
@@ -990,12 +1386,14 @@
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
   SetUpForTest(false, "instanceMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_instanceMethodThatShouldTakeClass));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_instanceMethodThatShouldTakeClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
   // We deliberately pass a bad second argument here.
   env_->CallVoidMethod(jobj_, jmethod_, 123, env_->NewStringUTF("not a class!"));
-  check_jni_abort_catcher.Check("bad arguments passed to void MyClassNatives.instanceMethodThatShouldTakeClass(int, java.lang.Class)");
+  check_jni_abort_catcher.Check(std::string() + "bad arguments passed to void " +
+                                    "MyClassNatives.instanceMethodThatShouldTakeClass" +
+                                    CurrentJniStringSuffix() + "(int, java.lang.Class)");
 }
 
 JNI_TEST(UpcallArgumentTypeChecking_Instance)
@@ -1005,29 +1403,25 @@
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
   SetUpForTest(true, "staticMethodThatShouldTakeClass", "(ILjava/lang/Class;)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_staticMethodThatShouldTakeClass));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_staticMethodThatShouldTakeClass));
 
   CheckJniAbortCatcher check_jni_abort_catcher;
   // We deliberately pass a bad second argument here.
   env_->CallStaticVoidMethod(jklass_, jmethod_, 123, env_->NewStringUTF("not a class!"));
-  check_jni_abort_catcher.Check("bad arguments passed to void MyClassNatives.staticMethodThatShouldTakeClass(int, java.lang.Class)");
+  check_jni_abort_catcher.Check(std::string() + "bad arguments passed to void " +
+                                    "MyClassNatives.staticMethodThatShouldTakeClass" +
+                                    CurrentJniStringSuffix() + "(int, java.lang.Class)");
 }
 
 JNI_TEST(UpcallArgumentTypeChecking_Static)
 
-jfloat Java_MyClassNatives_checkFloats(JNIEnv* env, jobject thisObj, jfloat f1, jfloat f2) {
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+jfloat Java_MyClassNatives_checkFloats(JNIEnv*, jobject, jfloat f1, jfloat f2) {
   return f1 - f2;  // non-commutative operator
 }
 
 void JniCompilerTest::CompileAndRunFloatFloatMethodImpl() {
   SetUpForTest(false, "checkFloats", "(FF)F",
-               reinterpret_cast<void*>(&Java_MyClassNatives_checkFloats));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_checkFloats));
 
   jfloat result = env_->CallNonvirtualFloatMethod(jobj_, jklass_, jmethod_,
                                                     99.0F, 10.0F);
@@ -1042,28 +1436,22 @@
 
 void Java_MyClassNatives_checkParameterAlign(JNIEnv* env ATTRIBUTE_UNUSED,
                                              jobject thisObj ATTRIBUTE_UNUSED,
-                                             jint i1 ATTRIBUTE_UNUSED,
-                                             jlong l1 ATTRIBUTE_UNUSED) {
-//  EXPECT_EQ(kNative, Thread::Current()->GetState());
-//  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-//  EXPECT_TRUE(thisObj != nullptr);
-//  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-//  ScopedObjectAccess soa(Thread::Current());
-//  EXPECT_EQ(1U, Thread::Current()->NumStackReferences());
+                                             jint i1,
+                                             jlong l1) {
   EXPECT_EQ(i1, 1234);
   EXPECT_EQ(l1, INT64_C(0x12345678ABCDEF0));
 }
 
 void JniCompilerTest::CheckParameterAlignImpl() {
   SetUpForTest(false, "checkParameterAlign", "(IJ)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_checkParameterAlign));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_checkParameterAlign));
 
   env_->CallNonvirtualVoidMethod(jobj_, jklass_, jmethod_, 1234, INT64_C(0x12345678ABCDEF0));
 }
 
 JNI_TEST(CheckParameterAlign)
 
-void Java_MyClassNatives_maxParamNumber(JNIEnv* env, jobject thisObj,
+void Java_MyClassNatives_maxParamNumber(JNIEnv* env, jobject,
     jobject o0, jobject o1, jobject o2, jobject o3, jobject o4, jobject o5, jobject o6, jobject o7,
     jobject o8, jobject o9, jobject o10, jobject o11, jobject o12, jobject o13, jobject o14, jobject o15,
     jobject o16, jobject o17, jobject o18, jobject o19, jobject o20, jobject o21, jobject o22, jobject o23,
@@ -1096,13 +1484,6 @@
     jobject o232, jobject o233, jobject o234, jobject o235, jobject o236, jobject o237, jobject o238, jobject o239,
     jobject o240, jobject o241, jobject o242, jobject o243, jobject o244, jobject o245, jobject o246, jobject o247,
     jobject o248, jobject o249, jobject o250, jobject o251, jobject o252, jobject o253) {
-  EXPECT_EQ(kNative, Thread::Current()->GetState());
-  EXPECT_EQ(Thread::Current()->GetJniEnv(), env);
-  EXPECT_TRUE(thisObj != nullptr);
-  EXPECT_TRUE(env->IsInstanceOf(thisObj, JniCompilerTest::jklass_));
-  ScopedObjectAccess soa(Thread::Current());
-  EXPECT_GE(255U, Thread::Current()->NumStackReferences());
-
   // two tests possible
   if (o0 == nullptr) {
     // 1) everything is null
@@ -1470,7 +1851,7 @@
 
 void JniCompilerTest::MaxParamNumberImpl() {
   SetUpForTest(false, "maxParamNumber", longSig,
-               reinterpret_cast<void*>(&Java_MyClassNatives_maxParamNumber));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_maxParamNumber));
 
   jvalue args[254];
 
@@ -1497,7 +1878,7 @@
   // This will lead to error messages in the log.
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
-  SetUpForTest(false, "withoutImplementation", "()V", nullptr);
+  SetUpForTest(false, "withoutImplementation", "()V", NORMAL_JNI_ONLY_NULLPTR);
 
   env_->CallVoidMethod(jobj_, jmethod_);
 
@@ -1505,13 +1886,18 @@
   EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
 }
 
-JNI_TEST(WithoutImplementation)
+// TODO: Don't test @FastNative here since it goes through a stub lookup (unsupported) which would
+// normally fail with an exception, but fails with an assert.
+JNI_TEST_NORMAL_ONLY(WithoutImplementation)
 
 void JniCompilerTest::WithoutImplementationRefReturnImpl() {
   // This will lead to error messages in the log.
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
-  SetUpForTest(false, "withoutImplementationRefReturn", "()Ljava/lang/Object;", nullptr);
+  SetUpForTest(false,
+               "withoutImplementationRefReturn",
+               "()Ljava/lang/Object;",
+               NORMAL_JNI_ONLY_NULLPTR);
 
   env_->CallObjectMethod(jobj_, jmethod_);
 
@@ -1519,7 +1905,8 @@
   EXPECT_TRUE(env_->ExceptionCheck() == JNI_TRUE);
 }
 
-JNI_TEST(WithoutImplementationRefReturn)
+// TODO: Should work for @FastNative too.
+JNI_TEST_NORMAL_ONLY(WithoutImplementationRefReturn)
 
 void Java_MyClassNatives_stackArgsIntsFirst(JNIEnv*, jclass, jint i1, jint i2, jint i3,
                                             jint i4, jint i5, jint i6, jint i7, jint i8, jint i9,
@@ -1561,7 +1948,7 @@
 
 void JniCompilerTest::StackArgsIntsFirstImpl() {
   SetUpForTest(true, "stackArgsIntsFirst", "(IIIIIIIIIIFFFFFFFFFF)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsIntsFirst));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_stackArgsIntsFirst));
 
   jint i1 = 1;
   jint i2 = 2;
@@ -1589,7 +1976,7 @@
                              f3, f4, f5, f6, f7, f8, f9, f10);
 }
 
-JNI_TEST(StackArgsIntsFirst)
+JNI_TEST_CRITICAL(StackArgsIntsFirst)
 
 void Java_MyClassNatives_stackArgsFloatsFirst(JNIEnv*, jclass, jfloat f1, jfloat f2,
                                               jfloat f3, jfloat f4, jfloat f5, jfloat f6, jfloat f7,
@@ -1631,7 +2018,7 @@
 
 void JniCompilerTest::StackArgsFloatsFirstImpl() {
   SetUpForTest(true, "stackArgsFloatsFirst", "(FFFFFFFFFFIIIIIIIIII)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsFloatsFirst));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_stackArgsFloatsFirst));
 
   jint i1 = 1;
   jint i2 = 2;
@@ -1659,7 +2046,7 @@
                              i4, i5, i6, i7, i8, i9, i10);
 }
 
-JNI_TEST(StackArgsFloatsFirst)
+JNI_TEST_CRITICAL(StackArgsFloatsFirst)
 
 void Java_MyClassNatives_stackArgsMixed(JNIEnv*, jclass, jint i1, jfloat f1, jint i2,
                                         jfloat f2, jint i3, jfloat f3, jint i4, jfloat f4, jint i5,
@@ -1700,7 +2087,7 @@
 
 void JniCompilerTest::StackArgsMixedImpl() {
   SetUpForTest(true, "stackArgsMixed", "(IFIFIFIFIFIFIFIFIFIF)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsMixed));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_stackArgsMixed));
 
   jint i1 = 1;
   jint i2 = 2;
@@ -1728,7 +2115,7 @@
                              f7, i8, f8, i9, f9, i10, f10);
 }
 
-JNI_TEST(StackArgsMixed)
+JNI_TEST_CRITICAL(StackArgsMixed)
 
 void Java_MyClassNatives_stackArgsSignExtendedMips64(JNIEnv*, jclass, jint i1, jint i2, jint i3,
                                                      jint i4, jint i5, jint i6, jint i7, jint i8) {
@@ -1760,7 +2147,7 @@
 
 void JniCompilerTest::StackArgsSignExtendedMips64Impl() {
   SetUpForTest(true, "stackArgsSignExtendedMips64", "(IIIIIIII)V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_stackArgsSignExtendedMips64));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_stackArgsSignExtendedMips64));
   jint i1 = 1;
   jint i2 = 2;
   jint i3 = 3;
@@ -1773,7 +2160,7 @@
   env_->CallStaticVoidMethod(jklass_, jmethod_, i1, i2, i3, i4, i5, i6, i7, i8);
 }
 
-JNI_TEST(StackArgsSignExtendedMips64)
+JNI_TEST_CRITICAL(StackArgsSignExtendedMips64)
 
 void Java_MyClassNatives_normalNative(JNIEnv*, jclass) {
   // Intentionally left empty.
@@ -1785,15 +2172,18 @@
   SetUpForTest(/* direct */ true,
                "normalNative",
                "()V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_normalNative));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_normalNative));
 
   ScopedObjectAccess soa(Thread::Current());
   ArtMethod* method = soa.DecodeMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
+  EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
   EXPECT_FALSE(method->IsAnnotatedWithFastNative());
 }
-JNI_TEST(NormalNative)
+
+// TODO: just rename the java functions to the standard convention and remove duplicated tests
+JNI_TEST_NORMAL_ONLY(NormalNative)
 
 // Methods annotated with @FastNative are considered "fast native"
 // -- Check that the annotation lookup succeeds.
@@ -1805,14 +2195,53 @@
   SetUpForTest(/* direct */ true,
                "fastNative",
                "()V",
-               reinterpret_cast<void*>(&Java_MyClassNatives_fastNative));
+               CURRENT_JNI_WRAPPER(Java_MyClassNatives_fastNative));
 
   ScopedObjectAccess soa(Thread::Current());
   ArtMethod* method = soa.DecodeMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
+  EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
   EXPECT_TRUE(method->IsAnnotatedWithFastNative());
 }
-JNI_TEST(FastNative)
+
+// TODO: just rename the java functions to the standard convention and remove duplicated tests
+JNI_TEST_NORMAL_ONLY(FastNative)
+
+int gJava_myClassNatives_criticalNative_calls[kJniKindCount] = {};
+// Methods annotated with @CriticalNative are considered "critical native"
+// -- Check that the annotation lookup succeeds.
+void Java_MyClassNatives_criticalNative() {
+  gJava_myClassNatives_criticalNative_calls[gCurrentJni]++;
+}
+
+void JniCompilerTest::CriticalNativeImpl() {
+  SetUpForTest(/* direct */ true,
+               // Important: Don't change the "current jni" yet to avoid a method name suffix.
+               "criticalNative",
+               "()V",
+               // TODO: Use CURRENT_JNI_WRAPPER instead which is more generic.
+               reinterpret_cast<void*>(&Java_MyClassNatives_criticalNative));
+
+  // TODO: remove this manual updating of the current JNI. Merge with the other tests.
+  UpdateCurrentJni(JniKind::kCritical);
+  ASSERT_TRUE(IsCurrentJniCritical());
+
+  ScopedObjectAccess soa(Thread::Current());
+  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ASSERT_TRUE(method != nullptr);
+
+  EXPECT_TRUE(method->IsAnnotatedWithCriticalNative());
+  EXPECT_FALSE(method->IsAnnotatedWithFastNative());
+
+  EXPECT_EQ(0, gJava_myClassNatives_criticalNative_calls[gCurrentJni]);
+  env_->CallStaticVoidMethod(jklass_, jmethod_);
+  EXPECT_EQ(1, gJava_myClassNatives_criticalNative_calls[gCurrentJni]);
+
+  gJava_myClassNatives_criticalNative_calls[gCurrentJni] = 0;
+}
+
+// TODO: just rename the java functions to the standard convention and remove duplicated tests
+JNI_TEST_NORMAL_ONLY(CriticalNative)
 
 }  // namespace art
diff --git a/compiler/jni/quick/arm/calling_convention_arm.cc b/compiler/jni/quick/arm/calling_convention_arm.cc
index 0d16260..3f29ae5 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.cc
+++ b/compiler/jni/quick/arm/calling_convention_arm.cc
@@ -24,15 +24,33 @@
 
 static_assert(kArmPointerSize == PointerSize::k32, "Unexpected ARM pointer size");
 
-// Used by hard float.
+//
+// JNI calling convention constants.
+//
+
+// List of parameters passed via registers for JNI.
+// JNI uses soft-float, so there is only a GPR list.
+static const Register kJniArgumentRegisters[] = {
+  R0, R1, R2, R3
+};
+
+static const size_t kJniArgumentRegisterCount = arraysize(kJniArgumentRegisters);
+
+//
+// Managed calling convention constants.
+//
+
+// Used by hard float. (General purpose registers.)
 static const Register kHFCoreArgumentRegisters[] = {
   R0, R1, R2, R3
 };
 
+// (VFP single-precision registers.)
 static const SRegister kHFSArgumentRegisters[] = {
   S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
 };
 
+// (VFP double-precision registers.)
 static const DRegister kHFDArgumentRegisters[] = {
   D0, D1, D2, D3, D4, D5, D6, D7
 };
@@ -40,6 +58,10 @@
 static_assert(arraysize(kHFDArgumentRegisters) * 2 == arraysize(kHFSArgumentRegisters),
     "ks d argument registers mismatch");
 
+//
+// Shared managed+JNI calling convention constants.
+//
+
 static constexpr ManagedRegister kCalleeSaveRegisters[] = {
     // Core registers.
     ArmManagedRegister::FromCoreRegister(R5),
@@ -255,23 +277,95 @@
 }
 // JNI calling convention
 
-ArmJniCallingConvention::ArmJniCallingConvention(bool is_static, bool is_synchronized,
+ArmJniCallingConvention::ArmJniCallingConvention(bool is_static,
+                                                 bool is_synchronized,
+                                                 bool is_critical_native,
                                                  const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kArmPointerSize) {
-  // Compute padding to ensure longs and doubles are not split in AAPCS. Ignore the 'this' jobject
-  // or jclass for static methods and the JNIEnv. We start at the aligned register r2.
-  size_t padding = 0;
-  for (size_t cur_arg = IsStatic() ? 0 : 1, cur_reg = 2; cur_arg < NumArgs(); cur_arg++) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kArmPointerSize) {
+  // AAPCS 4.1 specifies fundamental alignments for each type. All of our stack arguments are
+  // usually 4-byte aligned, however longs and doubles must be 8 bytes aligned. Add padding to
+  // maintain 8-byte alignment invariant.
+  //
+  // Compute padding to ensure longs and doubles are not split in AAPCS.
+  size_t shift = 0;
+
+  size_t cur_arg, cur_reg;
+  if (LIKELY(HasExtraArgumentsForJni())) {
+    // Ignore the 'this' jobject or jclass for static methods and the JNIEnv.
+    // We start at the aligned register r2.
+    //
+    // Ignore the first 2 parameters because they are guaranteed to be aligned.
+    cur_arg = NumImplicitArgs();  // skip the "this" arg.
+    cur_reg = 2;  // skip {r0=JNIEnv, r1=jobject} / {r0=JNIEnv, r1=jclass} parameters (start at r2).
+  } else {
+    // Check every parameter.
+    cur_arg = 0;
+    cur_reg = 0;
+  }
+
+  // TODO: Maybe should just use IsCurrentParamALongOrDouble instead to be cleaner?
+  // (this just seems like an unnecessary micro-optimization).
+
+  // Shift across a logical register mapping that looks like:
+  //
+  //   | r0 | r1 | r2 | r3 | SP | SP+4| SP+8 | SP+12 | ... | SP+n | SP+n+4 |
+  //
+  //   (where SP is some arbitrary stack pointer that our 0th stack arg would go into).
+  //
+  // Any time there would normally be a long/double in an odd logical register,
+  // we have to push out the rest of the mappings by 4 bytes to maintain an 8-byte alignment.
+  //
+  // This works for both physical register pairs {r0, r1}, {r2, r3} and for when
+  // the value is on the stack.
+  //
+  // For example:
+  // (a) long would normally go into r1, but we shift it into r2
+  //  | INT | (PAD) | LONG      |
+  //  | r0  |  r1   |  r2  | r3 |
+  //
+  // (b) long would normally go into r3, but we shift it into SP
+  //  | INT | INT | INT | (PAD) | LONG     |
+  //  | r0  |  r1 |  r2 |  r3   | SP   SP+4|
+  //
+  // where INT is any <=4 byte arg, and LONG is any 8-byte arg.
+  for (; cur_arg < NumArgs(); cur_arg++) {
     if (IsParamALongOrDouble(cur_arg)) {
-      if ((cur_reg & 1) != 0) {
-        padding += 4;
+      if ((cur_reg & 1) != 0) {  // check that it's in a logical contiguous register pair
+        shift += 4;
         cur_reg++;  // additional bump to ensure alignment
       }
-      cur_reg++;  // additional bump to skip extra long word
+      cur_reg += 2;  // bump the iterator twice for every long argument
+    } else {
+      cur_reg++;  // bump the iterator for every non-long argument
     }
-    cur_reg++;  // bump the iterator for every argument
   }
-  padding_ = padding;
+
+  if (cur_reg < kJniArgumentRegisterCount) {
+    // As a special case when, as a result of shifting (or not) there are no arguments on the stack,
+    // we actually have 0 stack padding.
+    //
+    // For example with @CriticalNative and:
+    // (int, long) -> shifts the long but doesn't need to pad the stack
+    //
+    //          shift
+    //           \/
+    //  | INT | (PAD) | LONG      | (EMPTY) ...
+    //  | r0  |  r1   |  r2  | r3 |   SP    ...
+    //                                /\
+    //                          no stack padding
+    padding_ = 0;
+  } else {
+    padding_ = shift;
+  }
+
+  // TODO: add some new JNI tests for @CriticalNative that introduced new edge cases
+  // (a) Using r0,r1 pair = f(long,...)
+  // (b) Shifting r1 long into r2,r3 pair = f(int, long, int, ...);
+  // (c) Shifting but not introducing a stack padding = f(int, long);
 }
 
 uint32_t ArmJniCallingConvention::CoreSpillMask() const {
@@ -289,15 +383,34 @@
 
 size_t ArmJniCallingConvention::FrameSize() {
   // Method*, LR and callee save area size, local reference segment state
-  size_t frame_data_size = static_cast<size_t>(kArmPointerSize)
-      + (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kArmPointerSize, ReferenceCount());
+  const size_t method_ptr_size = static_cast<size_t>(kArmPointerSize);
+  const size_t lr_return_addr_size = kFramePointerSize;
+  const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+  size_t frame_data_size = method_ptr_size + lr_return_addr_size + callee_save_area_size;
+
+  if (LIKELY(HasLocalReferenceSegmentState())) {
+    // local reference segment state
+    frame_data_size += kFramePointerSize;
+    // TODO: Probably better to use sizeof(IRTSegmentState) here...
+  }
+
+  // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
+  const size_t handle_scope_size = HandleScope::SizeOf(kArmPointerSize, ReferenceCount());
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;                                 // handle scope size
+  }
+
   // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t ArmJniCallingConvention::OutArgSize() {
+  // TODO: Identical to x86_64 except for also adding additional padding.
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize + padding_,
                  kStackAlignment);
 }
@@ -309,55 +422,70 @@
 // JniCallingConvention ABI follows AAPCS where longs and doubles must occur
 // in even register numbers and stack slots
 void ArmJniCallingConvention::Next() {
+  // Update the iterator by usual JNI rules.
   JniCallingConvention::Next();
-  size_t arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) &&
-      (arg_pos < NumArgs()) &&
-      IsParamALongOrDouble(arg_pos)) {
-    // itr_slots_ needs to be an even number, according to AAPCS.
-    if ((itr_slots_ & 0x1u) != 0) {
+
+  if (LIKELY(HasNext())) {  // Avoid CHECK failure for IsCurrentParam
+    // Ensure slot is 8-byte aligned for longs/doubles (AAPCS).
+    if (IsCurrentParamALongOrDouble() && ((itr_slots_ & 0x1u) != 0)) {
+      // itr_slots_ needs to be an even number, according to AAPCS.
       itr_slots_++;
     }
   }
 }
 
 bool ArmJniCallingConvention::IsCurrentParamInRegister() {
-  return itr_slots_ < 4;
+  return itr_slots_ < kJniArgumentRegisterCount;
 }
 
 bool ArmJniCallingConvention::IsCurrentParamOnStack() {
   return !IsCurrentParamInRegister();
 }
 
-static const Register kJniArgumentRegisters[] = {
-  R0, R1, R2, R3
-};
 ManagedRegister ArmJniCallingConvention::CurrentParamRegister() {
-  CHECK_LT(itr_slots_, 4u);
-  int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-  if ((itr_args_ >= 2) && IsParamALongOrDouble(arg_pos)) {
-    CHECK_EQ(itr_slots_, 2u);
-    return ArmManagedRegister::FromRegisterPair(R2_R3);
+  CHECK_LT(itr_slots_, kJniArgumentRegisterCount);
+  if (IsCurrentParamALongOrDouble()) {
+    // AAPCS 5.1.1 requires 64-bit values to be in a consecutive register pair:
+    // "A double-word sized type is passed in two consecutive registers (e.g., r0 and r1, or r2 and
+    // r3). The content of the registers is as if the value had been loaded from memory
+    // representation with a single LDM instruction."
+    if (itr_slots_ == 0u) {
+      return ArmManagedRegister::FromRegisterPair(R0_R1);
+    } else if (itr_slots_ == 2u) {
+      return ArmManagedRegister::FromRegisterPair(R2_R3);
+    } else {
+      // The register can either be R0 (+R1) or R2 (+R3). Cannot be other values.
+      LOG(FATAL) << "Invalid iterator register position for a long/double " << itr_args_;
+      UNREACHABLE();
+    }
   } else {
-    return
-      ArmManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_slots_]);
+    // All other types can fit into one register.
+    return ArmManagedRegister::FromCoreRegister(kJniArgumentRegisters[itr_slots_]);
   }
 }
 
 FrameOffset ArmJniCallingConvention::CurrentParamStackOffset() {
-  CHECK_GE(itr_slots_, 4u);
+  CHECK_GE(itr_slots_, kJniArgumentRegisterCount);
   size_t offset =
-      displacement_.Int32Value() - OutArgSize() + ((itr_slots_ - 4) * kFramePointerSize);
+      displacement_.Int32Value()
+          - OutArgSize()
+          + ((itr_slots_ - kJniArgumentRegisterCount) * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
 }
 
 size_t ArmJniCallingConvention::NumberOfOutgoingStackArgs() {
-  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
+  size_t static_args = HasSelfClass() ? 1 : 0;  // count jclass
   // regular argument parameters and this
-  size_t param_args = NumArgs() + NumLongOrDoubleArgs();
+  size_t param_args = NumArgs() + NumLongOrDoubleArgs();  // twice count 8-byte args
+  // XX: Why is the long/double counted twice but not JNIEnv* ???
   // count JNIEnv* less arguments in registers
-  return static_args + param_args + 1 - 4;
+  size_t internal_args = (HasJniEnv() ? 1 : 0 /* jni env */);
+  size_t total_args = static_args + param_args + internal_args;
+
+  return total_args - std::min(kJniArgumentRegisterCount, static_cast<size_t>(total_args));
+
+  // TODO: Very similar to x86_64 except for the return pc.
 }
 
 }  // namespace arm
diff --git a/compiler/jni/quick/arm/calling_convention_arm.h b/compiler/jni/quick/arm/calling_convention_arm.h
index 7c717cc..249f202 100644
--- a/compiler/jni/quick/arm/calling_convention_arm.h
+++ b/compiler/jni/quick/arm/calling_convention_arm.h
@@ -52,7 +52,10 @@
 
 class ArmJniCallingConvention FINAL : public JniCallingConvention {
  public:
-  ArmJniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  ArmJniCallingConvention(bool is_static,
+                          bool is_synchronized,
+                          bool is_critical_native,
+                          const char* shorty);
   ~ArmJniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.cc b/compiler/jni/quick/arm64/calling_convention_arm64.cc
index afa707d..3fb7b56 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.cc
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.cc
@@ -24,6 +24,13 @@
 
 static_assert(kArm64PointerSize == PointerSize::k64, "Unexpected ARM64 pointer size");
 
+// Up to how many float-like (float, double) args can be enregistered.
+// The rest of the args must go on the stack.
+constexpr size_t kMaxFloatOrDoubleRegisterArguments = 8u;
+// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be
+// enregistered. The rest of the args must go on the stack.
+constexpr size_t kMaxIntLikeRegisterArguments = 8u;
+
 static const XRegister kXArgumentRegisters[] = {
   X0, X1, X2, X3, X4, X5, X6, X7
 };
@@ -211,9 +218,11 @@
 }
 
 // JNI calling convention
-Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static, bool is_synchronized,
+Arm64JniCallingConvention::Arm64JniCallingConvention(bool is_static,
+                                                     bool is_synchronized,
+                                                     bool is_critical_native,
                                                      const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kArm64PointerSize) {
+    : JniCallingConvention(is_static, is_synchronized, is_critical_native, shorty, kArm64PointerSize) {
 }
 
 uint32_t Arm64JniCallingConvention::CoreSpillMask() const {
@@ -230,38 +239,59 @@
 
 size_t Arm64JniCallingConvention::FrameSize() {
   // Method*, callee save area size, local reference segment state
-  size_t frame_data_size = kFramePointerSize +
-      CalleeSaveRegisters().size() * kFramePointerSize + sizeof(uint32_t);
+  //
+  // (Unlike x86_64, do not include return address, and the segment state is uint32
+  // instead of pointer).
+  size_t method_ptr_size = static_cast<size_t>(kFramePointerSize);
+  size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+
+  size_t frame_data_size = method_ptr_size + callee_save_area_size;
+  if (LIKELY(HasLocalReferenceSegmentState())) {
+    frame_data_size += sizeof(uint32_t);
+  }
   // References plus 2 words for HandleScope header
   size_t handle_scope_size = HandleScope::SizeOf(kArm64PointerSize, ReferenceCount());
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;                                 // handle scope size
+  }
+
   // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t Arm64JniCallingConvention::OutArgSize() {
+  // Same as X86_64
   return RoundUp(NumberOfOutgoingStackArgs() * kFramePointerSize, kStackAlignment);
 }
 
 ArrayRef<const ManagedRegister> Arm64JniCallingConvention::CalleeSaveRegisters() const {
+  // Same as X86_64
   return ArrayRef<const ManagedRegister>(kCalleeSaveRegisters);
 }
 
 bool Arm64JniCallingConvention::IsCurrentParamInRegister() {
   if (IsCurrentParamAFloatOrDouble()) {
-    return (itr_float_and_doubles_ < 8);
+    return (itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments);
   } else {
-    return ((itr_args_ - itr_float_and_doubles_) < 8);
+    return ((itr_args_ - itr_float_and_doubles_) < kMaxIntLikeRegisterArguments);
   }
+  // TODO: Can we just call CurrentParamRegister to figure this out?
 }
 
 bool Arm64JniCallingConvention::IsCurrentParamOnStack() {
+  // Is this ever not the same for all the architectures?
   return !IsCurrentParamInRegister();
 }
 
 ManagedRegister Arm64JniCallingConvention::CurrentParamRegister() {
   CHECK(IsCurrentParamInRegister());
   if (IsCurrentParamAFloatOrDouble()) {
-    CHECK_LT(itr_float_and_doubles_, 8u);
+    CHECK_LT(itr_float_and_doubles_, kMaxFloatOrDoubleRegisterArguments);
     if (IsCurrentParamADouble()) {
       return Arm64ManagedRegister::FromDRegister(kDArgumentRegisters[itr_float_and_doubles_]);
     } else {
@@ -269,7 +299,7 @@
     }
   } else {
     int gp_reg = itr_args_ - itr_float_and_doubles_;
-    CHECK_LT(static_cast<unsigned int>(gp_reg), 8u);
+    CHECK_LT(static_cast<unsigned int>(gp_reg), kMaxIntLikeRegisterArguments);
     if (IsCurrentParamALong() || IsCurrentParamAReference() || IsCurrentParamJniEnv())  {
       return Arm64ManagedRegister::FromXRegister(kXArgumentRegisters[gp_reg]);
     } else {
@@ -281,20 +311,30 @@
 FrameOffset Arm64JniCallingConvention::CurrentParamStackOffset() {
   CHECK(IsCurrentParamOnStack());
   size_t args_on_stack = itr_args_
-                  - std::min(8u, itr_float_and_doubles_)
-                  - std::min(8u, (itr_args_ - itr_float_and_doubles_));
+                  - std::min(kMaxFloatOrDoubleRegisterArguments,
+                             static_cast<size_t>(itr_float_and_doubles_))
+                  - std::min(kMaxIntLikeRegisterArguments,
+                             static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
   size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
   CHECK_LT(offset, OutArgSize());
   return FrameOffset(offset);
+  // TODO: Seems identical to X86_64 code.
 }
 
 size_t Arm64JniCallingConvention::NumberOfOutgoingStackArgs() {
   // all arguments including JNI args
   size_t all_args = NumArgs() + NumberOfExtraArgumentsForJni();
 
-  size_t all_stack_args = all_args -
-            std::min(8u, static_cast<unsigned int>(NumFloatOrDoubleArgs())) -
-            std::min(8u, static_cast<unsigned int>((all_args - NumFloatOrDoubleArgs())));
+  DCHECK_GE(all_args, NumFloatOrDoubleArgs());
+
+  size_t all_stack_args =
+      all_args
+      - std::min(kMaxFloatOrDoubleRegisterArguments,
+                 static_cast<size_t>(NumFloatOrDoubleArgs()))
+      - std::min(kMaxIntLikeRegisterArguments,
+                 static_cast<size_t>((all_args - NumFloatOrDoubleArgs())));
+
+  // TODO: Seems similar to X86_64 code except it doesn't count return pc.
 
   return all_stack_args;
 }
diff --git a/compiler/jni/quick/arm64/calling_convention_arm64.h b/compiler/jni/quick/arm64/calling_convention_arm64.h
index 90b12e5..5618942 100644
--- a/compiler/jni/quick/arm64/calling_convention_arm64.h
+++ b/compiler/jni/quick/arm64/calling_convention_arm64.h
@@ -52,7 +52,10 @@
 
 class Arm64JniCallingConvention FINAL : public JniCallingConvention {
  public:
-  Arm64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  Arm64JniCallingConvention(bool is_static,
+                            bool is_synchronized,
+                            bool is_critical_native,
+                            const char* shorty);
   ~Arm64JniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
diff --git a/compiler/jni/quick/calling_convention.cc b/compiler/jni/quick/calling_convention.cc
index c7ed9c9..9859b5d 100644
--- a/compiler/jni/quick/calling_convention.cc
+++ b/compiler/jni/quick/calling_convention.cc
@@ -149,19 +149,44 @@
 std::unique_ptr<JniCallingConvention> JniCallingConvention::Create(ArenaAllocator* arena,
                                                                    bool is_static,
                                                                    bool is_synchronized,
+                                                                   bool is_critical_native,
                                                                    const char* shorty,
                                                                    InstructionSet instruction_set) {
+  if (UNLIKELY(is_critical_native)) {
+    // Sanity check that the requested JNI instruction set
+    // is supported for critical natives. Not every one is.
+    switch (instruction_set) {
+      case kX86_64:
+      case kX86:
+      case kArm64:
+      case kArm:
+      case kThumb2:
+        break;
+      default:
+        is_critical_native = false;
+        LOG(WARNING) << "@CriticalNative support not implemented for " << instruction_set
+                     << "; will crash at runtime if trying to invoke such a method.";
+        // TODO: implement for MIPS/MIPS64
+    }
+  }
+
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm
     case kArm:
     case kThumb2:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) arm::ArmJniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) arm::ArmJniCallingConvention(is_static,
+                                                   is_synchronized,
+                                                   is_critical_native,
+                                                   shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) arm64::Arm64JniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) arm64::Arm64JniCallingConvention(is_static,
+                                                       is_synchronized,
+                                                       is_critical_native,
+                                                       shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
     case kMips:
@@ -176,12 +201,18 @@
 #ifdef ART_ENABLE_CODEGEN_x86
     case kX86:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) x86::X86JniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) x86::X86JniCallingConvention(is_static,
+                                                   is_synchronized,
+                                                   is_critical_native,
+                                                   shorty));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
       return std::unique_ptr<JniCallingConvention>(
-          new (arena) x86_64::X86_64JniCallingConvention(is_static, is_synchronized, shorty));
+          new (arena) x86_64::X86_64JniCallingConvention(is_static,
+                                                         is_synchronized,
+                                                         is_critical_native,
+                                                         shorty));
 #endif
     default:
       LOG(FATAL) << "Unknown InstructionSet: " << instruction_set;
@@ -199,27 +230,36 @@
 }
 
 FrameOffset JniCallingConvention::ReturnValueSaveLocation() const {
-  // Segment state is 4 bytes long
-  return FrameOffset(SavedLocalReferenceCookieOffset().Int32Value() + 4);
+  if (LIKELY(HasHandleScope())) {
+    // Initial offset already includes the displacement.
+    // -- The else branch omits the local reference cookie offset (no handle scope there).
+    const size_t saved_local_reference_cookie_offset =
+        SavedLocalReferenceCookieOffset().Int32Value();
+    // Segment state is 4 bytes long
+    const size_t segment_state_size = 4;
+    return FrameOffset(saved_local_reference_cookie_offset + segment_state_size);
+  } else {
+    // Include only the initial Method* as part of the offset.
+    CHECK_LT(displacement_.SizeValue(),
+             static_cast<size_t>(std::numeric_limits<int32_t>::max()));
+    return FrameOffset(displacement_.Int32Value() + static_cast<size_t>(frame_pointer_size_));
+  }
 }
 
 bool JniCallingConvention::HasNext() {
-  if (itr_args_ <= kObjectOrClass) {
+  if (IsCurrentArgExtraForJni()) {
     return true;
   } else {
-    unsigned int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+    unsigned int arg_pos = GetIteratorPositionWithinShorty();
     return arg_pos < NumArgs();
   }
 }
 
 void JniCallingConvention::Next() {
   CHECK(HasNext());
-  if (itr_args_ > kObjectOrClass) {
-    int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-    if (IsParamALongOrDouble(arg_pos)) {
-      itr_longs_and_doubles_++;
-      itr_slots_++;
-    }
+  if (IsCurrentParamALong() || IsCurrentParamADouble()) {
+    itr_longs_and_doubles_++;
+    itr_slots_++;
   }
   if (IsCurrentParamAFloatOrDouble()) {
     itr_float_and_doubles_++;
@@ -227,63 +267,73 @@
   if (IsCurrentParamAReference()) {
     itr_refs_++;
   }
+  // This default/fallthrough case also covers the extra JNIEnv* argument,
+  // as well as any other single-slot primitives.
   itr_args_++;
   itr_slots_++;
 }
 
 bool JniCallingConvention::IsCurrentParamAReference() {
-  switch (itr_args_) {
-    case kJniEnv:
-      return false;  // JNIEnv*
-    case kObjectOrClass:
-      return true;   // jobject or jclass
-    default: {
-      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-      return IsParamAReference(arg_pos);
-    }
+  bool return_value;
+  if (SwitchExtraJniArguments(itr_args_,
+                              false,  // JNIEnv*
+                              true,   // jobject or jclass
+                              /* out parameters */
+                              &return_value)) {
+    return return_value;
+  } else {
+    int arg_pos = GetIteratorPositionWithinShorty();
+    return IsParamAReference(arg_pos);
   }
 }
 
+
 bool JniCallingConvention::IsCurrentParamJniEnv() {
+  if (UNLIKELY(!HasJniEnv())) {
+    return false;
+  }
   return (itr_args_ == kJniEnv);
 }
 
 bool JniCallingConvention::IsCurrentParamAFloatOrDouble() {
-  switch (itr_args_) {
-    case kJniEnv:
-      return false;  // JNIEnv*
-    case kObjectOrClass:
-      return false;   // jobject or jclass
-    default: {
-      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-      return IsParamAFloatOrDouble(arg_pos);
-    }
+  bool return_value;
+  if (SwitchExtraJniArguments(itr_args_,
+                              false,  // jnienv*
+                              false,  // jobject or jclass
+                              /* out parameters */
+                              &return_value)) {
+    return return_value;
+  } else {
+    int arg_pos = GetIteratorPositionWithinShorty();
+    return IsParamAFloatOrDouble(arg_pos);
   }
 }
 
 bool JniCallingConvention::IsCurrentParamADouble() {
-  switch (itr_args_) {
-    case kJniEnv:
-      return false;  // JNIEnv*
-    case kObjectOrClass:
-      return false;   // jobject or jclass
-    default: {
-      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-      return IsParamADouble(arg_pos);
-    }
+  bool return_value;
+  if (SwitchExtraJniArguments(itr_args_,
+                              false,  // jnienv*
+                              false,  // jobject or jclass
+                              /* out parameters */
+                              &return_value)) {
+    return return_value;
+  } else {
+    int arg_pos = GetIteratorPositionWithinShorty();
+    return IsParamADouble(arg_pos);
   }
 }
 
 bool JniCallingConvention::IsCurrentParamALong() {
-  switch (itr_args_) {
-    case kJniEnv:
-      return false;  // JNIEnv*
-    case kObjectOrClass:
-      return false;   // jobject or jclass
-    default: {
-      int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
-      return IsParamALong(arg_pos);
-    }
+  bool return_value;
+  if (SwitchExtraJniArguments(itr_args_,
+                              false,  // jnienv*
+                              false,  // jobject or jclass
+                              /* out parameters */
+                              &return_value)) {
+    return return_value;
+  } else {
+    int arg_pos = GetIteratorPositionWithinShorty();
+    return IsParamALong(arg_pos);
   }
 }
 
@@ -297,19 +347,93 @@
   return FrameOffset(result);
 }
 
-size_t JniCallingConvention::CurrentParamSize() {
-  if (itr_args_ <= kObjectOrClass) {
+size_t JniCallingConvention::CurrentParamSize() const {
+  if (IsCurrentArgExtraForJni()) {
     return static_cast<size_t>(frame_pointer_size_);  // JNIEnv or jobject/jclass
   } else {
-    int arg_pos = itr_args_ - NumberOfExtraArgumentsForJni();
+    int arg_pos = GetIteratorPositionWithinShorty();
     return ParamSize(arg_pos);
   }
 }
 
-size_t JniCallingConvention::NumberOfExtraArgumentsForJni() {
-  // The first argument is the JNIEnv*.
-  // Static methods have an extra argument which is the jclass.
-  return IsStatic() ? 2 : 1;
+size_t JniCallingConvention::NumberOfExtraArgumentsForJni() const {
+  if (LIKELY(HasExtraArgumentsForJni())) {
+    // The first argument is the JNIEnv*.
+    // Static methods have an extra argument which is the jclass.
+    return IsStatic() ? 2 : 1;
+  } else {
+    // Critical natives exclude the JNIEnv and the jclass/this parameters.
+    return 0;
+  }
 }
 
+bool JniCallingConvention::HasHandleScope() const {
+  // @CriticalNative methods omit the HandleScope as a speed optimization.
+  return !is_critical_native_;
+}
+
+bool JniCallingConvention::HasLocalReferenceSegmentState() const {
+  // @CriticalNative methods omit the local reference segment state as a speed optimization.
+  return !is_critical_native_;
+}
+
+bool JniCallingConvention::HasJniEnv() const {
+  // "JNIEnv*" is one of the extra JNI arguments; @CriticalNative methods have none.
+  return HasExtraArgumentsForJni();
+}
+
+bool JniCallingConvention::HasSelfClass() const {
+  // An implicit jclass parameter exists only when both conditions hold:
+  // -- The method is static. Instance (virtual) methods never take a jclass;
+  //    HasExtraArgumentsForJni() is not even consulted for them.
+  // -- The method still carries the extra JNI arguments, which is not the
+  //    case for @CriticalNative.
+  // Short-circuit evaluation preserves the original order of the two queries.
+  return IsStatic() && HasExtraArgumentsForJni();
+}
+
+bool JniCallingConvention::HasExtraArgumentsForJni() const {
+  // @CriticalNative jni implementations exclude both JNIEnv* and the jclass/jobject parameters.
+  return !is_critical_native_;
+}
+
+unsigned int JniCallingConvention::GetIteratorPositionWithinShorty() const {
+  // The shorty does *not* describe the JNIEnv, jclass/jobject parameters, so discount them to
+  // get an index usable with the inherited, shorty-scanning CallingConvention member functions.
+  const size_t extra_jni_args = NumberOfExtraArgumentsForJni();
+  DCHECK_GE(itr_args_, extra_jni_args);
+  return itr_args_ - extra_jni_args;
+}
+
+bool JniCallingConvention::IsCurrentArgExtraForJni() const {
+  // Two conditions, checked in this order:
+  // -- Extra JNI arguments exist at all (without them nothing can be an extra).
+  // -- The iterator has not yet moved past kObjectOrClass; only kJniEnv and
+  //    kObjectOrClass are considered extra.
+  return LIKELY(HasExtraArgumentsForJni()) && itr_args_ <= kObjectOrClass;
+}
+
+bool JniCallingConvention::SwitchExtraJniArguments(size_t switch_value,
+                                                   bool case_jni_env,
+                                                   bool case_object_or_class,
+                                                   /* out parameters */
+                                                   bool* return_value) const {
+  DCHECK(return_value != nullptr);
+  // Without extra JNI arguments nothing matches; *return_value is left untouched.
+  if (UNLIKELY(!HasExtraArgumentsForJni())) {
+    return false;
+  }
+
+  // Only the two extra-argument positions produce a match.
+  if (switch_value == kJniEnv) {
+    *return_value = case_jni_env;
+  } else if (switch_value == kObjectOrClass) {
+    *return_value = case_object_or_class;
+  } else {
+    return false;  // An ordinary argument; *return_value is left untouched.
+  }
+  return true;
+}
+
+
 }  // namespace art
diff --git a/compiler/jni/quick/calling_convention.h b/compiler/jni/quick/calling_convention.h
index 995fa51..3d89146 100644
--- a/compiler/jni/quick/calling_convention.h
+++ b/compiler/jni/quick/calling_convention.h
@@ -161,6 +161,12 @@
   size_t NumArgs() const {
     return num_args_;
   }
+  // Implicit argument count: 1 for instance functions, 0 for static functions.
+  // (The implicit argument is only relevant to the shorty, i.e.
+  // the 0th arg is not in the shorty if it's implicit).
+  size_t NumImplicitArgs() const {
+    return IsStatic() ? 0 : 1;
+  }
   size_t NumLongOrDoubleArgs() const {
     return num_long_or_double_args_;
   }
@@ -281,6 +287,7 @@
   static std::unique_ptr<JniCallingConvention> Create(ArenaAllocator* arena,
                                                       bool is_static,
                                                       bool is_synchronized,
+                                                      bool is_critical_native,
                                                       const char* shorty,
                                                       InstructionSet instruction_set);
 
@@ -288,7 +295,8 @@
   // always at the bottom of a frame, but this doesn't work for outgoing
   // native args). Includes alignment.
   virtual size_t FrameSize() = 0;
-  // Size of outgoing arguments, including alignment
+  // Size of outgoing arguments (stack portion), including alignment.
+  // -- Arguments that are passed via registers are excluded from this size.
   virtual size_t OutArgSize() = 0;
   // Number of references in stack indirect reference table
   size_t ReferenceCount() const;
@@ -319,8 +327,11 @@
   bool IsCurrentParamAFloatOrDouble();
   bool IsCurrentParamADouble();
   bool IsCurrentParamALong();
+  bool IsCurrentParamALongOrDouble() {
+    return IsCurrentParamALong() || IsCurrentParamADouble();
+  }
   bool IsCurrentParamJniEnv();
-  size_t CurrentParamSize();
+  size_t CurrentParamSize() const;
   virtual bool IsCurrentParamInRegister() = 0;
   virtual bool IsCurrentParamOnStack() = 0;
   virtual ManagedRegister CurrentParamRegister() = 0;
@@ -359,18 +370,62 @@
     kObjectOrClass = 1
   };
 
+  // TODO: remove this constructor once all are changed to the below one.
   JniCallingConvention(bool is_static,
                        bool is_synchronized,
                        const char* shorty,
                        PointerSize frame_pointer_size)
-      : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size) {}
+      : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size),
+        is_critical_native_(false) {}
+
+  JniCallingConvention(bool is_static,
+                       bool is_synchronized,
+                       bool is_critical_native,
+                       const char* shorty,
+                       PointerSize frame_pointer_size)
+      : CallingConvention(is_static, is_synchronized, shorty, frame_pointer_size),
+        is_critical_native_(is_critical_native) {}
 
   // Number of stack slots for outgoing arguments, above which the handle scope is
   // located
   virtual size_t NumberOfOutgoingStackArgs() = 0;
 
  protected:
-  size_t NumberOfExtraArgumentsForJni();
+  size_t NumberOfExtraArgumentsForJni() const;
+
+  // Does the transition have a StackHandleScope?
+  bool HasHandleScope() const;
+  // Does the transition have a local reference segment state?
+  bool HasLocalReferenceSegmentState() const;
+  // Has a JNIEnv* parameter implicitly?
+  bool HasJniEnv() const;
+  // Has a 'jclass' parameter implicitly?
+  bool HasSelfClass() const;
+
+  // Are there extra JNI arguments (JNIEnv* and maybe jclass)?
+  bool HasExtraArgumentsForJni() const;
+
+  // Returns the position of itr_args_, fixed up by removing the offset of extra JNI arguments.
+  unsigned int GetIteratorPositionWithinShorty() const;
+
+  // Is the current argument (at the iterator) an extra argument for JNI?
+  bool IsCurrentArgExtraForJni() const;
+
+  const bool is_critical_native_;
+
+ private:
+  // Shorthand for switching on the switch value but only IF there are extra JNI arguments.
+  //
+  // Puts the case value into return_value.
+  // * (switch_value == kJniEnv) => case_jni_env
+  // * (switch_value == kObjectOrClass) => case_object_or_class
+  //
+  // Returns false otherwise (or if there are no extra JNI arguments).
+  bool SwitchExtraJniArguments(size_t switch_value,
+                               bool case_jni_env,
+                               bool case_object_or_class,
+                               /* out parameters */
+                               bool* return_value) const;
 };
 
 }  // namespace art
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index d092c3f..7e58d78 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -90,8 +90,10 @@
   const InstructionSetFeatures* instruction_set_features = driver->GetInstructionSetFeatures();
 
   // i.e. if the method was annotated with @FastNative
-  const bool is_fast_native =
-      (static_cast<uint32_t>(optimization_flags) & Compiler::kFastNative) != 0;
+  const bool is_fast_native = (optimization_flags == Compiler::kFastNative);
+
+  // i.e. if the method was annotated with @CriticalNative
+  bool is_critical_native = (optimization_flags == Compiler::kCriticalNative);
 
   VLOG(jni) << "JniCompile: Method :: "
               << art::PrettyMethod(method_idx, dex_file, /* with signature */ true)
@@ -102,12 +104,50 @@
               << art::PrettyMethod(method_idx, dex_file, /* with signature */ true);
   }
 
+  if (UNLIKELY(is_critical_native)) {
+    VLOG(jni) << "JniCompile: Critical native method detected :: "
+              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true);
+  }
+
+  if (kIsDebugBuild) {
+    // Don't allow both @FastNative and @CriticalNative. They are mutually exclusive.
+    if (UNLIKELY(is_fast_native && is_critical_native)) {
+      LOG(FATAL) << "JniCompile: Method cannot be both @CriticalNative and @FastNative "
+                 << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+    }
+
+    // @CriticalNative - extra checks:
+    // -- Don't allow virtual criticals
+    // -- Don't allow synchronized criticals
+    // -- Don't allow any objects as parameter or return value
+    if (UNLIKELY(is_critical_native)) {
+      CHECK(is_static)
+          << "@CriticalNative functions cannot be virtual since that would "
+          << "require passing a reference parameter (this), which is illegal "
+          << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+      CHECK(!is_synchronized)
+          << "@CriticalNative functions cannot be synchronized since that would "
+          << "require passing a (class and/or this) reference parameter, which is illegal "
+          << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+      for (size_t i = 0, shorty_len = strlen(shorty); i < shorty_len; ++i) {
+        CHECK_NE(Primitive::kPrimNot, Primitive::GetType(shorty[i]))
+            << "@CriticalNative methods' shorty types must not have illegal references "
+            << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+      }
+    }
+  }
+
   ArenaPool pool;
   ArenaAllocator arena(&pool);
 
   // Calling conventions used to iterate over parameters to method
-  std::unique_ptr<JniCallingConvention> main_jni_conv(
-      JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, instruction_set));
+  std::unique_ptr<JniCallingConvention> main_jni_conv =
+      JniCallingConvention::Create(&arena,
+                                   is_static,
+                                   is_synchronized,
+                                   is_critical_native,
+                                   shorty,
+                                   instruction_set);
   bool reference_return = main_jni_conv->IsReturnAReference();
 
   std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
@@ -127,8 +167,13 @@
     jni_end_shorty = "V";
   }
 
-  std::unique_ptr<JniCallingConvention> end_jni_conv(JniCallingConvention::Create(
-      &arena, is_static, is_synchronized, jni_end_shorty, instruction_set));
+  std::unique_ptr<JniCallingConvention> end_jni_conv(
+      JniCallingConvention::Create(&arena,
+                                   is_static,
+                                   is_synchronized,
+                                   is_critical_native,
+                                   jni_end_shorty,
+                                   instruction_set));
 
   // Assembler that holds generated instructions
   std::unique_ptr<JNIMacroAssembler<kPointerSize>> jni_asm =
@@ -141,75 +186,89 @@
   const Offset monitor_enter(OFFSETOF_MEMBER(JNINativeInterface, MonitorEnter));
   const Offset monitor_exit(OFFSETOF_MEMBER(JNINativeInterface, MonitorExit));
 
-  // 1. Build the frame saving all callee saves
-  const size_t frame_size(main_jni_conv->FrameSize());
+  // 1. Build the frame saving all callee saves, Method*, and PC return address.
+  const size_t frame_size(main_jni_conv->FrameSize());  // Excludes outgoing args.
   ArrayRef<const ManagedRegister> callee_save_regs = main_jni_conv->CalleeSaveRegisters();
   __ BuildFrame(frame_size, mr_conv->MethodRegister(), callee_save_regs, mr_conv->EntrySpills());
   DCHECK_EQ(jni_asm->cfi().GetCurrentCFAOffset(), static_cast<int>(frame_size));
 
-  // 2. Set up the HandleScope
-  mr_conv->ResetIterator(FrameOffset(frame_size));
-  main_jni_conv->ResetIterator(FrameOffset(0));
-  __ StoreImmediateToFrame(main_jni_conv->HandleScopeNumRefsOffset(),
-                           main_jni_conv->ReferenceCount(),
-                           mr_conv->InterproceduralScratchRegister());
+  if (LIKELY(!is_critical_native)) {
+    // NOTE: @CriticalNative methods don't have a HandleScope
+    //       because they can't have any reference parameters or return values.
 
-  __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
-                          Thread::TopHandleScopeOffset<kPointerSize>(),
-                          mr_conv->InterproceduralScratchRegister());
-  __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
-                              main_jni_conv->HandleScopeOffset(),
-                              mr_conv->InterproceduralScratchRegister());
+    // 2. Set up the HandleScope
+    mr_conv->ResetIterator(FrameOffset(frame_size));
+    main_jni_conv->ResetIterator(FrameOffset(0));
+    __ StoreImmediateToFrame(main_jni_conv->HandleScopeNumRefsOffset(),
+                             main_jni_conv->ReferenceCount(),
+                             mr_conv->InterproceduralScratchRegister());
 
-  // 3. Place incoming reference arguments into handle scope
-  main_jni_conv->Next();  // Skip JNIEnv*
-  // 3.5. Create Class argument for static methods out of passed method
-  if (is_static) {
-    FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
-    // Check handle scope offset is within frame
-    CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
-    // Note this LoadRef() doesn't need heap unpoisoning since it's from the ArtMethod.
-    // Note this LoadRef() does not include read barrier. It will be handled below.
-    __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
-               mr_conv->MethodRegister(), ArtMethod::DeclaringClassOffset(), false);
-    __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
-    __ StoreRef(handle_scope_offset, main_jni_conv->InterproceduralScratchRegister());
-    main_jni_conv->Next();  // in handle scope so move to next argument
-  }
-  while (mr_conv->HasNext()) {
-    CHECK(main_jni_conv->HasNext());
-    bool ref_param = main_jni_conv->IsCurrentParamAReference();
-    CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
-    // References need placing in handle scope and the entry value passing
-    if (ref_param) {
-      // Compute handle scope entry, note null is placed in the handle scope but its boxed value
-      // must be null.
+    __ CopyRawPtrFromThread(main_jni_conv->HandleScopeLinkOffset(),
+                            Thread::TopHandleScopeOffset<kPointerSize>(),
+                            mr_conv->InterproceduralScratchRegister());
+    __ StoreStackOffsetToThread(Thread::TopHandleScopeOffset<kPointerSize>(),
+                                main_jni_conv->HandleScopeOffset(),
+                                mr_conv->InterproceduralScratchRegister());
+
+    // 3. Place incoming reference arguments into handle scope
+    main_jni_conv->Next();  // Skip JNIEnv*
+    // 3.5. Create Class argument for static methods out of passed method
+    if (is_static) {
       FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
-      // Check handle scope offset is within frame and doesn't run into the saved segment state.
+      // Check handle scope offset is within frame
       CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
-      CHECK_NE(handle_scope_offset.Uint32Value(),
-               main_jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
-      bool input_in_reg = mr_conv->IsCurrentParamInRegister();
-      bool input_on_stack = mr_conv->IsCurrentParamOnStack();
-      CHECK(input_in_reg || input_on_stack);
-
-      if (input_in_reg) {
-        ManagedRegister in_reg  =  mr_conv->CurrentParamRegister();
-        __ VerifyObject(in_reg, mr_conv->IsCurrentArgPossiblyNull());
-        __ StoreRef(handle_scope_offset, in_reg);
-      } else if (input_on_stack) {
-        FrameOffset in_off  = mr_conv->CurrentParamStackOffset();
-        __ VerifyObject(in_off, mr_conv->IsCurrentArgPossiblyNull());
-        __ CopyRef(handle_scope_offset, in_off,
-                   mr_conv->InterproceduralScratchRegister());
-      }
+      // Note this LoadRef() doesn't need heap unpoisoning since it's from the ArtMethod.
+      // Note this LoadRef() does not include read barrier. It will be handled below.
+      //
+      // scratchRegister = *method[DeclaringClassOffset()];
+      __ LoadRef(main_jni_conv->InterproceduralScratchRegister(),
+                 mr_conv->MethodRegister(), ArtMethod::DeclaringClassOffset(), false);
+      __ VerifyObject(main_jni_conv->InterproceduralScratchRegister(), false);
+      // *handleScopeOffset = scratchRegister
+      __ StoreRef(handle_scope_offset, main_jni_conv->InterproceduralScratchRegister());
+      main_jni_conv->Next();  // in handle scope so move to next argument
     }
-    mr_conv->Next();
-    main_jni_conv->Next();
-  }
+    // Place every reference into the handle scope (ignore other parameters).
+    while (mr_conv->HasNext()) {
+      CHECK(main_jni_conv->HasNext());
+      bool ref_param = main_jni_conv->IsCurrentParamAReference();
+      CHECK(!ref_param || mr_conv->IsCurrentParamAReference());
+      // References need placing in handle scope and the entry value passing
+      if (ref_param) {
+        // Compute handle scope entry, note null is placed in the handle scope but its boxed value
+        // must be null.
+        FrameOffset handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+        // Check handle scope offset is within frame and doesn't run into the saved segment state.
+        CHECK_LT(handle_scope_offset.Uint32Value(), frame_size);
+        CHECK_NE(handle_scope_offset.Uint32Value(),
+                 main_jni_conv->SavedLocalReferenceCookieOffset().Uint32Value());
+        bool input_in_reg = mr_conv->IsCurrentParamInRegister();
+        bool input_on_stack = mr_conv->IsCurrentParamOnStack();
+        CHECK(input_in_reg || input_on_stack);
 
-  // 4. Write out the end of the quick frames.
-  __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
+        if (input_in_reg) {
+          ManagedRegister in_reg  =  mr_conv->CurrentParamRegister();
+          __ VerifyObject(in_reg, mr_conv->IsCurrentArgPossiblyNull());
+          __ StoreRef(handle_scope_offset, in_reg);
+        } else if (input_on_stack) {
+          FrameOffset in_off  = mr_conv->CurrentParamStackOffset();
+          __ VerifyObject(in_off, mr_conv->IsCurrentArgPossiblyNull());
+          __ CopyRef(handle_scope_offset, in_off,
+                     mr_conv->InterproceduralScratchRegister());
+        }
+      }
+      mr_conv->Next();
+      main_jni_conv->Next();
+    }
+
+    // 4. Write out the end of the quick frames.
+    __ StoreStackPointerToThread(Thread::TopOfManagedStackOffset<kPointerSize>());
+
+    // NOTE: @CriticalNative does not need to store the stack pointer to the thread
+    //       because garbage collections are disabled within the execution of a
+    //       @CriticalNative method.
+    //       (TODO: We could probably disable it for @FastNative too).
+  }  // if (!is_critical_native)
 
   // 5. Move frame down to allow space for out going args.
   const size_t main_out_arg_size = main_jni_conv->OutArgSize();
@@ -218,7 +277,9 @@
 
   // Call the read barrier for the declaring class loaded from the method for a static call.
   // Note that we always have outgoing param space available for at least two params.
-  if (kUseReadBarrier && is_static) {
+  if (kUseReadBarrier && is_static && !is_critical_native) {
+    // XX: Why is this necessary only for the jclass? Why not for every single object ref?
+    // Skip this for @CriticalNative because we didn't build a HandleScope to begin with.
     ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
                                                                       pReadBarrierJni);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
@@ -255,46 +316,56 @@
   //    can occur. The result is the saved JNI local state that is restored by the exit call. We
   //    abuse the JNI calling convention here, that is guaranteed to support passing 2 pointer
   //    arguments.
-  ThreadOffset<kPointerSize> jni_start =
-      is_synchronized
-          ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
-          : (is_fast_native
-                 ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
-                 : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
+  FrameOffset locked_object_handle_scope_offset(0xBEEFDEAD);
+  if (LIKELY(!is_critical_native)) {
+    // Skip this for @CriticalNative methods. They do not call JniMethodStart.
+    ThreadOffset<kPointerSize> jni_start =
+        is_synchronized
+            ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+            : (is_fast_native
+                   ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
+                   : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
 
-  main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-  FrameOffset locked_object_handle_scope_offset(0);
-  if (is_synchronized) {
-    // Pass object for locking.
-    main_jni_conv->Next();  // Skip JNIEnv.
-    locked_object_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-    if (main_jni_conv->IsCurrentParamOnStack()) {
-      FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
-      __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
-                                mr_conv->InterproceduralScratchRegister(), false);
-    } else {
-      ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
-      __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
-                                ManagedRegister::NoRegister(), false);
+    locked_object_handle_scope_offset = FrameOffset(0);
+    if (is_synchronized) {
+      // Pass object for locking.
+      main_jni_conv->Next();  // Skip JNIEnv.
+      locked_object_handle_scope_offset = main_jni_conv->CurrentParamHandleScopeEntryOffset();
+      main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
+      if (main_jni_conv->IsCurrentParamOnStack()) {
+        FrameOffset out_off = main_jni_conv->CurrentParamStackOffset();
+        __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
+                                  mr_conv->InterproceduralScratchRegister(), false);
+      } else {
+        ManagedRegister out_reg = main_jni_conv->CurrentParamRegister();
+        __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
+                                  ManagedRegister::NoRegister(), false);
+      }
+      main_jni_conv->Next();
     }
-    main_jni_conv->Next();
+    if (main_jni_conv->IsCurrentParamInRegister()) {
+      __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
+      __ Call(main_jni_conv->CurrentParamRegister(),
+              Offset(jni_start),
+              main_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
+                          main_jni_conv->InterproceduralScratchRegister());
+      __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
+    }
+    if (is_synchronized) {  // Check for exceptions from monitor enter.
+      __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
+    }
   }
-  if (main_jni_conv->IsCurrentParamInRegister()) {
-    __ GetCurrentThread(main_jni_conv->CurrentParamRegister());
-    __ Call(main_jni_conv->CurrentParamRegister(),
-            Offset(jni_start),
-            main_jni_conv->InterproceduralScratchRegister());
-  } else {
-    __ GetCurrentThread(main_jni_conv->CurrentParamStackOffset(),
-                        main_jni_conv->InterproceduralScratchRegister());
-    __ CallFromThread(jni_start, main_jni_conv->InterproceduralScratchRegister());
+
+  // Store into stack_frame[saved_cookie_offset] the return value of JniMethodStart.
+  FrameOffset saved_cookie_offset(
+      FrameOffset(0xDEADBEEFu));  // @CriticalNative - use obviously bad value for debugging
+  if (LIKELY(!is_critical_native)) {
+    saved_cookie_offset = main_jni_conv->SavedLocalReferenceCookieOffset();
+    __ Store(saved_cookie_offset, main_jni_conv->IntReturnRegister(), 4 /* sizeof cookie */);
   }
-  if (is_synchronized) {  // Check for exceptions from monitor enter.
-    __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), main_out_arg_size);
-  }
-  FrameOffset saved_cookie_offset = main_jni_conv->SavedLocalReferenceCookieOffset();
-  __ Store(saved_cookie_offset, main_jni_conv->IntReturnRegister(), 4);
 
   // 7. Iterate over arguments placing values from managed calling convention in
   //    to the convention required for a native call (shuffling). For references
@@ -315,9 +386,13 @@
   for (uint32_t i = 0; i < args_count; ++i) {
     mr_conv->ResetIterator(FrameOffset(frame_size + main_out_arg_size));
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-    main_jni_conv->Next();  // Skip JNIEnv*.
-    if (is_static) {
-      main_jni_conv->Next();  // Skip Class for now.
+
+    // Skip the extra JNI parameters for now.
+    if (LIKELY(!is_critical_native)) {
+      main_jni_conv->Next();    // Skip JNIEnv*.
+      if (is_static) {
+        main_jni_conv->Next();  // Skip Class for now.
+      }
     }
     // Skip to the argument we're interested in.
     for (uint32_t j = 0; j < args_count - i - 1; ++j) {
@@ -326,7 +401,7 @@
     }
     CopyParameter(jni_asm.get(), mr_conv.get(), main_jni_conv.get(), frame_size, main_out_arg_size);
   }
-  if (is_static) {
+  if (is_static && !is_critical_native) {
     // Create argument for Class
     mr_conv->ResetIterator(FrameOffset(frame_size + main_out_arg_size));
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
@@ -344,24 +419,30 @@
     }
   }
 
-  // 8. Create 1st argument, the JNI environment ptr.
+  // Reset the iterator to the first outgoing JNI argument (JNIEnv* unless @CriticalNative).
   main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
-  // Register that will hold local indirect reference table
-  if (main_jni_conv->IsCurrentParamInRegister()) {
-    ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
-    DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
-    __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
-  } else {
-    FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
-    __ CopyRawPtrFromThread(jni_env,
-                            Thread::JniEnvOffset<kPointerSize>(),
-                            main_jni_conv->InterproceduralScratchRegister());
+  if (LIKELY(!is_critical_native)) {
+    // 8. Create 1st argument, the JNI environment ptr.
+    // Register that will hold local indirect reference table
+    if (main_jni_conv->IsCurrentParamInRegister()) {
+      ManagedRegister jni_env = main_jni_conv->CurrentParamRegister();
+      DCHECK(!jni_env.Equals(main_jni_conv->InterproceduralScratchRegister()));
+      __ LoadRawPtrFromThread(jni_env, Thread::JniEnvOffset<kPointerSize>());
+    } else {
+      FrameOffset jni_env = main_jni_conv->CurrentParamStackOffset();
+      __ CopyRawPtrFromThread(jni_env,
+                              Thread::JniEnvOffset<kPointerSize>(),
+                              main_jni_conv->InterproceduralScratchRegister());
+    }
   }
 
   // 9. Plant call to native code associated with method.
-  MemberOffset jni_entrypoint_offset = ArtMethod::EntryPointFromJniOffset(
-      InstructionSetPointerSize(instruction_set));
-  __ Call(main_jni_conv->MethodStackOffset(), jni_entrypoint_offset,
+  MemberOffset jni_entrypoint_offset =
+      ArtMethod::EntryPointFromJniOffset(InstructionSetPointerSize(instruction_set));
+  // FIXME: Not sure if MethodStackOffset will work here. What does it even do?
+  __ Call(main_jni_conv->MethodStackOffset(),
+          jni_entrypoint_offset,
+          // XX: Why not the jni conv scratch register?
           mr_conv->InterproceduralScratchRegister());
 
   // 10. Fix differences in result widths.
@@ -377,20 +458,45 @@
     }
   }
 
-  // 11. Save return value
+  // 11. Process return value
   FrameOffset return_save_location = main_jni_conv->ReturnValueSaveLocation();
   if (main_jni_conv->SizeOfReturnValue() != 0 && !reference_return) {
-    if ((instruction_set == kMips || instruction_set == kMips64) &&
-        main_jni_conv->GetReturnType() == Primitive::kPrimDouble &&
-        return_save_location.Uint32Value() % 8 != 0) {
-      // Ensure doubles are 8-byte aligned for MIPS
-      return_save_location = FrameOffset(return_save_location.Uint32Value()
-                                             + static_cast<size_t>(kMipsPointerSize));
+    if (LIKELY(!is_critical_native)) {
+      // For normal JNI, store the return value on the stack because the call to
+      // JniMethodEnd will clobber the return value. It will be restored in (13).
+      if ((instruction_set == kMips || instruction_set == kMips64) &&
+          main_jni_conv->GetReturnType() == Primitive::kPrimDouble &&
+          return_save_location.Uint32Value() % 8 != 0) {
+        // Ensure doubles are 8-byte aligned for MIPS
+        return_save_location = FrameOffset(return_save_location.Uint32Value()
+                                               + static_cast<size_t>(kMipsPointerSize));
+        // TODO: refactor this into the JniCallingConvention code
+        // as a return value alignment requirement.
+      }
+      CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
+      __ Store(return_save_location,
+               main_jni_conv->ReturnRegister(),
+               main_jni_conv->SizeOfReturnValue());
+    } else {
+      // For @CriticalNative only,
+      // move the JNI return register into the managed return register (if they don't match).
+      ManagedRegister jni_return_reg = main_jni_conv->ReturnRegister();
+      ManagedRegister mr_return_reg = mr_conv->ReturnRegister();
+
+      // Check if the JNI return register matches the managed return register.
+      // If they differ, only then do we have to do anything about it.
+      // Otherwise the return value is already in the right place when we return.
+      if (!jni_return_reg.Equals(mr_return_reg)) {
+        // This is typically only necessary on ARM32 due to native being softfloat
+        // while managed is hardfloat.
+        // -- For example VMOV {r0, r1} -> D0; VMOV r0 -> S0.
+        __ Move(mr_return_reg, jni_return_reg, main_jni_conv->SizeOfReturnValue());
+      } else if (jni_return_reg.IsNoRegister() && mr_return_reg.IsNoRegister()) {
+        // Sanity check: If the return value is passed on the stack for some reason,
+        // then make sure the size matches.
+        CHECK_EQ(main_jni_conv->SizeOfReturnValue(), mr_conv->SizeOfReturnValue());
+      }
     }
-    CHECK_LT(return_save_location.Uint32Value(), frame_size + main_out_arg_size);
-    __ Store(return_save_location,
-             main_jni_conv->ReturnRegister(),
-             main_jni_conv->SizeOfReturnValue());
   }
 
   // Increase frame size for out args if needed by the end_jni_conv.
@@ -398,6 +504,8 @@
   if (end_out_arg_size > current_out_arg_size) {
     size_t out_arg_size_diff = end_out_arg_size - current_out_arg_size;
     current_out_arg_size = end_out_arg_size;
+    // TODO: This is redundant for @CriticalNative but we need to
+    // conditionally do __DecreaseFrameSize below.
     __ IncreaseFrameSize(out_arg_size_diff);
     saved_cookie_offset = FrameOffset(saved_cookie_offset.SizeValue() + out_arg_size_diff);
     locked_object_handle_scope_offset =
@@ -407,65 +515,71 @@
   //     thread.
   end_jni_conv->ResetIterator(FrameOffset(end_out_arg_size));
 
-  ThreadOffset<kPointerSize> jni_end(-1);
-  if (reference_return) {
-    // Pass result.
-    jni_end = is_synchronized
-                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
-                  : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
-    SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
-    end_jni_conv->Next();
-  } else {
-    jni_end = is_synchronized
-                  ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
-                  : (is_fast_native
-                         ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
-                         : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
-  }
-  // Pass saved local reference state.
-  if (end_jni_conv->IsCurrentParamOnStack()) {
-    FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-    __ Copy(out_off, saved_cookie_offset, end_jni_conv->InterproceduralScratchRegister(), 4);
-  } else {
-    ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
-    __ Load(out_reg, saved_cookie_offset, 4);
-  }
-  end_jni_conv->Next();
-  if (is_synchronized) {
-    // Pass object for unlocking.
+  if (LIKELY(!is_critical_native)) {
+    // 12. Call JniMethodEnd
+    ThreadOffset<kPointerSize> jni_end(-1);
+    if (reference_return) {
+      // Pass result.
+      jni_end = is_synchronized
+                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                    : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
+      SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
+      end_jni_conv->Next();
+    } else {
+      jni_end = is_synchronized
+                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
+    }
+    // Pass saved local reference state.
     if (end_jni_conv->IsCurrentParamOnStack()) {
       FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
-      __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
-                         end_jni_conv->InterproceduralScratchRegister(),
-                         false);
+      __ Copy(out_off, saved_cookie_offset, end_jni_conv->InterproceduralScratchRegister(), 4);
     } else {
       ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
-      __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
-                         ManagedRegister::NoRegister(), false);
+      __ Load(out_reg, saved_cookie_offset, 4);
     }
     end_jni_conv->Next();
-  }
-  if (end_jni_conv->IsCurrentParamInRegister()) {
-    __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
-    __ Call(end_jni_conv->CurrentParamRegister(),
-            Offset(jni_end),
-            end_jni_conv->InterproceduralScratchRegister());
-  } else {
-    __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
-                        end_jni_conv->InterproceduralScratchRegister());
-    __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
-  }
+    if (is_synchronized) {
+      // Pass object for unlocking.
+      if (end_jni_conv->IsCurrentParamOnStack()) {
+        FrameOffset out_off = end_jni_conv->CurrentParamStackOffset();
+        __ CreateHandleScopeEntry(out_off, locked_object_handle_scope_offset,
+                           end_jni_conv->InterproceduralScratchRegister(),
+                           false);
+      } else {
+        ManagedRegister out_reg = end_jni_conv->CurrentParamRegister();
+        __ CreateHandleScopeEntry(out_reg, locked_object_handle_scope_offset,
+                           ManagedRegister::NoRegister(), false);
+      }
+      end_jni_conv->Next();
+    }
+    if (end_jni_conv->IsCurrentParamInRegister()) {
+      __ GetCurrentThread(end_jni_conv->CurrentParamRegister());
+      __ Call(end_jni_conv->CurrentParamRegister(),
+              Offset(jni_end),
+              end_jni_conv->InterproceduralScratchRegister());
+    } else {
+      __ GetCurrentThread(end_jni_conv->CurrentParamStackOffset(),
+                          end_jni_conv->InterproceduralScratchRegister());
+      __ CallFromThread(jni_end, end_jni_conv->InterproceduralScratchRegister());
+    }
 
-  // 13. Reload return value
-  if (main_jni_conv->SizeOfReturnValue() != 0 && !reference_return) {
-    __ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue());
-  }
+    // 13. Reload return value
+    if (main_jni_conv->SizeOfReturnValue() != 0 && !reference_return) {
+      __ Load(mr_conv->ReturnRegister(), return_save_location, mr_conv->SizeOfReturnValue());
+      // NOTE: This reload is never reached for @CriticalNative (the enclosing block is
+      // skipped); in that case step 11 already moved the native return register into the
+      // managed return register when the two differ.
+    }
+  }  // if (!is_critical_native)
 
   // 14. Move frame up now we're done with the out arg space.
   __ DecreaseFrameSize(current_out_arg_size);
 
   // 15. Process pending exceptions from JNI call or monitor exit.
-  __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0);
+  __ ExceptionPoll(main_jni_conv->InterproceduralScratchRegister(), 0 /* stack_adjust */);
 
   // 16. Remove activation - need to restore callee save registers since the GC may have changed
   //     them.
@@ -497,7 +611,8 @@
 static void CopyParameter(JNIMacroAssembler<kPointerSize>* jni_asm,
                           ManagedRuntimeCallingConvention* mr_conv,
                           JniCallingConvention* jni_conv,
-                          size_t frame_size, size_t out_arg_size) {
+                          size_t frame_size,
+                          size_t out_arg_size) {
   bool input_in_reg = mr_conv->IsCurrentParamInRegister();
   bool output_in_reg = jni_conv->IsCurrentParamInRegister();
   FrameOffset handle_scope_offset(0);
diff --git a/compiler/jni/quick/x86/calling_convention_x86.cc b/compiler/jni/quick/x86/calling_convention_x86.cc
index 1d06f26..0bfcc3f 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.cc
+++ b/compiler/jni/quick/x86/calling_convention_x86.cc
@@ -24,6 +24,7 @@
 namespace x86 {
 
 static_assert(kX86PointerSize == PointerSize::k32, "Unexpected x86 pointer size");
+static_assert(kStackAlignment >= 16u, "IA-32 cdecl requires at least 16 byte stack alignment");
 
 static constexpr ManagedRegister kCalleeSaveRegisters[] = {
     // Core registers.
@@ -190,9 +191,15 @@
 
 // JNI calling convention
 
-X86JniCallingConvention::X86JniCallingConvention(bool is_static, bool is_synchronized,
+X86JniCallingConvention::X86JniCallingConvention(bool is_static,
+                                                 bool is_synchronized,
+                                                 bool is_critical_native,
                                                  const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kX86PointerSize) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kX86PointerSize) {
 }
 
 uint32_t X86JniCallingConvention::CoreSpillMask() const {
@@ -204,13 +211,31 @@
 }
 
 size_t X86JniCallingConvention::FrameSize() {
-  // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = static_cast<size_t>(kX86PointerSize) +
-      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
-  // References plus 2 words for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kX86PointerSize, ReferenceCount());
+  // Method*, PC return address and callee save area size, local reference segment state
+  const size_t method_ptr_size = static_cast<size_t>(kX86PointerSize);
+  const size_t pc_return_addr_size = kFramePointerSize;
+  const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+  size_t frame_data_size = method_ptr_size + pc_return_addr_size + callee_save_area_size;
+
+  if (LIKELY(HasLocalReferenceSegmentState())) {                     // local ref. segment state
+    // Local reference segment state is sometimes excluded.
+    frame_data_size += kFramePointerSize;
+  }
+
+  // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
+  const size_t handle_scope_size = HandleScope::SizeOf(kX86PointerSize, ReferenceCount());
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;                                 // handle scope size
+  }
+
   // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
+  // TODO: Same thing as x64 except using different pointer size. Refactor?
 }
 
 size_t X86JniCallingConvention::OutArgSize() {
@@ -239,11 +264,13 @@
 }
 
 size_t X86JniCallingConvention::NumberOfOutgoingStackArgs() {
-  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
+  size_t static_args = HasSelfClass() ? 1 : 0;  // count jclass
   // regular argument parameters and this
   size_t param_args = NumArgs() + NumLongOrDoubleArgs();
   // count JNIEnv* and return pc (pushed after Method*)
-  size_t total_args = static_args + param_args + 2;
+  size_t internal_args = 1 /* return pc */ + (HasJniEnv() ? 1 : 0 /* jni env */);
+  // No register args.
+  size_t total_args = static_args + param_args + internal_args;
   return total_args;
 }
 
diff --git a/compiler/jni/quick/x86/calling_convention_x86.h b/compiler/jni/quick/x86/calling_convention_x86.h
index ff92fc9..be83cda 100644
--- a/compiler/jni/quick/x86/calling_convention_x86.h
+++ b/compiler/jni/quick/x86/calling_convention_x86.h
@@ -52,9 +52,13 @@
   DISALLOW_COPY_AND_ASSIGN(X86ManagedRuntimeCallingConvention);
 };
 
+// Implements the x86 cdecl calling convention.
 class X86JniCallingConvention FINAL : public JniCallingConvention {
  public:
-  X86JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  X86JniCallingConvention(bool is_static,
+                          bool is_synchronized,
+                          bool is_critical_native,
+                          const char* shorty);
   ~X86JniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
index cbf10bd..8ca0ffe 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.cc
@@ -25,8 +25,16 @@
 namespace x86_64 {
 
 constexpr size_t kFramePointerSize = static_cast<size_t>(PointerSize::k64);
-
 static_assert(kX86_64PointerSize == PointerSize::k64, "Unexpected x86_64 pointer size");
+static_assert(kStackAlignment >= 16u, "System V AMD64 ABI requires at least 16 byte stack alignment");
+
+// XMM0..XMM7 can be used to pass the first 8 floating args. The rest must go on the stack.
+// -- Managed and JNI calling conventions.
+constexpr size_t kMaxFloatOrDoubleRegisterArguments = 8u;
+// Up to how many integer-like (pointers, objects, longs, int, short, bool, etc) args can be
+// enregistered. The rest of the args must go on the stack.
+// -- JNI calling convention only (Managed excludes RDI, so it's actually 5).
+constexpr size_t kMaxIntLikeRegisterArguments = 6u;
 
 static constexpr ManagedRegister kCalleeSaveRegisters[] = {
     // Core registers.
@@ -130,7 +138,7 @@
     case 3: res = X86_64ManagedRegister::FromCpuRegister(R8); break;
     case 4: res = X86_64ManagedRegister::FromCpuRegister(R9); break;
     }
-  } else if (itr_float_and_doubles_ < 8) {
+  } else if (itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments) {
     // First eight float parameters are passed via XMM0..XMM7
     res = X86_64ManagedRegister::FromXmmRegister(
                                  static_cast<FloatRegister>(XMM0 + itr_float_and_doubles_));
@@ -165,9 +173,15 @@
 
 // JNI calling convention
 
-X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static, bool is_synchronized,
+X86_64JniCallingConvention::X86_64JniCallingConvention(bool is_static,
+                                                       bool is_synchronized,
+                                                       bool is_critical_native,
                                                        const char* shorty)
-    : JniCallingConvention(is_static, is_synchronized, shorty, kX86_64PointerSize) {
+    : JniCallingConvention(is_static,
+                           is_synchronized,
+                           is_critical_native,
+                           shorty,
+                           kX86_64PointerSize) {
 }
 
 uint32_t X86_64JniCallingConvention::CoreSpillMask() const {
@@ -179,13 +193,30 @@
 }
 
 size_t X86_64JniCallingConvention::FrameSize() {
-  // Method*, return address and callee save area size, local reference segment state
-  size_t frame_data_size = static_cast<size_t>(kX86_64PointerSize) +
-      (2 + CalleeSaveRegisters().size()) * kFramePointerSize;
+  // Method*, PC return address and callee save area size, local reference segment state
+  const size_t method_ptr_size = static_cast<size_t>(kX86_64PointerSize);
+  const size_t pc_return_addr_size = kFramePointerSize;
+  const size_t callee_save_area_size = CalleeSaveRegisters().size() * kFramePointerSize;
+  size_t frame_data_size = method_ptr_size + pc_return_addr_size + callee_save_area_size;
+
+  if (LIKELY(HasLocalReferenceSegmentState())) {                     // local ref. segment state
+    // Local reference segment state is sometimes excluded.
+    frame_data_size += kFramePointerSize;
+  }
+
   // References plus link_ (pointer) and number_of_references_ (uint32_t) for HandleScope header
-  size_t handle_scope_size = HandleScope::SizeOf(kX86_64PointerSize, ReferenceCount());
+  const size_t handle_scope_size = HandleScope::SizeOf(kX86_64PointerSize, ReferenceCount());
+
+  size_t total_size = frame_data_size;
+  if (LIKELY(HasHandleScope())) {
+    // HandleScope is sometimes excluded.
+    total_size += handle_scope_size;                                 // handle scope size
+  }
+
   // Plus return value spill area size
-  return RoundUp(frame_data_size + handle_scope_size + SizeOfReturnValue(), kStackAlignment);
+  total_size += SizeOfReturnValue();
+
+  return RoundUp(total_size, kStackAlignment);
 }
 
 size_t X86_64JniCallingConvention::OutArgSize() {
@@ -214,8 +245,9 @@
     case 3: res = X86_64ManagedRegister::FromCpuRegister(RCX); break;
     case 4: res = X86_64ManagedRegister::FromCpuRegister(R8); break;
     case 5: res = X86_64ManagedRegister::FromCpuRegister(R9); break;
+    static_assert(5u == kMaxIntLikeRegisterArguments - 1, "Missing case statement(s)");
     }
-  } else if (itr_float_and_doubles_ < 8) {
+  } else if (itr_float_and_doubles_ < kMaxFloatOrDoubleRegisterArguments) {
     // First eight float parameters are passed via XMM0..XMM7
     res = X86_64ManagedRegister::FromXmmRegister(
                                  static_cast<FloatRegister>(XMM0 + itr_float_and_doubles_));
@@ -224,24 +256,35 @@
 }
 
 FrameOffset X86_64JniCallingConvention::CurrentParamStackOffset() {
-  size_t offset = itr_args_
-      - std::min(8U, itr_float_and_doubles_)               // Float arguments passed through Xmm0..Xmm7
-      - std::min(6U, itr_args_ - itr_float_and_doubles_);  // Integer arguments passed through GPR
-  return FrameOffset(displacement_.Int32Value() - OutArgSize() + (offset * kFramePointerSize));
+  CHECK(IsCurrentParamOnStack());
+  size_t args_on_stack = itr_args_
+      - std::min(kMaxFloatOrDoubleRegisterArguments,
+                 static_cast<size_t>(itr_float_and_doubles_))
+          // Float arguments passed through Xmm0..Xmm7
+      - std::min(kMaxIntLikeRegisterArguments,
+                 static_cast<size_t>(itr_args_ - itr_float_and_doubles_));
+          // Integer arguments passed through GPR
+  size_t offset = displacement_.Int32Value() - OutArgSize() + (args_on_stack * kFramePointerSize);
+  CHECK_LT(offset, OutArgSize());
+  return FrameOffset(offset);
 }
 
+// TODO: Calling this "NumberArgs" is misleading.
+// It's really more like NumberSlots (like itr_slots_)
+// because doubles/longs get counted twice.
 size_t X86_64JniCallingConvention::NumberOfOutgoingStackArgs() {
-  size_t static_args = IsStatic() ? 1 : 0;  // count jclass
+  size_t static_args = HasSelfClass() ? 1 : 0;  // count jclass
   // regular argument parameters and this
   size_t param_args = NumArgs() + NumLongOrDoubleArgs();
   // count JNIEnv* and return pc (pushed after Method*)
-  size_t total_args = static_args + param_args + 2;
+  size_t internal_args = 1 /* return pc */ + (HasJniEnv() ? 1 : 0 /* jni env */);
+  size_t total_args = static_args + param_args + internal_args;
 
   // Float arguments passed through Xmm0..Xmm7
   // Other (integer) arguments passed through GPR (RDI, RSI, RDX, RCX, R8, R9)
   size_t total_stack_args = total_args
-                            - std::min(8U, static_cast<unsigned int>(NumFloatOrDoubleArgs()))
-                            - std::min(6U, static_cast<unsigned int>(NumArgs() - NumFloatOrDoubleArgs()));
+                            - std::min(kMaxFloatOrDoubleRegisterArguments, static_cast<size_t>(NumFloatOrDoubleArgs()))
+                            - std::min(kMaxIntLikeRegisterArguments, static_cast<size_t>(NumArgs() - NumFloatOrDoubleArgs()));
 
   return total_stack_args;
 }
diff --git a/compiler/jni/quick/x86_64/calling_convention_x86_64.h b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
index b98f505..cdba334 100644
--- a/compiler/jni/quick/x86_64/calling_convention_x86_64.h
+++ b/compiler/jni/quick/x86_64/calling_convention_x86_64.h
@@ -48,7 +48,10 @@
 
 class X86_64JniCallingConvention FINAL : public JniCallingConvention {
  public:
-  X86_64JniCallingConvention(bool is_static, bool is_synchronized, const char* shorty);
+  X86_64JniCallingConvention(bool is_static,
+                             bool is_synchronized,
+                             bool is_critical_native,
+                             const char* shorty);
   ~X86_64JniCallingConvention() OVERRIDE {}
   // Calling convention
   ManagedRegister ReturnRegister() OVERRIDE;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 3c4a3e8..e76190f 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -5422,17 +5422,6 @@
 
 HLoadString::LoadKind CodeGeneratorARM::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    switch (desired_string_load_kind) {
-      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      case HLoadString::LoadKind::kBootImageAddress:
-        // TODO: Implement for read barrier.
-        return HLoadString::LoadKind::kDexCacheViaMethod;
-      default:
-        break;
-    }
-  }
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5485,13 +5474,11 @@
 
   switch (load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
       __ LoadLiteral(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
                                                                       load->GetStringIndex()));
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!kEmitCompilerReadBarrier);
       CodeGeneratorARM::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       __ BindTrackedLabel(&labels->movw_label);
@@ -5503,7 +5490,6 @@
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
       DCHECK_NE(load->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 1d2f334..64231ad 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -4124,17 +4124,6 @@
 
 HLoadString::LoadKind CodeGeneratorARM64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    switch (desired_string_load_kind) {
-      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      case HLoadString::LoadKind::kBootImageAddress:
-        // TODO: Implement for read barrier.
-        return HLoadString::LoadKind::kDexCacheViaMethod;
-      default:
-        break;
-    }
-  }
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -4175,12 +4164,10 @@
 
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!kEmitCompilerReadBarrier);
       __ Ldr(out, codegen_->DeduplicateBootImageStringLiteral(load->GetDexFile(),
                                                               load->GetStringIndex()));
       return;  // No dex cache slow path.
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!kEmitCompilerReadBarrier);
       // Add ADRP with its PC-relative String patch.
       const DexFile& dex_file = load->GetDexFile();
       uint32_t string_index = load->GetStringIndex();
@@ -4201,7 +4188,6 @@
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
       DCHECK(load->GetAddress() != 0u && IsUint<32>(load->GetAddress()));
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(load->GetAddress()));
       return;  // No dex cache slow path.
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 92e9cd9..f07f8a0 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -2378,13 +2378,8 @@
 
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      // TODO: don't use branches.
-      GenerateFpCompareAndBranch(instruction->GetCondition(),
-                                 instruction->IsGtBias(),
-                                 type,
-                                 locations,
-                                 &true_label);
-      break;
+      GenerateFpCompare(instruction->GetCondition(), instruction->IsGtBias(), type, locations);
+      return;
   }
 
   // Convert the branches into the result.
@@ -3177,6 +3172,230 @@
   }
 }
 
+void InstructionCodeGeneratorMIPS::GenerateFpCompare(IfCondition cond,
+                                                     bool gt_bias,
+                                                     Primitive::Type type,
+                                                     LocationSummary* locations) {
+  Register dst = locations->Out().AsRegister<Register>();
+  FRegister lhs = locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister rhs = locations->InAt(1).AsFpuRegister<FRegister>();
+  bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+  if (type == Primitive::kPrimFloat) {
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondNE:
+          __ CmpEqS(FTMP, lhs, rhs);
+          __ Mfc1(dst, FTMP);
+          __ Addiu(dst, dst, 1);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltS(FTMP, lhs, rhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeS(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleS(FTMP, lhs, rhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtS(FTMP, rhs, lhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleS(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeS(FTMP, rhs, lhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+          UNREACHABLE();
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondNE:
+          __ CeqS(0, lhs, rhs);
+          __ LoadConst32(dst, 1);
+          __ Movt(dst, ZERO, 0);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtS(0, lhs, rhs);
+          } else {
+            __ CultS(0, lhs, rhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeS(0, lhs, rhs);
+          } else {
+            __ CuleS(0, lhs, rhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultS(0, rhs, lhs);
+          } else {
+            __ ColtS(0, rhs, lhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleS(0, rhs, lhs);
+          } else {
+            __ ColeS(0, rhs, lhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+          UNREACHABLE();
+      }
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    if (isR6) {
+      switch (cond) {
+        case kCondEQ:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondNE:
+          __ CmpEqD(FTMP, lhs, rhs);
+          __ Mfc1(dst, FTMP);
+          __ Addiu(dst, dst, 1);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ CmpLtD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUltD(FTMP, lhs, rhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ CmpLeD(FTMP, lhs, rhs);
+          } else {
+            __ CmpUleD(FTMP, lhs, rhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CmpUltD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLtD(FTMP, rhs, lhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CmpUleD(FTMP, rhs, lhs);
+          } else {
+            __ CmpLeD(FTMP, rhs, lhs);
+          }
+          __ Mfc1(dst, FTMP);
+          __ Andi(dst, dst, 1);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+          UNREACHABLE();
+      }
+    } else {
+      switch (cond) {
+        case kCondEQ:
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondNE:
+          __ CeqD(0, lhs, rhs);
+          __ LoadConst32(dst, 1);
+          __ Movt(dst, ZERO, 0);
+          break;
+        case kCondLT:
+          if (gt_bias) {
+            __ ColtD(0, lhs, rhs);
+          } else {
+            __ CultD(0, lhs, rhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondLE:
+          if (gt_bias) {
+            __ ColeD(0, lhs, rhs);
+          } else {
+            __ CuleD(0, lhs, rhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondGT:
+          if (gt_bias) {
+            __ CultD(0, rhs, lhs);
+          } else {
+            __ ColtD(0, rhs, lhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        case kCondGE:
+          if (gt_bias) {
+            __ CuleD(0, rhs, lhs);
+          } else {
+            __ ColeD(0, rhs, lhs);
+          }
+          __ LoadConst32(dst, 1);
+          __ Movf(dst, ZERO, 0);
+          break;
+        default:
+          LOG(FATAL) << "Unexpected non-floating-point condition " << cond;
+          UNREACHABLE();
+      }
+    }
+  }
+}
+
 void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
                                                               bool gt_bias,
                                                               Primitive::Type type,
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 7ba6c0d..0039981 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -243,6 +243,10 @@
   void GenerateLongCompareAndBranch(IfCondition cond,
                                     LocationSummary* locations,
                                     MipsLabel* label);
+  void GenerateFpCompare(IfCondition cond,
+                         bool gt_bias,
+                         Primitive::Type type,
+                         LocationSummary* locations);
   void GenerateFpCompareAndBranch(IfCondition cond,
                                   bool gt_bias,
                                   Primitive::Type type,
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b3b648f..72ab615 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -213,35 +213,6 @@
   DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86);
 };
 
-class LoadStringSlowPathX86 : public SlowPathCode {
- public:
-  explicit LoadStringSlowPathX86(HLoadString* instruction): SlowPathCode(instruction) {}
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
-
-    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
-    __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
-    __ movl(calling_convention.GetRegisterAt(0), Immediate(string_index));
-    x86_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
-    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
-    x86_codegen->Move32(locations->Out(), Location::RegisterLocation(EAX));
-    RestoreLiveRegisters(codegen, locations);
-
-    __ jmp(GetExitLabel());
-  }
-
-  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86"; }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86);
-};
-
 class LoadClassSlowPathX86 : public SlowPathCode {
  public:
   LoadClassSlowPathX86(HLoadClass* cls,
@@ -6094,17 +6065,6 @@
 
 HLoadString::LoadKind CodeGeneratorX86::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    switch (desired_string_load_kind) {
-      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      case HLoadString::LoadKind::kBootImageAddress:
-        // TODO: Implement for read barrier.
-        return HLoadString::LoadKind::kDexCacheViaMethod;
-      default:
-        break;
-    }
-  }
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -6135,20 +6095,20 @@
 
 void LocationsBuilderX86::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
-      ? LocationSummary::kCallOnSlowPath
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (kUseBakerReadBarrier && !load->NeedsEnvironment()) {
-    locations->SetCustomSlowPathCallerSaves(RegisterSet());  // No caller-save registers.
-  }
-
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
       load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
       load_kind == HLoadString::LoadKind::kDexCachePcRelative) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
-  locations->SetOut(Location::RequiresRegister());
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+    locations->SetOut(Location::RegisterLocation(EAX));
+  } else {
+    locations->SetOut(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorX86::VisitLoadString(HLoadString* load) {
@@ -6158,20 +6118,17 @@
 
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
       __ movl(out, Immediate(/* placeholder */ 0));
       codegen_->RecordStringPatch(load);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!kEmitCompilerReadBarrier);
       Register method_address = locations->InAt(0).AsRegister<Register>();
       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
       codegen_->RecordStringPatch(load);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
       DCHECK_NE(load->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
       __ movl(out, Immediate(address));
@@ -6183,10 +6140,10 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
-  codegen_->AddSlowPath(slow_path);
-  __ jmp(slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  InvokeRuntimeCallingConvention calling_convention;
+  __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
+  codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
 
 static Address GetExceptionTlsAddress() {
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index b3228f8..cbb5799 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -288,37 +288,6 @@
   DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
 };
 
-class LoadStringSlowPathX86_64 : public SlowPathCode {
- public:
-  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    LocationSummary* locations = instruction_->GetLocations();
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
-
-    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
-
-    InvokeRuntimeCallingConvention calling_convention;
-    const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
-    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
-    x86_64_codegen->InvokeRuntime(kQuickResolveString,
-                                  instruction_,
-                                  instruction_->GetDexPc(),
-                                  this);
-    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
-    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
-    RestoreLiveRegisters(codegen, locations);
-    __ jmp(GetExitLabel());
-  }
-
-  const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
-};
-
 class TypeCheckSlowPathX86_64 : public SlowPathCode {
  public:
   TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
@@ -5526,17 +5495,6 @@
 
 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    switch (desired_string_load_kind) {
-      case HLoadString::LoadKind::kBootImageLinkTimeAddress:
-      case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
-      case HLoadString::LoadKind::kBootImageAddress:
-        // TODO: Implement for read barrier.
-        return HLoadString::LoadKind::kDexCacheViaMethod;
-      default:
-        break;
-    }
-  }
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(!GetCompilerOptions().GetCompilePic());
@@ -5560,18 +5518,16 @@
 }
 
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
-  LocationSummary::CallKind call_kind = (load->NeedsEnvironment() || kEmitCompilerReadBarrier)
-      ? LocationSummary::kCallOnSlowPath
+  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+      ? LocationSummary::kCallOnMainOnly
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (kUseBakerReadBarrier && !load->NeedsEnvironment()) {
-    locations->SetCustomSlowPathCallerSaves(RegisterSet());  // No caller-save registers.
-  }
-
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
     locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetOut(Location::RegisterLocation(RAX));
+  } else {
+    locations->SetOut(Location::RequiresRegister());
   }
-  locations->SetOut(Location::RequiresRegister());
 }
 
 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) {
@@ -5581,13 +5537,11 @@
 
   switch (load->GetLoadKind()) {
     case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!kEmitCompilerReadBarrier);
       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
       codegen_->RecordStringPatch(load);
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
       DCHECK_NE(load->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(load->GetAddress());
       __ movl(out, Immediate(address));  // Zero-extended.
@@ -5599,10 +5553,13 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
-  codegen_->AddSlowPath(slow_path);
-  __ jmp(slow_path->GetEntryLabel());
-  __ Bind(slow_path->GetExitLabel());
+  InvokeRuntimeCallingConvention calling_convention;
+  __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
+          Immediate(load->GetStringIndex()));
+  codegen_->InvokeRuntime(kQuickResolveString,
+                          load,
+                          load->GetDexPc());
+  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
 }
 
 static Address GetExceptionTlsAddress() {
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index cea4a7e..eda0971 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -38,7 +38,7 @@
 static_assert((SP == 31) && (WSP == 31) && (XZR == 32) && (WZR == 32),
               "Unexpected values for register codes.");
 
-static inline int VIXLRegCodeFromART(int code) {
+inline int VIXLRegCodeFromART(int code) {
   if (code == SP) {
     return vixl::aarch64::kSPRegInternalCode;
   }
@@ -48,7 +48,7 @@
   return code;
 }
 
-static inline int ARTRegCodeFromVIXL(int code) {
+inline int ARTRegCodeFromVIXL(int code) {
   if (code == vixl::aarch64::kSPRegInternalCode) {
     return SP;
   }
@@ -58,85 +58,85 @@
   return code;
 }
 
-static inline vixl::aarch64::Register XRegisterFrom(Location location) {
+inline vixl::aarch64::Register XRegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
   return vixl::aarch64::Register::GetXRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::aarch64::Register WRegisterFrom(Location location) {
+inline vixl::aarch64::Register WRegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
   return vixl::aarch64::Register::GetWRegFromCode(VIXLRegCodeFromART(location.reg()));
 }
 
-static inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::Register RegisterFrom(Location location, Primitive::Type type) {
   DCHECK(type != Primitive::kPrimVoid && !Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimLong ? XRegisterFrom(location) : WRegisterFrom(location);
 }
 
-static inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
+inline vixl::aarch64::Register OutputRegister(HInstruction* instr) {
   return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
+inline vixl::aarch64::Register InputRegisterAt(HInstruction* instr, int input_index) {
   return RegisterFrom(instr->GetLocations()->InAt(input_index),
                       instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
+inline vixl::aarch64::FPRegister DRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegister()) << location;
   return vixl::aarch64::FPRegister::GetDRegFromCode(location.reg());
 }
 
-static inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
+inline vixl::aarch64::FPRegister SRegisterFrom(Location location) {
   DCHECK(location.IsFpuRegister()) << location;
   return vixl::aarch64::FPRegister::GetSRegFromCode(location.reg());
 }
 
-static inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::FPRegister FPRegisterFrom(Location location, Primitive::Type type) {
   DCHECK(Primitive::IsFloatingPointType(type)) << type;
   return type == Primitive::kPrimDouble ? DRegisterFrom(location) : SRegisterFrom(location);
 }
 
-static inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
+inline vixl::aarch64::FPRegister OutputFPRegister(HInstruction* instr) {
   return FPRegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
 
-static inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
+inline vixl::aarch64::FPRegister InputFPRegisterAt(HInstruction* instr, int input_index) {
   return FPRegisterFrom(instr->GetLocations()->InAt(input_index),
                         instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::CPURegister CPURegisterFrom(Location location, Primitive::Type type) {
   return Primitive::IsFloatingPointType(type)
       ? vixl::aarch64::CPURegister(FPRegisterFrom(location, type))
       : vixl::aarch64::CPURegister(RegisterFrom(location, type));
 }
 
-static inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
+inline vixl::aarch64::CPURegister OutputCPURegister(HInstruction* instr) {
   return Primitive::IsFloatingPointType(instr->GetType())
       ? static_cast<vixl::aarch64::CPURegister>(OutputFPRegister(instr))
       : static_cast<vixl::aarch64::CPURegister>(OutputRegister(instr));
 }
 
-static inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
+inline vixl::aarch64::CPURegister InputCPURegisterAt(HInstruction* instr, int index) {
   return Primitive::IsFloatingPointType(instr->InputAt(index)->GetType())
       ? static_cast<vixl::aarch64::CPURegister>(InputFPRegisterAt(instr, index))
       : static_cast<vixl::aarch64::CPURegister>(InputRegisterAt(instr, index));
 }
 
-static inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* instr,
+inline vixl::aarch64::CPURegister InputCPURegisterOrZeroRegAt(HInstruction* instr,
                                                                      int index) {
   HInstruction* input = instr->InputAt(index);
   Primitive::Type input_type = input->GetType();
   if (input->IsConstant() && input->AsConstant()->IsZeroBitPattern()) {
     return (Primitive::ComponentSize(input_type) >= vixl::aarch64::kXRegSizeInBytes)
-        ?  vixl::aarch64::xzr
+        ? vixl::aarch64::xzr
         : vixl::aarch64::wzr;
   }
   return InputCPURegisterAt(instr, index);
 }
 
-static inline int64_t Int64ConstantFrom(Location location) {
+inline int64_t Int64ConstantFrom(Location location) {
   HConstant* instr = location.GetConstant();
   if (instr->IsIntConstant()) {
     return instr->AsIntConstant()->GetValue();
@@ -148,7 +148,7 @@
   }
 }
 
-static inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
+inline vixl::aarch64::Operand OperandFrom(Location location, Primitive::Type type) {
   if (location.IsRegister()) {
     return vixl::aarch64::Operand(RegisterFrom(location, type));
   } else {
@@ -156,23 +156,23 @@
   }
 }
 
-static inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
+inline vixl::aarch64::Operand InputOperandAt(HInstruction* instr, int input_index) {
   return OperandFrom(instr->GetLocations()->InAt(input_index),
                      instr->InputAt(input_index)->GetType());
 }
 
-static inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
+inline vixl::aarch64::MemOperand StackOperandFrom(Location location) {
   return vixl::aarch64::MemOperand(vixl::aarch64::sp, location.GetStackIndex());
 }
 
-static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
                                                     size_t offset = 0) {
   // A heap reference must be 32bit, so fit in a W register.
   DCHECK(base.IsW());
   return vixl::aarch64::MemOperand(base.X(), offset);
 }
 
-static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
                                                     const vixl::aarch64::Register& regoffset,
                                                     vixl::aarch64::Shift shift = vixl::aarch64::LSL,
                                                     unsigned shift_amount = 0) {
@@ -181,24 +181,24 @@
   return vixl::aarch64::MemOperand(base.X(), regoffset, shift, shift_amount);
 }
 
-static inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
+inline vixl::aarch64::MemOperand HeapOperand(const vixl::aarch64::Register& base,
                                                     Offset offset) {
   return HeapOperand(base, offset.SizeValue());
 }
 
-static inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
+inline vixl::aarch64::MemOperand HeapOperandFrom(Location location, Offset offset) {
   return HeapOperand(RegisterFrom(location, Primitive::kPrimNot), offset);
 }
 
-static inline Location LocationFrom(const vixl::aarch64::Register& reg) {
+inline Location LocationFrom(const vixl::aarch64::Register& reg) {
   return Location::RegisterLocation(ARTRegCodeFromVIXL(reg.GetCode()));
 }
 
-static inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
+inline Location LocationFrom(const vixl::aarch64::FPRegister& fpreg) {
   return Location::FpuRegisterLocation(fpreg.GetCode());
 }
 
-static inline vixl::aarch64::Operand OperandFromMemOperand(
+inline vixl::aarch64::Operand OperandFromMemOperand(
     const vixl::aarch64::MemOperand& mem_op) {
   if (mem_op.IsImmediateOffset()) {
     return vixl::aarch64::Operand(mem_op.GetOffset());
@@ -219,7 +219,7 @@
   }
 }
 
-static bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
   DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
       << constant->DebugName();
 
@@ -258,7 +258,7 @@
   }
 }
 
-static inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
+inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
                                                         HInstruction* instr) {
   if (constant->IsConstant()
       && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
@@ -272,7 +272,7 @@
 // codes are same, we can initialize vixl register list simply by the register masks. Currently,
 // only SP/WSP and ZXR/WZR codes are different between art and vixl.
 // Note: This function is only used for debug checks.
-static inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
+inline bool ArtVixlRegCodeCoherentForRegSet(uint32_t art_core_registers,
                                                    size_t num_core,
                                                    uint32_t art_fpu_registers,
                                                    size_t num_fpu) {
@@ -290,7 +290,7 @@
   return true;
 }
 
-static inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Shift ShiftFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
     case HArm64DataProcWithShifterOp::kASR: return vixl::aarch64::ASR;
     case HArm64DataProcWithShifterOp::kLSL: return vixl::aarch64::LSL;
@@ -302,7 +302,7 @@
   }
 }
 
-static inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
+inline vixl::aarch64::Extend ExtendFromOpKind(HArm64DataProcWithShifterOp::OpKind op_kind) {
   switch (op_kind) {
     case HArm64DataProcWithShifterOp::kUXTB: return vixl::aarch64::UXTB;
     case HArm64DataProcWithShifterOp::kUXTH: return vixl::aarch64::UXTH;
@@ -317,7 +317,7 @@
   }
 }
 
-static inline bool CanFitInShifterOperand(HInstruction* instruction) {
+inline bool CanFitInShifterOperand(HInstruction* instruction) {
   if (instruction->IsTypeConversion()) {
     HTypeConversion* conversion = instruction->AsTypeConversion();
     Primitive::Type result_type = conversion->GetResultType();
@@ -332,7 +332,7 @@
   }
 }
 
-static inline bool HasShifterOperand(HInstruction* instr) {
+inline bool HasShifterOperand(HInstruction* instr) {
   // `neg` instructions are an alias of `sub` using the zero register as the
   // first register input.
   bool res = instr->IsAdd() || instr->IsAnd() || instr->IsNeg() ||
@@ -340,7 +340,7 @@
   return res;
 }
 
-static inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
+inline bool ShifterOperandSupportsExtension(HInstruction* instruction) {
   DCHECK(HasShifterOperand(instruction));
   // Although the `neg` instruction is an alias of the `sub` instruction, `HNeg`
   // does *not* support extension. This is because the `extended register` form
@@ -351,7 +351,7 @@
   return instruction->IsAdd() || instruction->IsSub();
 }
 
-static inline bool IsConstantZeroBitPattern(const HInstruction* instruction) {
+inline bool IsConstantZeroBitPattern(const HInstruction* instruction) {
   return instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern();
 }
 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 8d4d143..b8e1379 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -297,7 +297,15 @@
       DCHECK(!runtime->UseJitCompilation());
       mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
       CHECK(string != nullptr);
-      // TODO: In follow up CL, add PcRelative and Address back in.
+      if (compiler_driver_->GetSupportBootImageFixup()) {
+        DCHECK(ContainsElement(compiler_driver_->GetDexFilesForOatFile(), &dex_file));
+        desired_load_kind = codegen_->GetCompilerOptions().GetCompilePic()
+            ? HLoadString::LoadKind::kBootImageLinkTimePcRelative
+            : HLoadString::LoadKind::kBootImageLinkTimeAddress;
+      } else {
+        // MIPS64 or compiler_driver_test. Do not sharpen.
+        DCHECK_EQ(desired_load_kind, HLoadString::LoadKind::kDexCacheViaMethod);
+      }
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
diff --git a/compiler/utils/arm/jni_macro_assembler_arm.cc b/compiler/utils/arm/jni_macro_assembler_arm.cc
index e0bfa12..cf7a4d1 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm.cc
@@ -367,11 +367,21 @@
       CHECK(src.IsCoreRegister()) << src;
       __ mov(dst.AsCoreRegister(), ShifterOperand(src.AsCoreRegister()));
     } else if (dst.IsDRegister()) {
-      CHECK(src.IsDRegister()) << src;
-      __ vmovd(dst.AsDRegister(), src.AsDRegister());
+      if (src.IsDRegister()) {
+        __ vmovd(dst.AsDRegister(), src.AsDRegister());
+      } else {
+        // VMOV Dn, Rlo, Rhi (Dn = {Rlo, Rhi})
+        CHECK(src.IsRegisterPair()) << src;
+        __ vmovdrr(dst.AsDRegister(), src.AsRegisterPairLow(), src.AsRegisterPairHigh());
+      }
     } else if (dst.IsSRegister()) {
-      CHECK(src.IsSRegister()) << src;
-      __ vmovs(dst.AsSRegister(), src.AsSRegister());
+      if (src.IsSRegister()) {
+        __ vmovs(dst.AsSRegister(), src.AsSRegister());
+      } else {
+        // VMOV Sn, Rn  (Sn = Rn)
+        CHECK(src.IsCoreRegister()) << src;
+        __ vmovsr(dst.AsSRegister(), src.AsCoreRegister());
+      }
     } else {
       CHECK(dst.IsRegisterPair()) << dst;
       CHECK(src.IsRegisterPair()) << src;
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 367ed97..3b05173 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -1661,13 +1661,19 @@
 TEST_F(ArmVIXLAssemblerTest, VixlJniHelpers) {
   const bool is_static = true;
   const bool is_synchronized = false;
+  const bool is_critical_native = false;
   const char* shorty = "IIFII";
 
   ArenaPool pool;
   ArenaAllocator arena(&pool);
 
   std::unique_ptr<JniCallingConvention> jni_conv(
-      JniCallingConvention::Create(&arena, is_static, is_synchronized, shorty, kThumb2));
+      JniCallingConvention::Create(&arena,
+                                   is_static,
+                                   is_synchronized,
+                                   is_critical_native,
+                                   shorty,
+                                   kThumb2));
   std::unique_ptr<ManagedRuntimeCallingConvention> mr_conv(
       ManagedRuntimeCallingConvention::Create(&arena, is_static, is_synchronized, shorty, kThumb2));
   const int frame_size(jni_conv->FrameSize());
diff --git a/oatdump/Android.mk b/oatdump/Android.mk
index 7be8a8d..d80df70 100644
--- a/oatdump/Android.mk
+++ b/oatdump/Android.mk
@@ -16,6 +16,11 @@
 
 LOCAL_PATH := $(call my-dir)
 
+########################################################################
+# oatdump targets
+
+ART_DUMP_OAT_PATH ?= $(OUT_DIR)
+
 OATDUMP := $(HOST_OUT_EXECUTABLES)/oatdump$(HOST_EXECUTABLE_SUFFIX)
 OATDUMPD := $(HOST_OUT_EXECUTABLES)/oatdumpd$(HOST_EXECUTABLE_SUFFIX)
 # TODO: for now, override with debug version for better error reporting
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index e0e1e81..25aa8ce 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -19,6 +19,16 @@
 #include "arch/quick_alloc_entrypoints.S"
 
 
+.macro INCREASE_FRAME frame_adjustment
+    sub sp, sp, #(\frame_adjustment)            // Grow the stack frame by moving sp down.
+    .cfi_adjust_cfa_offset (\frame_adjustment)  // Keep the CFA offset used for unwinding in sync.
+.endm
+
+.macro DECREASE_FRAME frame_adjustment
+    add sp, sp, #(\frame_adjustment)             // Shrink the stack frame by moving sp up.
+    .cfi_adjust_cfa_offset -(\frame_adjustment)  // Reduce the CFA offset by the same amount.
+.endm
+
 .macro SAVE_REG reg, offset
     str \reg, [sp, #(\offset)]
     .cfi_rel_offset \reg, (\offset)
@@ -70,8 +80,7 @@
     // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
     ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
 
-    sub sp, sp, #176
-    .cfi_adjust_cfa_offset 176
+    INCREASE_FRAME 176
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
@@ -115,8 +124,7 @@
     // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefOnly];
     ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
 
-    sub sp, sp, #96
-    .cfi_adjust_cfa_offset 96
+    INCREASE_FRAME 96
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
@@ -150,13 +158,11 @@
     RESTORE_TWO_REGS x27, x28, 64
     RESTORE_TWO_REGS x29, xLR, 80
 
-    add sp, sp, #96
-    .cfi_adjust_cfa_offset -96
+    DECREASE_FRAME 96
 .endm
 
 .macro POP_SAVE_REFS_ONLY_FRAME
-    add sp, sp, #96
-    .cfi_adjust_cfa_offset - 96
+    DECREASE_FRAME 96
 .endm
 
 .macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
@@ -166,8 +172,7 @@
 
 
 .macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
-    sub sp, sp, #224
-    .cfi_adjust_cfa_offset 224
+    INCREASE_FRAME 224
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
@@ -254,8 +259,7 @@
     // x29(callee-save) and LR.
     RESTORE_TWO_REGS x29, xLR, 208
 
-    add sp, sp, #224
-    .cfi_adjust_cfa_offset -224
+    DECREASE_FRAME 224
 .endm
 
     /*
@@ -263,8 +267,7 @@
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
 .macro SETUP_SAVE_EVERYTHING_FRAME
-    sub sp, sp, #512
-    .cfi_adjust_cfa_offset 512
+    INCREASE_FRAME 512
 
     // Ugly compile-time check, but we only have the preprocessor.
 #if (FRAME_SIZE_SAVE_EVERYTHING != 512)
@@ -364,8 +367,7 @@
     RESTORE_TWO_REGS x27, x28, 480
     RESTORE_TWO_REGS x29, xLR, 496
 
-    add sp, sp, #512
-    .cfi_adjust_cfa_offset -512
+    DECREASE_FRAME 512
 .endm
 
 .macro RETURN_IF_RESULT_IS_ZERO
@@ -1268,11 +1270,11 @@
     cbz x0, .Lthrow_class_cast_exception
 
     // Restore and return
+    .cfi_remember_state
     RESTORE_REG xLR, 24
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
     ret
-
-    .cfi_adjust_cfa_offset 32         // Reset unwind info so following code unwinds.
+    .cfi_restore_state                // Reset unwind info so following code unwinds.
 
 .Lthrow_class_cast_exception:
     // Restore
@@ -1351,8 +1353,7 @@
     POP_REG_NE x3, 24, \xDest
     POP_REG_NE x4, 32, \xDest
     RESTORE_REG xLR, 40
-    add sp, sp, #48
-    .cfi_adjust_cfa_offset -48
+    DECREASE_FRAME 48
 .Lrb_exit\number:
 #else
     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
@@ -1428,6 +1429,7 @@
     cbz x0, .Lthrow_array_store_exception
 
     // Restore
+    .cfi_remember_state
     RESTORE_TWO_REGS x2, xLR, 16
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
 
@@ -1439,7 +1441,7 @@
     lsr x0, x0, #7
     strb w3, [x3, x0]
     ret
-    .cfi_adjust_cfa_offset 32  // 4 restores after cbz for unwinding.
+    .cfi_restore_state            // Reset unwind info so following code unwinds.
 .Lthrow_array_store_exception:
     RESTORE_TWO_REGS x2, xLR, 16
     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
@@ -2468,8 +2470,8 @@
     SAVE_TWO_REGS x10, x11, 80
     SAVE_TWO_REGS x12, x13, 96
     SAVE_TWO_REGS x14, x15, 112
-    SAVE_TWO_REGS   x16, x17, 128
-    SAVE_TWO_REGS   x18, x19, 144
+    SAVE_TWO_REGS x16, x17, 128
+    SAVE_TWO_REGS x18, x19, 144
     // Save all potentially live caller-save floating-point registers.
     stp   d0, d1,   [sp, #160]
     stp   d2, d3,   [sp, #176]
@@ -2522,8 +2524,7 @@
     ldp   d30, d31, [sp, #336]
     // Restore return address and remove padding.
     RESTORE_REG xLR, 360
-    add sp, sp, #368
-    .cfi_adjust_cfa_offset -368
+    DECREASE_FRAME 368
 .Lret_rb_\name:
     ret
 END \name
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 0bf2a35..26717ad 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1487,6 +1487,7 @@
     ld     $a1, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32StaticFromCode        # (uint32_t field_idx, const Method* referrer, Thread*)
     move   $a2, rSELF                    # pass Thread::Current
+    sll    $v0, $v0, 0                   # sign-extend result
     RETURN_IF_NO_EXCEPTION
 END art_quick_get32_static
 
@@ -1571,6 +1572,7 @@
     ld     $a2, FRAME_SIZE_SAVE_REFS_ONLY($sp)  # pass referrer's Method*
     jal    artGet32InstanceFromCode      # (field_idx, Object*, referrer, Thread*)
     move   $a3, rSELF                    # pass Thread::Current
+    sll    $v0, $v0, 0                   # sign-extend result
     RETURN_IF_NO_EXCEPTION
 END art_quick_get32_instance
 
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index f9bc249..1392399 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -335,20 +335,30 @@
 }
 
 bool ArtMethod::IsAnnotatedWithFastNative() {
+  return IsAnnotatedWith(WellKnownClasses::dalvik_annotation_optimization_FastNative,
+                         DexFile::kDexVisibilityBuild);
+}
+
+bool ArtMethod::IsAnnotatedWithCriticalNative() {
+  return IsAnnotatedWith(WellKnownClasses::dalvik_annotation_optimization_CriticalNative,
+                         DexFile::kDexVisibilityBuild);  // Looked up with build visibility.
+}
+
+bool ArtMethod::IsAnnotatedWith(jclass klass, uint32_t visibility) {
   Thread* self = Thread::Current();
   ScopedObjectAccess soa(self);
   StackHandleScope<1> shs(self);
 
   const DexFile& dex_file = GetDeclaringClass()->GetDexFile();
 
-  mirror::Class* fast_native_annotation =
-      soa.Decode<mirror::Class*>(WellKnownClasses::dalvik_annotation_optimization_FastNative);
-  Handle<mirror::Class> fast_native_handle(shs.NewHandle(fast_native_annotation));
+  mirror::Class* annotation = soa.Decode<mirror::Class*>(klass);
+  DCHECK(annotation->IsAnnotation());
+  Handle<mirror::Class> annotation_handle(shs.NewHandle(annotation));
 
   // Note: Resolves any method annotations' classes as a side-effect.
   // -- This seems allowed by the spec since it says we can preload any classes
   //    referenced by another classes's constant pool table.
-  return dex_file.IsMethodAnnotationPresent(this, fast_native_handle, DexFile::kDexVisibilityBuild);
+  return dex_file.IsMethodAnnotationPresent(this, annotation_handle, visibility);
 }
 
 bool ArtMethod::EqualParameters(Handle<mirror::ObjectArray<mirror::Class>> params) {
diff --git a/runtime/art_method.h b/runtime/art_method.h
index b25087c..8051a1f 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -378,6 +378,10 @@
   // -- Independent of kAccFastNative access flags.
   bool IsAnnotatedWithFastNative();
 
+  // Checks to see if the method was annotated with @dalvik.annotation.optimization.CriticalNative
+  // -- Unrelated to the GC notion of "critical".
+  bool IsAnnotatedWithCriticalNative();
+
   // Returns true if this method could be overridden by a default method.
   bool IsOverridableByDefaultMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -776,6 +780,8 @@
   } ptr_sized_fields_;
 
  private:
+  bool IsAnnotatedWith(jclass klass, uint32_t visibility);
+
   static constexpr size_t PtrSizedFieldsOffset(PointerSize pointer_size) {
     // Round up to pointer size for padding field. Tested in art_method.cc.
     return RoundUp(offsetof(ArtMethod, hotness_count_) + sizeof(hotness_count_),
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 726e897..8edb1b4 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -6533,39 +6533,90 @@
   return true;
 }
 
-// Finds the method with a name/signature that matches cmp in the given list of methods. The list of
-// methods must be unique.
+// Finds the method with a name/signature that matches cmp in the given lists of methods. The lists
+// of methods must be unique. This overload is the base case (no lists left): nothing matches.
+static ArtMethod* FindSameNameAndSignature(MethodNameAndSignatureComparator& cmp ATTRIBUTE_UNUSED) {
+  return nullptr;
+}
+
+template <typename ... Types>
 static ArtMethod* FindSameNameAndSignature(MethodNameAndSignatureComparator& cmp,
-                                           const ScopedArenaVector<ArtMethod*>& list)
+                                           const ScopedArenaVector<ArtMethod*>& list,
+                                           const Types& ... rest)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   for (ArtMethod* method : list) {
     if (cmp.HasSameNameAndSignature(method)) {
       return method;
     }
   }
-  return nullptr;
+  return FindSameNameAndSignature(cmp, rest...);
 }
 
-static void SanityCheckVTable(Handle<mirror::Class> klass, PointerSize pointer_size)
+// Checks that every vtable entry has the method index of its slot and is either one of this
+// class's virtual methods or identical to the superclass's vtable entry at the same index.
+static void CheckClassOwnsVTableEntries(Thread* self,
+                                        Handle<mirror::Class> klass,
+                                        PointerSize pointer_size)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::PointerArray* check_vtable = klass->GetVTableDuringLinking();
-  mirror::Class* superclass = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
-  int32_t super_vtable_length = (superclass != nullptr) ? superclass->GetVTableLength() : 0;
+  StackHandleScope<2> hs(self);
+  Handle<mirror::PointerArray> check_vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
+  mirror::Class* super_temp = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
+  Handle<mirror::Class> superclass(hs.NewHandle(super_temp));
+  int32_t super_vtable_length = (superclass.Get() != nullptr) ? superclass->GetVTableLength() : 0;
   for (int32_t i = 0; i < check_vtable->GetLength(); ++i) {
     ArtMethod* m = check_vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size);
     CHECK(m != nullptr);
 
+    CHECK_EQ(m->GetMethodIndexDuringLinking(), i)
+        << PrettyMethod(m) << " has an unexpected method index for its spot in the vtable of class "
+        << PrettyClass(klass.Get());
     ArraySlice<ArtMethod> virtuals = klass->GetVirtualMethodsSliceUnchecked(pointer_size);
     auto is_same_method = [m] (const ArtMethod& meth) {
       return &meth == m;
     };
     CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
           std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())
-        << "While linking class '" << PrettyClass(klass.Get()) << "' unable to find owning class "
-        << "of '" << PrettyMethod(m) << "' (vtable index: " << i << ").";
+        << PrettyMethod(m) << " does not seem to be owned by current class "
+        << PrettyClass(klass.Get()) << " or any of its superclasses!";
   }
 }
 
+// Checks that no vtable entry accessible to this class recurs later in the vtable (same method or
+// same name and signature). Duplicates could cause problems when a subclass overrides a method.
+static void CheckVTableHasNoDuplicates(Thread* self,
+                                       Handle<mirror::Class> klass,
+                                       PointerSize pointer_size)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  StackHandleScope<1> hs(self);
+  Handle<mirror::PointerArray> vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
+  int32_t num_entries = vtable->GetLength();
+  for (int32_t i = 0; i < num_entries; i++) {
+    ArtMethod* vtable_entry = vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size);
+    // Don't bother if we cannot 'see' the vtable entry (e.g. it may be a package-private member).
+    if (!klass->CanAccessMember(vtable_entry->GetDeclaringClass(),
+                                vtable_entry->GetAccessFlags())) {
+      continue;
+    }
+    MethodNameAndSignatureComparator name_comparator(
+        vtable_entry->GetInterfaceMethodIfProxy(pointer_size));
+    for (int32_t j = i+1; j < num_entries; j++) {  // Compare with every later entry.
+      ArtMethod* other_entry = vtable->GetElementPtrSize<ArtMethod*>(j, pointer_size);
+      CHECK(vtable_entry != other_entry &&
+            !name_comparator.HasSameNameAndSignature(
+                other_entry->GetInterfaceMethodIfProxy(pointer_size)))
+          << "vtable entries " << i << " and " << j << " are identical for "
+          << PrettyClass(klass.Get()) << " in method " << PrettyMethod(vtable_entry) << " and "
+          << PrettyMethod(other_entry);
+    }
+  }
+}
+
+static void SanityCheckVTable(Thread* self, Handle<mirror::Class> klass, PointerSize pointer_size)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  CheckClassOwnsVTableEntries(self, klass, pointer_size);  // Entries are own or inherited.
+  CheckVTableHasNoDuplicates(self, klass, pointer_size);   // No repeated name/signature.
+}
+
 void ClassLinker::FillImtFromSuperClass(Handle<mirror::Class> klass,
                                         ArtMethod* unimplemented_method,
                                         ArtMethod* imt_conflict_method,
@@ -6624,8 +6675,10 @@
   ScopedArenaAllocator allocator(&stack);
 
   ScopedArenaVector<ArtMethod*> default_conflict_methods(allocator.Adapter());
+  ScopedArenaVector<ArtMethod*> overriding_default_conflict_methods(allocator.Adapter());
   ScopedArenaVector<ArtMethod*> miranda_methods(allocator.Adapter());
   ScopedArenaVector<ArtMethod*> default_methods(allocator.Adapter());
+  ScopedArenaVector<ArtMethod*> overriding_default_methods(allocator.Adapter());
 
   MutableHandle<mirror::PointerArray> vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
@@ -6829,8 +6882,10 @@
               default_conflict_method = vtable_impl;
             } else {
               // See if we already have a conflict method for this method.
-              ArtMethod* preexisting_conflict = FindSameNameAndSignature(interface_name_comparator,
-                                                                         default_conflict_methods);
+              ArtMethod* preexisting_conflict = FindSameNameAndSignature(
+                  interface_name_comparator,
+                  default_conflict_methods,
+                  overriding_default_conflict_methods);
               if (LIKELY(preexisting_conflict != nullptr)) {
                 // We already have another conflict we can reuse.
                 default_conflict_method = preexisting_conflict;
@@ -6841,7 +6896,13 @@
                 default_conflict_method =
                     reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
                 new(default_conflict_method) ArtMethod(interface_method, image_pointer_size_);
-                default_conflict_methods.push_back(default_conflict_method);
+                if (vtable_impl == nullptr) {
+                  // Save the conflict method. We need to add it to the vtable.
+                  default_conflict_methods.push_back(default_conflict_method);
+                } else {
+                  // Save the conflict method but it is already in the vtable.
+                  overriding_default_conflict_methods.push_back(default_conflict_method);
+                }
               }
             }
             current_method = default_conflict_method;
@@ -6861,11 +6922,18 @@
               // TODO It might be worthwhile to copy default methods on interfaces anyway since it
               //      would make lookup for interface super much faster. (We would only need to scan
               //      the iftable to find if there is a NSME or AME.)
-              ArtMethod* old = FindSameNameAndSignature(interface_name_comparator, default_methods);
+              ArtMethod* old = FindSameNameAndSignature(interface_name_comparator,
+                                                        default_methods,
+                                                        overriding_default_methods);
               if (old == nullptr) {
                 // We found a default method implementation and there were no conflicts.
-                // Save the default method. We need to add it to the vtable.
-                default_methods.push_back(current_method);
+                if (vtable_impl == nullptr) {
+                  // Save the default method. We need to add it to the vtable.
+                  default_methods.push_back(current_method);
+                } else {
+                  // Save the default method but it is already in the vtable.
+                  overriding_default_methods.push_back(current_method);
+                }
               } else {
                 CHECK(old == current_method) << "Multiple default implementations selected!";
               }
@@ -6920,6 +6988,8 @@
   }  // For each interface.
   const bool has_new_virtuals = !(miranda_methods.empty() &&
                                   default_methods.empty() &&
+                                  overriding_default_methods.empty() &&
+                                  overriding_default_conflict_methods.empty() &&
                                   default_conflict_methods.empty());
   // TODO don't extend virtuals of interface unless necessary (when is it?).
   if (has_new_virtuals) {
@@ -6927,11 +6997,16 @@
         << "Interfaces should only have default-conflict methods appended to them.";
     VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
                        << " default_methods=" << default_methods.size()
-                       << " default_conflict_methods=" << default_conflict_methods.size();
+                       << " overriding_default_methods=" << overriding_default_methods.size()
+                       << " default_conflict_methods=" << default_conflict_methods.size()
+                       << " overriding_default_conflict_methods="
+                       << overriding_default_conflict_methods.size();
     const size_t old_method_count = klass->NumMethods();
     const size_t new_method_count = old_method_count +
                                     miranda_methods.size() +
                                     default_methods.size() +
+                                    overriding_default_conflict_methods.size() +
+                                    overriding_default_methods.size() +
                                     default_conflict_methods.size();
     // Attempt to realloc to save RAM if possible.
     LengthPrefixedArray<ArtMethod>* old_methods = klass->GetMethodsPtr();
@@ -6986,36 +7061,42 @@
     // interface but will have different ArtMethod*s for them. This also means we cannot compare a
     // default method found on a class with one found on the declaring interface directly and must
     // look at the declaring class to determine if they are the same.
-    for (ArtMethod* def_method : default_methods) {
-      ArtMethod& new_method = *out;
-      new_method.CopyFrom(def_method, image_pointer_size_);
-      // Clear the kAccSkipAccessChecks flag if it is present. Since this class hasn't been verified
-      // yet it shouldn't have methods that are skipping access checks.
-      // TODO This is rather arbitrary. We should maybe support classes where only some of its
-      // methods are skip_access_checks.
-      constexpr uint32_t kSetFlags = kAccDefault | kAccCopied;
-      constexpr uint32_t kMaskFlags = ~kAccSkipAccessChecks;
-      new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
-      move_table.emplace(def_method, &new_method);
-      ++out;
+    for (const ScopedArenaVector<ArtMethod*>& methods_vec : {default_methods,
+                                                             overriding_default_methods}) {
+      for (ArtMethod* def_method : methods_vec) {
+        ArtMethod& new_method = *out;
+        new_method.CopyFrom(def_method, image_pointer_size_);
+        // Clear the kAccSkipAccessChecks flag if it is present. Since this class hasn't been
+        // verified yet it shouldn't have methods that are skipping access checks.
+        // TODO This is rather arbitrary. We should maybe support classes where only some of its
+        // methods are skip_access_checks.
+        constexpr uint32_t kSetFlags = kAccDefault | kAccCopied;
+        constexpr uint32_t kMaskFlags = ~kAccSkipAccessChecks;
+        new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
+        move_table.emplace(def_method, &new_method);
+        ++out;
+      }
     }
-    for (ArtMethod* conf_method : default_conflict_methods) {
-      ArtMethod& new_method = *out;
-      new_method.CopyFrom(conf_method, image_pointer_size_);
-      // This is a type of default method (there are default method impls, just a conflict) so mark
-      // this as a default, non-abstract method, since thats what it is. Also clear the
-      // kAccSkipAccessChecks bit since this class hasn't been verified yet it shouldn't have
-      // methods that are skipping access checks.
-      constexpr uint32_t kSetFlags = kAccDefault | kAccDefaultConflict | kAccCopied;
-      constexpr uint32_t kMaskFlags = ~(kAccAbstract | kAccSkipAccessChecks);
-      new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
-      DCHECK(new_method.IsDefaultConflicting());
-      // The actual method might or might not be marked abstract since we just copied it from a
-      // (possibly default) interface method. We need to set it entry point to be the bridge so that
-      // the compiler will not invoke the implementation of whatever method we copied from.
-      EnsureThrowsInvocationError(&new_method);
-      move_table.emplace(conf_method, &new_method);
-      ++out;
+    for (const ScopedArenaVector<ArtMethod*>& methods_vec : {default_conflict_methods,
+                                                             overriding_default_conflict_methods}) {
+      for (ArtMethod* conf_method : methods_vec) {
+        ArtMethod& new_method = *out;
+        new_method.CopyFrom(conf_method, image_pointer_size_);
+        // This is a type of default method (there are default method impls, just a conflict) so
+        // mark this as a default, non-abstract method, since that's what it is. Also clear the
+        // kAccSkipAccessChecks bit since this class hasn't been verified yet it shouldn't have
+        // methods that are skipping access checks.
+        constexpr uint32_t kSetFlags = kAccDefault | kAccDefaultConflict | kAccCopied;
+        constexpr uint32_t kMaskFlags = ~(kAccAbstract | kAccSkipAccessChecks);
+        new_method.SetAccessFlags((new_method.GetAccessFlags() | kSetFlags) & kMaskFlags);
+        DCHECK(new_method.IsDefaultConflicting());
+        // The actual method might or might not be marked abstract since we just copied it from a
+        // (possibly default) interface method. We need to set its entry point to be the bridge so
+        // that the compiler will not invoke the implementation of whatever method we copied from.
+        EnsureThrowsInvocationError(&new_method);
+        move_table.emplace(conf_method, &new_method);
+        ++out;
+      }
     }
     methods->SetSize(new_method_count);
     UpdateClassMethods(klass.Get(), methods);
@@ -7031,22 +7112,31 @@
                                       miranda_methods.size() +
                                       default_methods.size() +
                                       default_conflict_methods.size();
+
       vtable.Assign(down_cast<mirror::PointerArray*>(vtable->CopyOf(self, new_vtable_count)));
       if (UNLIKELY(vtable.Get() == nullptr)) {
         self->AssertPendingOOMException();
         return false;
       }
-      out = methods->begin(method_size, method_alignment) + old_method_count;
       size_t vtable_pos = old_vtable_count;
       // Update all the newly copied method's indexes so they denote their placement in the vtable.
-      for (size_t i = old_method_count; i < new_method_count; ++i) {
-        // Leave the declaring class alone the method's dex_code_item_offset_ and dex_method_index_
-        // fields are references into the dex file the method was defined in. Since the ArtMethod
-        // does not store that information it uses declaring_class_->dex_cache_.
-        out->SetMethodIndex(0xFFFF & vtable_pos);
-        vtable->SetElementPtrSize(vtable_pos, &*out, image_pointer_size_);
-        ++out;
-        ++vtable_pos;
+      for (const ScopedArenaVector<ArtMethod*>& methods_vec : {default_methods,
+                                                               default_conflict_methods,
+                                                               miranda_methods}) {
+        // These are the functions that are not already in the vtable!
+        for (ArtMethod* new_method : methods_vec) {
+          auto translated_method_it = move_table.find(new_method);
+          CHECK(translated_method_it != move_table.end())
+              << "We must have a translation for methods added to the classes methods_ array! We "
+              << "could not find the ArtMethod added for " << PrettyMethod(new_method);
+          ArtMethod* new_vtable_method = translated_method_it->second;
+          // Leave the declaring class alone: the method's dex_code_item_offset_ and
+          // dex_method_index_ fields are references into the dex file the method was defined in.
+          // The ArtMethod does not store that information, so it uses declaring_class_->dex_cache_.
+          new_vtable_method->SetMethodIndex(0xFFFF & vtable_pos);
+          vtable->SetElementPtrSize(vtable_pos, new_vtable_method, image_pointer_size_);
+          ++vtable_pos;
+        }
       }
       CHECK_EQ(vtable_pos, new_vtable_count);
       // Update old vtable methods. We use the default_translations map to figure out what each
@@ -7062,8 +7152,10 @@
             // Find which conflict method we are to use for this method.
             MethodNameAndSignatureComparator old_method_comparator(
                 translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
-            ArtMethod* new_conflict_method = FindSameNameAndSignature(old_method_comparator,
-                                                                      default_conflict_methods);
+            // We only need to look through overriding_default_conflict_methods since this is an
+            // overridden method we are fixing up here.
+            ArtMethod* new_conflict_method = FindSameNameAndSignature(
+                old_method_comparator, overriding_default_conflict_methods);
             CHECK(new_conflict_method != nullptr) << "Expected a conflict method!";
             translated_method = new_conflict_method;
           } else if (translation_it->second.IsAbstract()) {
@@ -7071,7 +7163,7 @@
             MethodNameAndSignatureComparator old_method_comparator(
                 translated_method->GetInterfaceMethodIfProxy(image_pointer_size_));
             ArtMethod* miranda_method = FindSameNameAndSignature(old_method_comparator,
-                                                                miranda_methods);
+                                                                 miranda_methods);
             DCHECK(miranda_method != nullptr);
             translated_method = miranda_method;
           } else {
@@ -7086,6 +7178,14 @@
         if (it != move_table.end()) {
           auto* new_method = it->second;
           DCHECK(new_method != nullptr);
+          // Make sure the new_method's index is set.
+          if (new_method->GetMethodIndexDuringLinking() != i) {
+            DCHECK_LE(reinterpret_cast<uintptr_t>(&*methods->begin(method_size, method_alignment)),
+                      reinterpret_cast<uintptr_t>(new_method));
+            DCHECK_LT(reinterpret_cast<uintptr_t>(new_method),
+                      reinterpret_cast<uintptr_t>(&*methods->end(method_size, method_alignment)));
+            new_method->SetMethodIndex(0xFFFF & i);
+          }
           vtable->SetElementPtrSize(i, new_method, image_pointer_size_);
         } else {
           // If it was not going to be updated we wouldn't have put it into the default_translations
@@ -7149,7 +7249,7 @@
     self->EndAssertNoThreadSuspension(old_cause);
   }
   if (kIsDebugBuild && !is_interface) {
-    SanityCheckVTable(klass, image_pointer_size_);
+    SanityCheckVTable(self, klass, image_pointer_size_);
   }
   return true;
 }
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 741b682..dba0a81 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -608,6 +608,10 @@
   EXPECT_TRUE(actual_.empty()) << actual_;
 }
 
+void CheckJniAbortCatcher::Check(const std::string& expected_text) {
+  Check(expected_text.c_str());
+}
+
 void CheckJniAbortCatcher::Check(const char* expected_text) {
   EXPECT_TRUE(actual_.find(expected_text) != std::string::npos) << "\n"
       << "Expected to find: " << expected_text << "\n"
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 00394e9..b2090b7 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -184,6 +184,7 @@
 
   ~CheckJniAbortCatcher();
 
+  void Check(const std::string& expected_text);
   void Check(const char* expected_text);
 
  private:
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index d1d8caa..5a5f717 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -371,8 +371,21 @@
     // class rather than the declaring class itself.
     DexCache* referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
     uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetFieldId(field_idx).class_idx_;
-    // The referenced class has already been resolved with the field, get it from the dex cache.
-    Class* dex_access_to = referrer_dex_cache->GetResolvedType(class_idx);
+    // The referenced class has already been resolved with the field, but may not be in the dex
+    // cache. Using ResolveType here without handles in the caller should be safe since there
+    // should be no thread suspension due to the class being resolved.
+    // TODO: Clean this up to use handles in the caller.
+    Class* dex_access_to;
+    {
+      StackHandleScope<2> hs(Thread::Current());
+      Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(referrer_dex_cache));
+      Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(access_to->GetClassLoader()));
+      dex_access_to = Runtime::Current()->GetClassLinker()->ResolveType(
+          *referrer_dex_cache->GetDexFile(),
+          class_idx,
+          h_dex_cache,
+          h_class_loader);
+    }
     DCHECK(dex_access_to != nullptr);
     if (UNLIKELY(!this->CanAccess(dex_access_to))) {
       if (throw_on_failure) {
@@ -401,8 +414,21 @@
     // class rather than the declaring class itself.
     DexCache* referrer_dex_cache = use_referrers_cache ? this->GetDexCache() : dex_cache;
     uint32_t class_idx = referrer_dex_cache->GetDexFile()->GetMethodId(method_idx).class_idx_;
-    // The referenced class has already been resolved with the method, get it from the dex cache.
-    Class* dex_access_to = referrer_dex_cache->GetResolvedType(class_idx);
+    // The referenced class has already been resolved with the method, but may not be in the dex
+    // cache. Using ResolveType here without handles in the caller should be safe since there
+    // should be no thread suspension due to the class being resolved.
+    // TODO: Clean this up to use handles in the caller.
+    Class* dex_access_to;
+    {
+      StackHandleScope<2> hs(Thread::Current());
+      Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(referrer_dex_cache));
+      Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(access_to->GetClassLoader()));
+      dex_access_to = Runtime::Current()->GetClassLinker()->ResolveType(
+          *referrer_dex_cache->GetDexFile(),
+          class_idx,
+          h_dex_cache,
+          h_class_loader);
+    }
     DCHECK(dex_access_to != nullptr);
     if (UNLIKELY(!this->CanAccess(dex_access_to))) {
       if (throw_on_failure) {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index 99b7769..548087e 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -1505,6 +1505,7 @@
   // java.lang.Class
   static GcRoot<Class> java_lang_Class_;
 
+  ART_FRIEND_TEST(DexCacheTest, TestResolvedFieldAccess);  // For ResolvedFieldAccessTest
   friend struct art::ClassOffsets;  // for verifying offset information
   friend class Object;  // For VisitReferences
   DISALLOW_IMPLICIT_CONSTRUCTORS(Class);
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index 175997c..43ba362 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -64,5 +64,33 @@
   EXPECT_TRUE(linear_alloc->Contains(klass->GetDexCache()->GetResolvedMethods()));
 }
 
+// Checks that package1.Package1 is refused access to every static field of package2.Package2
+// when the field is looked up through the dex cache that the two classes share.
+TEST_F(DexCacheTest, TestResolvedFieldAccess) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject jclass_loader(LoadDex("Packages"));
+  ASSERT_TRUE(jclass_loader != nullptr);
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
+      soa.Decode<mirror::ClassLoader*>(jclass_loader)));
+  Handle<mirror::Class> klass1 =
+      hs.NewHandle(class_linker->FindClass(soa.Self(), "Lpackage1/Package1;", class_loader));
+  ASSERT_TRUE(klass1.Get() != nullptr);
+  Handle<mirror::Class> klass2 =
+      hs.NewHandle(class_linker->FindClass(soa.Self(), "Lpackage2/Package2;", class_loader));
+  ASSERT_TRUE(klass2.Get() != nullptr);
+  EXPECT_EQ(klass1->GetDexCache(), klass2->GetDexCache());
+
+  // The loop below walks klass2's static fields, so klass2 is the class that must have some;
+  // otherwise the test would pass without checking anything.
+  EXPECT_NE(klass2->NumStaticFields(), 0u);
+  for (ArtField& field : klass2->GetSFields()) {
+    EXPECT_FALSE((klass1->ResolvedFieldAccessTest</*throw_on_failure*/ false,
+                                                  /*use_referrers_cache*/ false>(
+        klass2.Get(), &field, field.GetDexFieldIndex(), klass1->GetDexCache())));
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 4678ac6..3b5360c 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -325,6 +325,14 @@
     reg = (kind == kDoubleHiVReg) ? (2 * reg + 1) : (2 * reg);
   }
 
+  // MIPS32 float registers are used as 64-bit (for MIPS32r2 it is the pair
+  // F(2n)-F(2n+1), and for MIPS32r6 it is the 64-bit register F(2n)). When
+  // accessing the upper 32 bits of a double, reg + 1 should be used.
+  if ((kRuntimeISA == InstructionSet::kMips) && (kind == kDoubleHiVReg)) {
+    DCHECK_ALIGNED(reg, 2);
+    reg++;
+  }
+
   if (!IsAccessibleRegister(reg, is_float)) {
     return false;
   }
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 2c99275..5f5fbc8 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -30,6 +30,7 @@
 namespace art {
 
 jclass WellKnownClasses::com_android_dex_Dex;
+jclass WellKnownClasses::dalvik_annotation_optimization_CriticalNative;
 jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
 jclass WellKnownClasses::dalvik_system_DexFile;
 jclass WellKnownClasses::dalvik_system_DexPathList;
@@ -216,6 +217,8 @@
 
 void WellKnownClasses::Init(JNIEnv* env) {
   com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex");
+  dalvik_annotation_optimization_CriticalNative =
+      CacheClass(env, "dalvik/annotation/optimization/CriticalNative");
   dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
   dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile");
   dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList");
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index 25c9424..ce710ff 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -41,6 +41,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static jclass com_android_dex_Dex;
+  static jclass dalvik_annotation_optimization_CriticalNative;
   static jclass dalvik_annotation_optimization_FastNative;
   static jclass dalvik_system_DexFile;
   static jclass dalvik_system_DexPathList;
diff --git a/test/004-JniTest/jni_test.cc b/test/004-JniTest/jni_test.cc
index bb18a70..81be531 100644
--- a/test/004-JniTest/jni_test.cc
+++ b/test/004-JniTest/jni_test.cc
@@ -28,9 +28,11 @@
 static JavaVM* jvm = nullptr;
 
 static jint Java_Main_intFastNativeMethod(JNIEnv*, jclass, jint a, jint b, jint c);
+static jint Java_Main_intCriticalNativeMethod(jint a, jint b, jint c);
 
 static JNINativeMethod sMainMethods[] = {
-  {"intFastNativeMethod", "(III)I", reinterpret_cast<void*>(Java_Main_intFastNativeMethod) }
+  {"intFastNativeMethod", "(III)I", reinterpret_cast<void*>(Java_Main_intFastNativeMethod) },
+  {"intCriticalNativeMethod", "(III)I", reinterpret_cast<void*>(Java_Main_intCriticalNativeMethod) },
 };
 
 extern "C" JNIEXPORT jint JNI_OnLoad(JavaVM *vm, void*) {
@@ -766,5 +768,12 @@
   return a + b + c;
 }
 
+// Annotated with @CriticalNative in Java code. Doesn't need to be explicitly registered with "!".
+// NOTE: Has to be registered explicitly to avoid mutator lock check failures.
+static jint Java_Main_intCriticalNativeMethod(jint a, jint b, jint c) {
+  // Note that unlike a "Fast Native" method this excludes JNIEnv and the jclass parameters.
+  return a + b + c;
+}
+
 }  // namespace art
 
diff --git a/test/004-JniTest/src/Main.java b/test/004-JniTest/src/Main.java
index 573afdb..bb098e4 100644
--- a/test/004-JniTest/src/Main.java
+++ b/test/004-JniTest/src/Main.java
@@ -18,6 +18,7 @@
 import java.lang.reflect.Method;
 import java.lang.reflect.Proxy;
 
+import dalvik.annotation.optimization.CriticalNative;
 import dalvik.annotation.optimization.FastNative;
 
 public class Main {
@@ -49,6 +50,7 @@
 
         registerNativesJniTest();
         testFastNativeMethods();
+        testCriticalNativeMethods();
     }
 
     private static native boolean registerNativesJniTest();
@@ -288,7 +290,23 @@
       }
     }
 
+    // Smoke test for @CriticalNative
+    // TODO: Way more thorough tests since it involved quite a bit of changes.
 
+    // Return sum of a+b+c.
+    @CriticalNative
+    static native int intCriticalNativeMethod(int a, int b, int c);
+
+    private static void testCriticalNativeMethods() {
+      int returns[] = { 3, 6, 9, 12, 15 };
+      for (int i = 0; i < returns.length; i++) {
+        int result = intCriticalNativeMethod(i, i+1, i+2);
+        if (returns[i] != result) {
+          System.out.println("CriticalNative Int Run " + i + " with " + returns[i] + " vs " + result);
+          throw new AssertionError();
+        }
+      }
+    }
 }
 
 @FunctionalInterface
diff --git a/test/529-checker-unresolved/src/Main.java b/test/529-checker-unresolved/src/Main.java
index 7b5cbc1..5fd51e1 100644
--- a/test/529-checker-unresolved/src/Main.java
+++ b/test/529-checker-unresolved/src/Main.java
@@ -122,6 +122,16 @@
     expectEquals(123456789123456789f, c.instanceFloat);
     expectEquals(123456789123456789d, c.instanceDouble);
     expectEquals(o, c.instanceObject);
+
+    // Check "large" values.
+
+    c.instanceByte = (byte)-1;
+    c.instanceChar = (char)32768;
+    c.instanceInt = -1;
+
+    expectEquals((byte)-1, c.instanceByte);
+    expectEquals((char)32768, c.instanceChar);
+    expectEquals(-1, c.instanceInt);
   }
 
   /// CHECK-START: void Main.callUnresolvedNull(UnresolvedClass) register (before)
diff --git a/test/960-default-smali/expected.txt b/test/960-default-smali/expected.txt
index f3db93f..8153d7d 100644
--- a/test/960-default-smali/expected.txt
+++ b/test/960-default-smali/expected.txt
@@ -98,3 +98,34 @@
 M-virtual           L.bar()='BAZ!'
 M-virtual           M.bar()='BAZ!'
 End testing for type M
+Testing for type N
+N-interface       Foo.bar()='foobar'
+N-virtual           N.bar()='foobar'
+End testing for type N
+Testing for type O
+O-interface       Foo.bar()='foobar foobar'
+O-interface      Foo2.bar()='foobar foobar'
+O-virtual           N.bar()='foobar foobar'
+O-virtual           O.bar()='foobar foobar'
+End testing for type O
+Testing for type P
+P-interface       Foo.bar()='not foobar!'
+P-interface      Foo2.bar()='not foobar!'
+P-virtual           N.bar()='not foobar!'
+P-virtual           O.bar()='not foobar!'
+P-virtual           P.bar()='not foobar!'
+End testing for type P
+Testing for type Q
+Q-interface on Foo: bar() threw exception!
+Exception is of type java.lang.IncompatibleClassChangeError
+Q-interface on Foo2: bar() threw exception!
+Exception is of type java.lang.IncompatibleClassChangeError
+Q-interface on Foo3: bar() threw exception!
+Exception is of type java.lang.IncompatibleClassChangeError
+Q-virtual on N: bar() threw exception!
+Exception is of type java.lang.IncompatibleClassChangeError
+Q-virtual on O: bar() threw exception!
+Exception is of type java.lang.IncompatibleClassChangeError
+Q-virtual on Q: bar() threw exception!
+Exception is of type java.lang.IncompatibleClassChangeError
+End testing for type Q
diff --git a/test/960-default-smali/src/Foo2.java b/test/960-default-smali/src/Foo2.java
new file mode 100644
index 0000000..2a1bbc0
--- /dev/null
+++ b/test/960-default-smali/src/Foo2.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+interface Foo2 extends Foo {
+  @Override
+  public default String bar() {
+    return "foobar foobar";
+  }
+}
diff --git a/test/960-default-smali/src/Foo3.java b/test/960-default-smali/src/Foo3.java
new file mode 100644
index 0000000..4c00425
--- /dev/null
+++ b/test/960-default-smali/src/Foo3.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+interface Foo3 extends Foo {
+  default void doNothing() {}
+}
diff --git a/test/960-default-smali/src/N.java b/test/960-default-smali/src/N.java
new file mode 100644
index 0000000..9d33320
--- /dev/null
+++ b/test/960-default-smali/src/N.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+class N implements Foo {
+}
diff --git a/test/960-default-smali/src/O.java b/test/960-default-smali/src/O.java
new file mode 100644
index 0000000..55126af
--- /dev/null
+++ b/test/960-default-smali/src/O.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+class O extends N implements Foo2 {
+}
diff --git a/test/960-default-smali/src/P.java b/test/960-default-smali/src/P.java
new file mode 100644
index 0000000..1ee6c26
--- /dev/null
+++ b/test/960-default-smali/src/P.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+class P extends O implements Foo2 {
+  @Override
+  public String bar() {
+    return "not foobar!";
+  }
+}
diff --git a/test/960-default-smali/src/Q.java b/test/960-default-smali/src/Q.java
new file mode 100644
index 0000000..bc1e164
--- /dev/null
+++ b/test/960-default-smali/src/Q.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+class Q extends O implements Foo2, Foo3 {
+}
diff --git a/test/960-default-smali/src/classes.xml b/test/960-default-smali/src/classes.xml
index f3e50c5..c66d35b 100644
--- a/test/960-default-smali/src/classes.xml
+++ b/test/960-default-smali/src/classes.xml
@@ -102,6 +102,37 @@
         <method>bar</method>
       </methods>
     </class>
+
+    <class name="N" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="O" super="N">
+      <implements>
+        <item>Foo2</item>
+      </implements>
+      <methods> </methods>
+    </class>
+
+    <class name="P" super="O">
+      <implements>
+        <item>Foo2</item>
+      </implements>
+      <methods>
+        <method>bar</method>
+      </methods>
+    </class>
+
+    <class name="Q" super="O">
+      <implements>
+        <item>Foo2</item>
+        <item>Foo3</item>
+      </implements>
+      <methods> </methods>
+    </class>
   </classes>
 
   <interfaces>
@@ -153,6 +184,24 @@
       </methods>
     </interface>
 
+    <interface name="Foo2" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods>
+        <method type="default">bar</method>
+      </methods>
+    </interface>
+
+    <interface name="Foo3" super="java/lang/Object">
+      <implements>
+        <item>Foo</item>
+      </implements>
+      <methods>
+        <method type="default">bar</method>
+      </methods>
+    </interface>
+
     <interface name="Fooer" super="java/lang/Object">
       <implements>
         <item>Foo</item>
diff --git a/test/960-default-smali/src2/Foo.java b/test/960-default-smali/src2/Foo.java
new file mode 100644
index 0000000..ed5b35f
--- /dev/null
+++ b/test/960-default-smali/src2/Foo.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+interface Foo {
+  public default String bar() {
+    return "foobar";
+  }
+}
diff --git a/test/960-default-smali/src2/Foo3.java b/test/960-default-smali/src2/Foo3.java
new file mode 100644
index 0000000..e96f98a
--- /dev/null
+++ b/test/960-default-smali/src2/Foo3.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression test for b/31280371
+ */
+interface Foo3 extends Foo {
+  @Override
+  public default String bar() {
+    return "I'm in conflict";
+  }
+}
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 75c4f34..d6957fc 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -338,7 +338,9 @@
 TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
 
 # Temporarily disable some broken tests when forcing access checks in interpreter b/22414682
+# 004-JniTest is disabled because @CriticalNative is unsupported by generic JNI b/31400248
 TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \
+  004-JniTest \
   137-cfi
 
 ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
@@ -397,9 +399,11 @@
 # All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
 # Therefore we shouldn't run them in situations where we actually don't have these since they
 # explicitly test for them. These all also assume we have an image.
+# 004-JniTest is disabled because @CriticalNative is unsupported by generic JNI (b/31400248).
 # 147-stripped-dex-fallback is disabled because it requires --prebuild.
 # 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
 TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
+  004-JniTest \
   116-nodex2oat \
   117-nopatchoat \
   118-noimage-dex2oat \
@@ -473,7 +477,9 @@
 # Known broken tests for the JIT.
 # CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
 # also uses Generic JNI instead of the JNI compiler.
+# 004-JniTest is disabled because @CriticalNative is unsupported by generic JNI (b/31400248).
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
+  004-JniTest \
   137-cfi
 
 ifneq (,$(filter jit,$(COMPILER_TYPES)))
@@ -500,7 +506,6 @@
 
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
-    510-checker-try-catch \
 
 ifeq (mips,$(TARGET_ARCH))
   ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
diff --git a/test/MyClassNatives/MyClassNatives.java b/test/MyClassNatives/MyClassNatives.java
index 45cfd0f..3cb1f23 100644
--- a/test/MyClassNatives/MyClassNatives.java
+++ b/test/MyClassNatives/MyClassNatives.java
@@ -14,40 +14,77 @@
  * limitations under the License.
  */
 
+import dalvik.annotation.optimization.CriticalNative;
 import dalvik.annotation.optimization.FastNative;
 
+/*
+ * AUTOMATICALLY GENERATED FROM art/tools/mako-source-generator/...../MyClassNatives.java.mako
+ *
+ * !!! DO NOT EDIT DIRECTLY !!!
+ *
+ */
 class MyClassNatives {
-    native void throwException();
-    native void foo();
-    native int bar(int count);
-    static native int sbar(int count);
-    native int fooI(int x);
-    native int fooII(int x, int y);
-    native long fooJJ(long x, long y);
-    native Object fooO(Object x);
-    native double fooDD(double x, double y);
-    synchronized native long fooJJ_synchronized(long x, long y);
-    native Object fooIOO(int x, Object y, Object z);
-    static native Object fooSIOO(int x, Object y, Object z);
-    static native int fooSII(int x, int y);
-    static native double fooSDD(double x, double y);
-    static synchronized native Object fooSSIOO(int x, Object y, Object z);
-    static native void arraycopy(Object src, int src_pos, Object dst, int dst_pos, int length);
-    native boolean compareAndSwapInt(Object obj, long offset, int expected, int newval);
-    static native int getText(long val1, Object obj1, long val2, Object obj2);
-    synchronized native Object []getSinkPropertiesNative(String path);
 
+    // Normal native
+    native void throwException();
+    // Normal native
+    native void foo();
+    // Normal native
+    native int bar(int count);
+    // Normal native
+    static native int sbar(int count);
+    // Normal native
+    native int fooI(int x);
+    // Normal native
+    native int fooII(int x, int y);
+    // Normal native
+    native long fooJJ(long x, long y);
+    // Normal native
+    native Object fooO(Object x);
+    // Normal native
+    native double fooDD(double x, double y);
+    // Normal native
+    synchronized native long fooJJ_synchronized(long x, long y);
+    // Normal native
+    native Object fooIOO(int x, Object y, Object z);
+    // Normal native
+    static native Object fooSIOO(int x, Object y, Object z);
+    // Normal native
+    static native int fooSII(int x, int y);
+    // Normal native
+    static native double fooSDD(double x, double y);
+    // Normal native
+    static synchronized native Object fooSSIOO(int x, Object y, Object z);
+    // Normal native
+    static native void arraycopy(Object src, int src_pos, Object dst, int dst_pos, int length);
+    // Normal native
+    native boolean compareAndSwapInt(Object obj, long offset, int expected, int newval);
+    // Normal native
+    static native int getText(long val1, Object obj1, long val2, Object obj2);
+    // Normal native
+    synchronized native Object[] getSinkPropertiesNative(String path);
+
+    // Normal native
     native Class<?> instanceMethodThatShouldReturnClass();
+    // Normal native
     static native Class<?> staticMethodThatShouldReturnClass();
 
+    // Normal native
     native void instanceMethodThatShouldTakeClass(int i, Class<?> c);
+    // Normal native
     static native void staticMethodThatShouldTakeClass(int i, Class<?> c);
 
+    // TODO: These 3 seem like they could work for @CriticalNative as well if they were static.
+    // Normal native
     native float checkFloats(float f1, float f2);
+    // Normal native
     native void forceStackParameters(int i1, int i2, int i3, int i4, int i5, int i6, int i8, int i9,
                                      float f1, float f2, float f3, float f4, float f5, float f6,
                                      float f7, float f8, float f9);
+    // Normal native
     native void checkParameterAlign(int i1, long l1);
+
+    // Normal native
     native void maxParamNumber(Object o0, Object o1, Object o2, Object o3, Object o4, Object o5, Object o6, Object o7,
         Object o8, Object o9, Object o10, Object o11, Object o12, Object o13, Object o14, Object o15,
         Object o16, Object o17, Object o18, Object o19, Object o20, Object o21, Object o22, Object o23,
@@ -81,32 +118,228 @@
         Object o240, Object o241, Object o242, Object o243, Object o244, Object o245, Object o246, Object o247,
         Object o248, Object o249, Object o250, Object o251, Object o252, Object o253);
 
+    // Normal native
     native void withoutImplementation();
+    // Normal native
     native Object withoutImplementationRefReturn();
 
+    // Normal native
     native static void stackArgsIntsFirst(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
         int i8, int i9, int i10, float f1, float f2, float f3, float f4, float f5, float f6,
         float f7, float f8, float f9, float f10);
 
+    // Normal native
     native static void stackArgsFloatsFirst(float f1, float f2, float f3, float f4, float f5,
         float f6, float f7, float f8, float f9, float f10, int i1, int i2, int i3, int i4, int i5,
         int i6, int i7, int i8, int i9, int i10);
 
+    // Normal native
     native static void stackArgsMixed(int i1, float f1, int i2, float f2, int i3, float f3, int i4,
         float f4, int i5, float f5, int i6, float f6, int i7, float f7, int i8, float f8, int i9,
         float f9, int i10, float f10);
 
+    // Normal native
     native static void stackArgsSignExtendedMips64(int i1, int i2, int i3, int i4, int i5, int i6,
         int i7, int i8);
 
+    // Normal native
     static native double logD(double d);
+    // Normal native
     static native float logF(float f);
+    // Normal native
     static native boolean returnTrue();
+    // Normal native
     static native boolean returnFalse();
+    // Normal native
     static native int returnInt();
+    // Normal native
+    static native double returnDouble();
+    // Normal native
+    static native long returnLong();
 
-    // Check for @FastNative annotation presence [or lack of presence].
+
+
+    @FastNative
+    native void throwException_Fast();
+    @FastNative
+    native void foo_Fast();
+    @FastNative
+    native int bar_Fast(int count);
+    @FastNative
+    static native int sbar_Fast(int count);
+    @FastNative
+    native int fooI_Fast(int x);
+    @FastNative
+    native int fooII_Fast(int x, int y);
+    @FastNative
+    native long fooJJ_Fast(long x, long y);
+    @FastNative
+    native Object fooO_Fast(Object x);
+    @FastNative
+    native double fooDD_Fast(double x, double y);
+    @FastNative
+    synchronized native long fooJJ_synchronized_Fast(long x, long y);
+    @FastNative
+    native Object fooIOO_Fast(int x, Object y, Object z);
+    @FastNative
+    static native Object fooSIOO_Fast(int x, Object y, Object z);
+    @FastNative
+    static native int fooSII_Fast(int x, int y);
+    @FastNative
+    static native double fooSDD_Fast(double x, double y);
+    @FastNative
+    static synchronized native Object fooSSIOO_Fast(int x, Object y, Object z);
+    @FastNative
+    static native void arraycopy_Fast(Object src, int src_pos, Object dst, int dst_pos, int length);
+    @FastNative
+    native boolean compareAndSwapInt_Fast(Object obj, long offset, int expected, int newval);
+    @FastNative
+    static native int getText_Fast(long val1, Object obj1, long val2, Object obj2);
+    @FastNative
+    synchronized native Object[] getSinkPropertiesNative_Fast(String path);
+
+    @FastNative
+    native Class<?> instanceMethodThatShouldReturnClass_Fast();
+    @FastNative
+    static native Class<?> staticMethodThatShouldReturnClass_Fast();
+
+    @FastNative
+    native void instanceMethodThatShouldTakeClass_Fast(int i, Class<?> c);
+    @FastNative
+    static native void staticMethodThatShouldTakeClass_Fast(int i, Class<?> c);
+
+    // TODO: These 3 seem like they could work for @CriticalNative as well if they were static.
+    @FastNative
+    native float checkFloats_Fast(float f1, float f2);
+    @FastNative
+    native void forceStackParameters_Fast(int i1, int i2, int i3, int i4, int i5, int i6, int i8, int i9,
+                                     float f1, float f2, float f3, float f4, float f5, float f6,
+                                     float f7, float f8, float f9);
+    @FastNative
+    native void checkParameterAlign_Fast(int i1, long l1);
+
+    @FastNative
+    native void maxParamNumber_Fast(Object o0, Object o1, Object o2, Object o3, Object o4, Object o5, Object o6, Object o7,
+        Object o8, Object o9, Object o10, Object o11, Object o12, Object o13, Object o14, Object o15,
+        Object o16, Object o17, Object o18, Object o19, Object o20, Object o21, Object o22, Object o23,
+        Object o24, Object o25, Object o26, Object o27, Object o28, Object o29, Object o30, Object o31,
+        Object o32, Object o33, Object o34, Object o35, Object o36, Object o37, Object o38, Object o39,
+        Object o40, Object o41, Object o42, Object o43, Object o44, Object o45, Object o46, Object o47,
+        Object o48, Object o49, Object o50, Object o51, Object o52, Object o53, Object o54, Object o55,
+        Object o56, Object o57, Object o58, Object o59, Object o60, Object o61, Object o62, Object o63,
+        Object o64, Object o65, Object o66, Object o67, Object o68, Object o69, Object o70, Object o71,
+        Object o72, Object o73, Object o74, Object o75, Object o76, Object o77, Object o78, Object o79,
+        Object o80, Object o81, Object o82, Object o83, Object o84, Object o85, Object o86, Object o87,
+        Object o88, Object o89, Object o90, Object o91, Object o92, Object o93, Object o94, Object o95,
+        Object o96, Object o97, Object o98, Object o99, Object o100, Object o101, Object o102, Object o103,
+        Object o104, Object o105, Object o106, Object o107, Object o108, Object o109, Object o110, Object o111,
+        Object o112, Object o113, Object o114, Object o115, Object o116, Object o117, Object o118, Object o119,
+        Object o120, Object o121, Object o122, Object o123, Object o124, Object o125, Object o126, Object o127,
+        Object o128, Object o129, Object o130, Object o131, Object o132, Object o133, Object o134, Object o135,
+        Object o136, Object o137, Object o138, Object o139, Object o140, Object o141, Object o142, Object o143,
+        Object o144, Object o145, Object o146, Object o147, Object o148, Object o149, Object o150, Object o151,
+        Object o152, Object o153, Object o154, Object o155, Object o156, Object o157, Object o158, Object o159,
+        Object o160, Object o161, Object o162, Object o163, Object o164, Object o165, Object o166, Object o167,
+        Object o168, Object o169, Object o170, Object o171, Object o172, Object o173, Object o174, Object o175,
+        Object o176, Object o177, Object o178, Object o179, Object o180, Object o181, Object o182, Object o183,
+        Object o184, Object o185, Object o186, Object o187, Object o188, Object o189, Object o190, Object o191,
+        Object o192, Object o193, Object o194, Object o195, Object o196, Object o197, Object o198, Object o199,
+        Object o200, Object o201, Object o202, Object o203, Object o204, Object o205, Object o206, Object o207,
+        Object o208, Object o209, Object o210, Object o211, Object o212, Object o213, Object o214, Object o215,
+        Object o216, Object o217, Object o218, Object o219, Object o220, Object o221, Object o222, Object o223,
+        Object o224, Object o225, Object o226, Object o227, Object o228, Object o229, Object o230, Object o231,
+        Object o232, Object o233, Object o234, Object o235, Object o236, Object o237, Object o238, Object o239,
+        Object o240, Object o241, Object o242, Object o243, Object o244, Object o245, Object o246, Object o247,
+        Object o248, Object o249, Object o250, Object o251, Object o252, Object o253);
+
+    @FastNative
+    native void withoutImplementation_Fast();
+    @FastNative
+    native Object withoutImplementationRefReturn_Fast();
+
+    @FastNative
+    native static void stackArgsIntsFirst_Fast(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
+        int i8, int i9, int i10, float f1, float f2, float f3, float f4, float f5, float f6,
+        float f7, float f8, float f9, float f10);
+
+    @FastNative
+    native static void stackArgsFloatsFirst_Fast(float f1, float f2, float f3, float f4, float f5,
+        float f6, float f7, float f8, float f9, float f10, int i1, int i2, int i3, int i4, int i5,
+        int i6, int i7, int i8, int i9, int i10);
+
+    @FastNative
+    native static void stackArgsMixed_Fast(int i1, float f1, int i2, float f2, int i3, float f3, int i4,
+        float f4, int i5, float f5, int i6, float f6, int i7, float f7, int i8, float f8, int i9,
+        float f9, int i10, float f10);
+
+    @FastNative
+    native static void stackArgsSignExtendedMips64_Fast(int i1, int i2, int i3, int i4, int i5, int i6,
+        int i7, int i8);
+
+    @FastNative
+    static native double logD_Fast(double d);
+    @FastNative
+    static native float logF_Fast(float f);
+    @FastNative
+    static native boolean returnTrue_Fast();
+    @FastNative
+    static native boolean returnFalse_Fast();
+    @FastNative
+    static native int returnInt_Fast();
+    @FastNative
+    static native double returnDouble_Fast();
+    @FastNative
+    static native long returnLong_Fast();
+
+
+
+    @CriticalNative
+    static native int sbar_Critical(int count);
+    @CriticalNative
+    static native int fooSII_Critical(int x, int y);
+    @CriticalNative
+    static native double fooSDD_Critical(double x, double y);
+
+    @CriticalNative
+    native static void stackArgsIntsFirst_Critical(int i1, int i2, int i3, int i4, int i5, int i6, int i7,
+        int i8, int i9, int i10, float f1, float f2, float f3, float f4, float f5, float f6,
+        float f7, float f8, float f9, float f10);
+
+    @CriticalNative
+    native static void stackArgsFloatsFirst_Critical(float f1, float f2, float f3, float f4, float f5,
+        float f6, float f7, float f8, float f9, float f10, int i1, int i2, int i3, int i4, int i5,
+        int i6, int i7, int i8, int i9, int i10);
+
+    @CriticalNative
+    native static void stackArgsMixed_Critical(int i1, float f1, int i2, float f2, int i3, float f3, int i4,
+        float f4, int i5, float f5, int i6, float f6, int i7, float f7, int i8, float f8, int i9,
+        float f9, int i10, float f10);
+
+    @CriticalNative
+    native static void stackArgsSignExtendedMips64_Critical(int i1, int i2, int i3, int i4, int i5, int i6,
+        int i7, int i8);
+
+    @CriticalNative
+    static native double logD_Critical(double d);
+    @CriticalNative
+    static native float logF_Critical(float f);
+    @CriticalNative
+    static native boolean returnTrue_Critical();
+    @CriticalNative
+    static native boolean returnFalse_Critical();
+    @CriticalNative
+    static native int returnInt_Critical();
+    @CriticalNative
+    static native double returnDouble_Critical();
+    @CriticalNative
+    static native long returnLong_Critical();
+
+
+
+    // Check for @FastNative/@CriticalNative annotation presence [or lack of presence].
     public static native void normalNative();
     @FastNative
     public static native void fastNative();
+    @CriticalNative
+    public static native void criticalNative();
 }
diff --git a/test/Packages/Package1.java b/test/Packages/Package1.java
new file mode 100644
index 0000000..6d58246
--- /dev/null
+++ b/test/Packages/Package1.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package package1;
+class Package1 {
+    static int someField;
+}
diff --git a/test/Packages/Package2.java b/test/Packages/Package2.java
new file mode 100644
index 0000000..9ae370a
--- /dev/null
+++ b/test/Packages/Package2.java
@@ -0,0 +1,20 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package package2;
+class Package2 {
+    static int someField;
+}
diff --git a/test/utils/python/generate_java_main.py b/test/utils/python/generate_java_main.py
index f66d0dd..1c76b06 100755
--- a/test/utils/python/generate_java_main.py
+++ b/test/utils/python/generate_java_main.py
@@ -175,7 +175,11 @@
       return;
     }} catch (Error e) {{
       System.out.printf("%s-{invoke_type} on {farg}: {callfunc}() threw exception!\\n", s);
-      e.printStackTrace(System.out);
+      if (e instanceof IncompatibleClassChangeError) {{
+        System.out.printf("Exception is of type %s\\n", e.getClass().getName());
+      }} else {{
+        e.printStackTrace(System.out);
+      }}
     }}
   }}
 """
