Merge "entrypoints: Remove DMBs from art_quick_alloc initialized variants."
diff --git a/Android.mk b/Android.mk
index 7beb30f..c0935a7 100644
--- a/Android.mk
+++ b/Android.mk
@@ -87,11 +87,20 @@
   $(ART_HOST_EXECUTABLES) \
   $(ART_HOST_DEX_DEPENDENCIES) \
   $(ART_HOST_SHARED_LIBRARY_DEPENDENCIES)
+
+ifeq ($(ART_BUILD_HOST_DEBUG),true)
+ART_HOST_DEPENDENCIES += $(ART_HOST_SHARED_LIBRARY_DEBUG_DEPENDENCIES)
+endif
+
 ART_TARGET_DEPENDENCIES := \
   $(ART_TARGET_EXECUTABLES) \
   $(ART_TARGET_DEX_DEPENDENCIES) \
   $(ART_TARGET_SHARED_LIBRARY_DEPENDENCIES)
 
+ifeq ($(ART_BUILD_TARGET_DEBUG),true)
+ART_TARGET_DEPENDENCIES += $(ART_TARGET_SHARED_LIBRARY_DEBUG_DEPENDENCIES)
+endif
+
 ########################################################################
 # test rules
 
@@ -123,7 +132,7 @@
 ifeq ($(ART_TEST_ANDROID_ROOT),)
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
-	adb sync
+	adb sync system && adb sync data
 else
 test-art-target-sync: $(TEST_ART_TARGET_SYNC_DEPS)
 	$(TEST_ART_ADB_ROOT_AND_REMOUNT)
diff --git a/benchmark/Android.bp b/benchmark/Android.bp
index e784508..d0dfec9 100644
--- a/benchmark/Android.bp
+++ b/benchmark/Android.bp
@@ -49,7 +49,7 @@
     name: "libartbenchmark-micronative-host",
     host_supported: true,
     device_supported: false,
-    defaults: ["art_defaults", "art_debug_defaults"],
+    defaults: ["art_debug_defaults", "art_defaults" ],
     srcs: [
         "jni_loader.cc",
         "micro-native/micro_native.cc",
diff --git a/build/Android.bp b/build/Android.bp
index 6c9f1d4..ed6de35 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -59,17 +59,9 @@
         "-Wunreachable-code-break",
         "-Wunreachable-code-return",
 
-        // Bug: http://b/29823425  Disable -Wconstant-conversion and
-        // -Wundefined-var-template for Clang update to r271374
+        // Bug: http://b/29823425  Disable -Wconstant-conversion for Clang update to r271374
         "-Wno-constant-conversion",
-        "-Wno-undefined-var-template",
 
-        "-DART_STACK_OVERFLOW_GAP_arm=8192",
-        "-DART_STACK_OVERFLOW_GAP_arm64=8192",
-        "-DART_STACK_OVERFLOW_GAP_mips=16384",
-        "-DART_STACK_OVERFLOW_GAP_mips64=16384",
-        "-DART_STACK_OVERFLOW_GAP_x86=8192",
-        "-DART_STACK_OVERFLOW_GAP_x86_64=8192",
         // Enable thread annotations for std::mutex, etc.
         "-D_LIBCPP_ENABLE_THREAD_SAFETY_ANNOTATIONS",
     ],
@@ -151,6 +143,15 @@
 
     tidy_checks: [
         "-google-default-arguments",
+        // We have local stores that are only used for debug checks.
+        "-clang-analyzer-deadcode.DeadStores",
+        // We are OK with some static globals and that they can, in theory, throw.
+        "-cert-err58-cpp",
+        // We have lots of C-style variadic functions, and are OK with them. JNI ensures
+        // that working around this warning would be extra-painful.
+        "-cert-dcl50-cpp",
+        // No exceptions.
+        "-misc-noexcept-move-constructor",
     ],
 }
 
diff --git a/build/Android.common_path.mk b/build/Android.common_path.mk
index 6de5aef..4466118 100644
--- a/build/Android.common_path.mk
+++ b/build/Android.common_path.mk
@@ -97,14 +97,19 @@
 ART_TARGET_DEX_DEPENDENCIES := $(foreach jar,$(TARGET_CORE_JARS),$(TARGET_OUT_JAVA_LIBRARIES)/$(jar).jar)
 
 ART_CORE_SHARED_LIBRARIES := libjavacore libopenjdk libopenjdkjvm libopenjdkjvmti
+ART_CORE_SHARED_DEBUG_LIBRARIES := libopenjdkd libopenjdkjvmd libopenjdkjvmtid
 ART_HOST_SHARED_LIBRARY_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(ART_HOST_OUT_SHARED_LIBRARIES)/$(lib)$(ART_HOST_SHLIB_EXTENSION))
+ART_HOST_SHARED_LIBRARY_DEBUG_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_DEBUG_LIBRARIES), $(ART_HOST_OUT_SHARED_LIBRARIES)/$(lib)$(ART_HOST_SHLIB_EXTENSION))
 ifdef HOST_2ND_ARCH
 ART_HOST_SHARED_LIBRARY_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(2ND_HOST_OUT_SHARED_LIBRARIES)/$(lib).so)
+ART_HOST_SHARED_LIBRARY_DEBUG_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_DEBUG_LIBRARIES), $(2ND_HOST_OUT_SHARED_LIBRARIES)/$(lib).so)
 endif
 
 ART_TARGET_SHARED_LIBRARY_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
+ART_TARGET_SHARED_LIBRARY_DEBUG_DEPENDENCIES := $(foreach lib,$(ART_CORE_SHARED_DEBUG_LIBRARIES), $(TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
 ifdef TARGET_2ND_ARCH
 ART_TARGET_SHARED_LIBRARY_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_LIBRARIES), $(2ND_TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
+ART_TARGET_SHARED_LIBRARY_DEBUG_DEPENDENCIES += $(foreach lib,$(ART_CORE_SHARED_DEBUG_LIBRARIES), $(2ND_TARGET_OUT_SHARED_LIBRARIES)/$(lib).so)
 endif
 
 ART_CORE_DEBUGGABLE_EXECUTABLES := \
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 0a465c4..bcf48fd 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -40,6 +40,7 @@
   Interfaces \
   Lookup \
   Main \
+  ManyMethods \
   MethodTypes \
   MultiDex \
   MultiDexModifiedSecondary \
@@ -103,6 +104,7 @@
 ART_GTEST_compiler_driver_test_DEX_DEPS := AbstractMethod StaticLeafMethods ProfileTestMultiDex
 ART_GTEST_dex_cache_test_DEX_DEPS := Main Packages MethodTypes
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested MultiDex
+ART_GTEST_dexlayout_test_DEX_DEPS := ManyMethods
 ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics VerifierDeps
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_image_test_DEX_DEPS := ImageLayoutA ImageLayoutB DefaultMethods
diff --git a/build/art.go b/build/art.go
index 61a9759..f52c635 100644
--- a/build/art.go
+++ b/build/art.go
@@ -76,6 +76,29 @@
 		asflags = append(asflags, "-DART_USE_OLD_ARM_BACKEND=1")
 	}
 
+	// We need larger stack overflow guards for ASAN, as the compiled code will have
+	// larger frame sizes. For simplicity, just use global not-target-specific cflags.
+	// Note: We increase this for both debug and non-debug, as the overflow gap will
+	//       be compiled into managed code. We always preopt (and build core images) with
+	//       the debug version. So make the gap consistent (and adjust for the worst).
+	if len(ctx.AConfig().SanitizeDevice()) > 0 || len(ctx.AConfig().SanitizeHost()) > 0 {
+		cflags = append(cflags,
+				"-DART_STACK_OVERFLOW_GAP_arm=8192",
+				"-DART_STACK_OVERFLOW_GAP_arm64=8192",
+				"-DART_STACK_OVERFLOW_GAP_mips=16384",
+				"-DART_STACK_OVERFLOW_GAP_mips64=16384",
+				"-DART_STACK_OVERFLOW_GAP_x86=16384",
+				"-DART_STACK_OVERFLOW_GAP_x86_64=20480")
+	} else {
+		cflags = append(cflags,
+				"-DART_STACK_OVERFLOW_GAP_arm=8192",
+				"-DART_STACK_OVERFLOW_GAP_arm64=8192",
+				"-DART_STACK_OVERFLOW_GAP_mips=16384",
+				"-DART_STACK_OVERFLOW_GAP_mips64=16384",
+				"-DART_STACK_OVERFLOW_GAP_x86=8192",
+				"-DART_STACK_OVERFLOW_GAP_x86_64=8192")
+	}
+
 	return cflags, asflags
 }
 
@@ -147,12 +170,23 @@
 		}
 		Cflags  []string
 		Asflags []string
+		Sanitize struct {
+		  Recover []string
+		}
 	}
 
 	p := &props{}
 	p.Cflags, p.Asflags = globalFlags(ctx)
 	p.Target.Android.Cflags = deviceFlags(ctx)
 	p.Target.Host.Cflags = hostFlags(ctx)
+
+	if envTrue(ctx, "ART_DEX_FILE_ACCESS_TRACKING") {
+		p.Cflags = append(p.Cflags, "-DART_DEX_FILE_ACCESS_TRACKING")
+		p.Sanitize.Recover = []string {
+			"address",
+		}
+	}
+
 	ctx.AppendProperties(p)
 }
 
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 1a2b9cd..07639e8 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -22,6 +22,7 @@
 #include <numeric>
 #include "gtest/gtest.h"
 #include "runtime/experimental_flags.h"
+#include "runtime/runtime.h"
 
 #define EXPECT_NULL(expected) EXPECT_EQ(reinterpret_cast<const void*>(expected), \
                                         reinterpret_cast<void*>(nullptr));
@@ -34,7 +35,7 @@
     return lhs.enabled_ == rhs.enabled_ &&
         lhs.min_save_period_ms_ == rhs.min_save_period_ms_ &&
         lhs.save_resolved_classes_delay_ms_ == rhs.save_resolved_classes_delay_ms_ &&
-        lhs.startup_method_samples_ == rhs.startup_method_samples_ &&
+        lhs.hot_startup_method_samples_ == rhs.hot_startup_method_samples_ &&
         lhs.min_methods_to_save_ == rhs.min_methods_to_save_ &&
         lhs.min_classes_to_save_ == rhs.min_classes_to_save_ &&
         lhs.min_notification_before_wake_ == rhs.min_notification_before_wake_ &&
@@ -489,7 +490,7 @@
                             "-Xjitsaveprofilinginfo "
                             "-Xps-min-save-period-ms:1 "
                             "-Xps-save-resolved-classes-delay-ms:2 "
-                            "-Xps-startup-method-samples:3 "
+                            "-Xps-hot-startup-method-samples:3 "
                             "-Xps-min-methods-to-save:4 "
                             "-Xps-min-classes-to-save:5 "
                             "-Xps-min-notification-before-wake:6 "
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index e33a207..185a0e4 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -18,6 +18,8 @@
 
 #define CMDLINE_NDEBUG 1  // Do not output any debugging information for parsing.
 
+#include <list>
+
 #include "memory_representation.h"
 #include "detail/cmdline_debug_detail.h"
 #include "cmdline_type_parser.h"
@@ -725,10 +727,10 @@
              &ProfileSaverOptions::save_resolved_classes_delay_ms_,
              type_parser.Parse(suffix));
     }
-    if (android::base::StartsWith(option, "startup-method-samples:")) {
+    if (android::base::StartsWith(option, "hot-startup-method-samples:")) {
       CmdlineType<unsigned int> type_parser;
       return ParseInto(existing,
-             &ProfileSaverOptions::startup_method_samples_,
+             &ProfileSaverOptions::hot_startup_method_samples_,
              type_parser.Parse(suffix));
     }
     if (android::base::StartsWith(option, "min-methods-to-save:")) {
diff --git a/compiler/Android.bp b/compiler/Android.bp
index df896dc..a1269dc 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -67,6 +67,7 @@
         "optimizing/intrinsics.cc",
         "optimizing/licm.cc",
         "optimizing/linear_order.cc",
+        "optimizing/load_store_analysis.cc",
         "optimizing/load_store_elimination.cc",
         "optimizing/locations.cc",
         "optimizing/loop_optimization.cc",
@@ -109,7 +110,6 @@
                 "optimizing/code_generator_vector_arm.cc",
                 "optimizing/code_generator_arm_vixl.cc",
                 "optimizing/code_generator_vector_arm_vixl.cc",
-                "optimizing/dex_cache_array_fixups_arm.cc",
                 "optimizing/instruction_simplifier_arm.cc",
                 "optimizing/instruction_simplifier_shared.cc",
                 "optimizing/intrinsics_arm.cc",
@@ -144,7 +144,6 @@
                 "linker/mips/relative_patcher_mips.cc",
                 "optimizing/code_generator_mips.cc",
                 "optimizing/code_generator_vector_mips.cc",
-                "optimizing/dex_cache_array_fixups_mips.cc",
                 "optimizing/intrinsics_mips.cc",
                 "optimizing/pc_relative_fixups_mips.cc",
                 "utils/mips/assembler_mips.cc",
@@ -341,6 +340,7 @@
         "image_test.cc",
         "image_write_read_test.cc",
         "jni/jni_compiler_test.cc",
+        "linker/method_bss_mapping_encoder_test.cc",
         "linker/multi_oat_relative_patcher_test.cc",
         "linker/output_stream_test.cc",
         "oat_test.cc",
@@ -374,6 +374,7 @@
 
         "jni/jni_cfi_test.cc",
         "optimizing/codegen_test.cc",
+        "optimizing/load_store_analysis_test.cc",
         "optimizing/optimizing_cfi_test.cc",
         "optimizing/scheduler_test.cc",
     ],
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index 39edd1e..3683695 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -19,6 +19,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -33,7 +34,7 @@
 #include "mirror/object-inl.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -166,8 +167,8 @@
     instruction_set_features_ = InstructionSetFeatures::FromCppDefines();
 
     runtime_->SetInstructionSet(instruction_set);
-    for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-      Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
+    for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); ++i) {
+      CalleeSaveType type = CalleeSaveType(i);
       if (!runtime_->HasCalleeSaveMethod(type)) {
         runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(), type);
       }
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index 912c964..761e9e1 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -119,22 +119,34 @@
   // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
   // patch_type_ as an uintN_t and do explicit static_cast<>s.
   enum class Type : uint8_t {
-    kMethod,
+    kMethodRelative,          // NOTE: Actual patching is instruction_set-dependent.
+    kMethodBssEntry,          // NOTE: Actual patching is instruction_set-dependent.
     kCall,
     kCallRelative,            // NOTE: Actual patching is instruction_set-dependent.
     kTypeRelative,            // NOTE: Actual patching is instruction_set-dependent.
     kTypeBssEntry,            // NOTE: Actual patching is instruction_set-dependent.
     kStringRelative,          // NOTE: Actual patching is instruction_set-dependent.
     kStringBssEntry,          // NOTE: Actual patching is instruction_set-dependent.
-    kDexCacheArray,           // NOTE: Actual patching is instruction_set-dependent.
     kBakerReadBarrierBranch,  // NOTE: Actual patching is instruction_set-dependent.
   };
 
-  static LinkerPatch MethodPatch(size_t literal_offset,
-                                 const DexFile* target_dex_file,
-                                 uint32_t target_method_idx) {
-    LinkerPatch patch(literal_offset, Type::kMethod, target_dex_file);
+  static LinkerPatch RelativeMethodPatch(size_t literal_offset,
+                                         const DexFile* target_dex_file,
+                                         uint32_t pc_insn_offset,
+                                         uint32_t target_method_idx) {
+    LinkerPatch patch(literal_offset, Type::kMethodRelative, target_dex_file);
     patch.method_idx_ = target_method_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
+    return patch;
+  }
+
+  static LinkerPatch MethodBssEntryPatch(size_t literal_offset,
+                                         const DexFile* target_dex_file,
+                                         uint32_t pc_insn_offset,
+                                         uint32_t target_method_idx) {
+    LinkerPatch patch(literal_offset, Type::kMethodBssEntry, target_dex_file);
+    patch.method_idx_ = target_method_idx;
+    patch.pc_insn_offset_ = pc_insn_offset;
     return patch;
   }
 
@@ -194,16 +206,6 @@
     return patch;
   }
 
-  static LinkerPatch DexCacheArrayPatch(size_t literal_offset,
-                                        const DexFile* target_dex_file,
-                                        uint32_t pc_insn_offset,
-                                        uint32_t element_offset) {
-    LinkerPatch patch(literal_offset, Type::kDexCacheArray, target_dex_file);
-    patch.pc_insn_offset_ = pc_insn_offset;
-    patch.element_offset_ = element_offset;
-    return patch;
-  }
-
   static LinkerPatch BakerReadBarrierBranchPatch(size_t literal_offset,
                                                  uint32_t custom_value1 = 0u,
                                                  uint32_t custom_value2 = 0u) {
@@ -226,12 +228,13 @@
 
   bool IsPcRelative() const {
     switch (GetType()) {
+      case Type::kMethodRelative:
+      case Type::kMethodBssEntry:
       case Type::kCallRelative:
       case Type::kTypeRelative:
       case Type::kTypeBssEntry:
       case Type::kStringRelative:
       case Type::kStringBssEntry:
-      case Type::kDexCacheArray:
       case Type::kBakerReadBarrierBranch:
         return true;
       default:
@@ -240,7 +243,8 @@
   }
 
   MethodReference TargetMethod() const {
-    DCHECK(patch_type_ == Type::kMethod ||
+    DCHECK(patch_type_ == Type::kMethodRelative ||
+           patch_type_ == Type::kMethodBssEntry ||
            patch_type_ == Type::kCall ||
            patch_type_ == Type::kCallRelative);
     return MethodReference(target_dex_file_, method_idx_);
@@ -270,22 +274,13 @@
     return dex::StringIndex(string_idx_);
   }
 
-  const DexFile* TargetDexCacheDexFile() const {
-    DCHECK(patch_type_ == Type::kDexCacheArray);
-    return target_dex_file_;
-  }
-
-  size_t TargetDexCacheElementOffset() const {
-    DCHECK(patch_type_ == Type::kDexCacheArray);
-    return element_offset_;
-  }
-
   uint32_t PcInsnOffset() const {
-    DCHECK(patch_type_ == Type::kTypeRelative ||
+    DCHECK(patch_type_ == Type::kMethodRelative ||
+           patch_type_ == Type::kMethodBssEntry ||
+           patch_type_ == Type::kTypeRelative ||
            patch_type_ == Type::kTypeBssEntry ||
            patch_type_ == Type::kStringRelative ||
-           patch_type_ == Type::kStringBssEntry ||
-           patch_type_ == Type::kDexCacheArray);
+           patch_type_ == Type::kStringBssEntry);
     return pc_insn_offset_;
   }
 
@@ -320,12 +315,10 @@
     uint32_t method_idx_;       // Method index for Call/Method patches.
     uint32_t type_idx_;         // Type index for Type patches.
     uint32_t string_idx_;       // String index for String patches.
-    uint32_t element_offset_;   // Element offset in the dex cache arrays.
     uint32_t baker_custom_value1_;
     static_assert(sizeof(method_idx_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(type_idx_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(string_idx_) == sizeof(cmp1_), "needed by relational operators");
-    static_assert(sizeof(element_offset_) == sizeof(cmp1_), "needed by relational operators");
     static_assert(sizeof(baker_custom_value1_) == sizeof(cmp1_), "needed by relational operators");
   };
   union {
diff --git a/compiler/compiled_method_test.cc b/compiler/compiled_method_test.cc
index 81a2cde..f4a72cf 100644
--- a/compiler/compiled_method_test.cc
+++ b/compiler/compiled_method_test.cc
@@ -50,10 +50,22 @@
   const DexFile* dex_file1 = reinterpret_cast<const DexFile*>(1);
   const DexFile* dex_file2 = reinterpret_cast<const DexFile*>(2);
   LinkerPatch patches[] = {
-      LinkerPatch::MethodPatch(16u, dex_file1, 1000u),
-      LinkerPatch::MethodPatch(16u, dex_file1, 1001u),
-      LinkerPatch::MethodPatch(16u, dex_file2, 1000u),
-      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // Index 3.
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u),  // Index 3.
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(16u, dex_file2, 3001u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3000u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3001u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3000u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file1, 3001u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3000u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3001u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3000u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(16u, dex_file2, 3001u, 1001u),
       LinkerPatch::CodePatch(16u, dex_file1, 1000u),
       LinkerPatch::CodePatch(16u, dex_file1, 1001u),
       LinkerPatch::CodePatch(16u, dex_file2, 1000u),
@@ -94,23 +106,27 @@
       LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1000u),
       LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3000u, 1001u),
       LinkerPatch::StringBssEntryPatch(16u, dex_file2, 3001u, 1001u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3000u, 2001u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file1, 3001u, 2001u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3000u, 2001u),
-      LinkerPatch::DexCacheArrayPatch(16u, dex_file2, 3001u, 2001u),
       LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 0u),
       LinkerPatch::BakerReadBarrierBranchPatch(16u, 0u, 1u),
       LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 0u),
       LinkerPatch::BakerReadBarrierBranchPatch(16u, 1u, 1u),
 
-      LinkerPatch::MethodPatch(32u, dex_file1, 1000u),
-      LinkerPatch::MethodPatch(32u, dex_file1, 1001u),
-      LinkerPatch::MethodPatch(32u, dex_file2, 1000u),
-      LinkerPatch::MethodPatch(32u, dex_file2, 1001u),  // Index 3.
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::RelativeMethodPatch(32u, dex_file2, 3001u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3000u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3001u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3000u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file1, 3001u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3000u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3001u, 1000u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3000u, 1001u),
+      LinkerPatch::MethodBssEntryPatch(32u, dex_file2, 3001u, 1001u),
       LinkerPatch::CodePatch(32u, dex_file1, 1000u),
       LinkerPatch::CodePatch(32u, dex_file1, 1001u),
       LinkerPatch::CodePatch(32u, dex_file2, 1000u),
@@ -151,20 +167,12 @@
       LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1000u),
       LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3000u, 1001u),
       LinkerPatch::StringBssEntryPatch(32u, dex_file2, 3001u, 1001u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3000u, 2001u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file1, 3001u, 2001u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2000u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3000u, 2001u),
-      LinkerPatch::DexCacheArrayPatch(32u, dex_file2, 3001u, 2001u),
       LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 0u),
       LinkerPatch::BakerReadBarrierBranchPatch(32u, 0u, 1u),
       LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 0u),
       LinkerPatch::BakerReadBarrierBranchPatch(32u, 1u, 1u),
 
-      LinkerPatch::MethodPatch(16u, dex_file2, 1001u),  // identical with patch at index 3.
+      LinkerPatch::RelativeMethodPatch(16u, dex_file1, 3001u, 1001u),  // Same as patch at index 3.
   };
   constexpr size_t last_index = arraysize(patches) - 1u;
 
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 908d366..cd4c591 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -25,11 +25,11 @@
 
 namespace jit {
   class JitCodeCache;
-}
+}  // namespace jit
 namespace mirror {
   class ClassLoader;
   class DexCache;
-}
+}  // namespace mirror
 
 class ArtMethod;
 class CompilerDriver;
diff --git a/compiler/debug/elf_debug_info_writer.h b/compiler/debug/elf_debug_info_writer.h
index 558c7d5..de32351 100644
--- a/compiler/debug/elf_debug_info_writer.h
+++ b/compiler/debug/elf_debug_info_writer.h
@@ -411,7 +411,7 @@
     for (const auto& base_class_reference : base_class_references) {
       size_t reference_offset = base_class_reference.first;
       mirror::Class* base_class = base_class_reference.second;
-      const auto& it = class_declarations.find(base_class);
+      const auto it = class_declarations.find(base_class);
       if (it != class_declarations.end()) {
         info_.UpdateUint32(reference_offset, it->second);
       } else {
@@ -512,7 +512,7 @@
     using namespace dwarf;  // NOLINT. For easy access to DWARF constants.
 
     DCHECK(!desc.empty());
-    const auto& it = type_cache_.find(desc);
+    const auto it = type_cache_.find(desc);
     if (it != type_cache_.end()) {
       return it->second;
     }
diff --git a/compiler/debug/elf_debug_loc_writer.h b/compiler/debug/elf_debug_loc_writer.h
index cbfdbdd..bf47e8f 100644
--- a/compiler/debug/elf_debug_loc_writer.h
+++ b/compiler/debug/elf_debug_loc_writer.h
@@ -85,7 +85,7 @@
 // The result will cover all ranges where the variable is in scope.
 // PCs corresponding to stackmap with dex register map are accurate,
 // all other PCs are best-effort only.
-std::vector<VariableLocation> GetVariableLocations(
+static std::vector<VariableLocation> GetVariableLocations(
     const MethodDebugInfo* method_info,
     const std::vector<DexRegisterMap>& dex_register_maps,
     uint16_t vreg,
diff --git a/compiler/debug/elf_debug_writer.cc b/compiler/debug/elf_debug_writer.cc
index d1c10a9..7fa6e14 100644
--- a/compiler/debug/elf_debug_writer.cc
+++ b/compiler/debug/elf_debug_writer.cc
@@ -30,6 +30,7 @@
 #include "debug/method_debug_info.h"
 #include "elf_builder.h"
 #include "linker/vector_output_stream.h"
+#include "oat.h"
 
 namespace art {
 namespace debug {
diff --git a/compiler/debug/elf_debug_writer.h b/compiler/debug/elf_debug_writer.h
index 07f7229..5d68810 100644
--- a/compiler/debug/elf_debug_writer.h
+++ b/compiler/debug/elf_debug_writer.h
@@ -29,7 +29,7 @@
 class OatHeader;
 namespace mirror {
 class Class;
-}
+}  // namespace mirror
 namespace debug {
 struct MethodDebugInfo;
 
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 1573062..2db99cd 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -28,7 +28,7 @@
 #include "driver/compiler_driver.h"
 #include "driver/dex_compilation_unit.h"
 #include "mirror/dex_cache.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace optimizer {
diff --git a/compiler/dex/inline_method_analyser.cc b/compiler/dex/inline_method_analyser.cc
index e691a67..2572291 100644
--- a/compiler/dex/inline_method_analyser.cc
+++ b/compiler/dex/inline_method_analyser.cc
@@ -433,8 +433,11 @@
     // Native or abstract.
     return false;
   }
-  return AnalyseMethodCode(
-      code_item, method->ToMethodReference(), method->IsStatic(), method, result);
+  return AnalyseMethodCode(code_item,
+                           MethodReference(method->GetDexFile(), method->GetDexMethodIndex()),
+                           method->IsStatic(),
+                           method,
+                           result);
 }
 
 bool InlineMethodAnalyser::AnalyseMethodCode(const DexFile::CodeItem* code_item,
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index 0338cfd..04ceca0 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -17,12 +17,13 @@
 #include "verification_results.h"
 
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "base/mutex-inl.h"
+#include "base/stl_util.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
+#include "runtime.h"
 #include "thread.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/atomic_method_ref_map-inl.h"
 #include "verified_method.h"
 #include "verifier/method_verifier-inl.h"
@@ -109,12 +110,12 @@
   // This method should only be called for classes verified at compile time,
   // which have no verifier error, nor has methods that we know will throw
   // at runtime.
-  atomic_verified_methods_.Insert(
-      ref,
-      /*expected*/ nullptr,
-      new VerifiedMethod(/* encountered_error_types */ 0, /* has_runtime_throw */ false));
-  // We don't check the result of `Insert` as we could insert twice for the same
-  // MethodReference in the presence of duplicate methods.
+  std::unique_ptr<VerifiedMethod> verified_method = std::make_unique<VerifiedMethod>(
+      /* encountered_error_types */ 0, /* has_runtime_throw */ false);
+  if (atomic_verified_methods_.Insert(ref, /*expected*/ nullptr, verified_method.get()) ==
+          AtomicMap::InsertResult::kInsertResultSuccess) {
+    verified_method.release();
+  }
 }
 
 void VerificationResults::AddRejectedClass(ClassReference ref) {
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index e6a47ba..528b0a2 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -21,7 +21,7 @@
 
 #include "base/logging.h"
 #include "compiled_method.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 #include "utils/dedupe_set-inl.h"
 #include "utils/swap_space.h"
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index 6572d17..bbd28b2 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -71,11 +71,11 @@
   };
   const LinkerPatch raw_patches1[] = {
       LinkerPatch::CodePatch(0u, nullptr, 1u),
-      LinkerPatch::MethodPatch(4u, nullptr, 1u),
+      LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 1u),
   };
   const LinkerPatch raw_patches2[] = {
       LinkerPatch::CodePatch(0u, nullptr, 1u),
-      LinkerPatch::MethodPatch(4u, nullptr, 2u),
+      LinkerPatch::RelativeMethodPatch(4u, nullptr, 0u, 2u),
   };
   ArrayRef<const LinkerPatch> patches[] = {
       ArrayRef<const LinkerPatch>(raw_patches1),
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 5823306..8cc1cc3 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -24,10 +24,11 @@
 #include "base/enums.h"
 #include "class_linker-inl.h"
 #include "dex_compilation_unit.h"
+#include "handle_scope-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
+#include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "handle_scope-inl.h"
 
 namespace art {
 
@@ -149,6 +150,11 @@
   return resolved_method;
 }
 
+inline VerificationResults* CompilerDriver::GetVerificationResults() const {
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  return verification_results_;
+}
+
 }  // namespace art
 
 #endif  // ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index c2d792d..0d0769f 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -28,6 +28,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/arena_allocator.h"
 #include "base/array_ref.h"
 #include "base/bit_vector.h"
 #include "base/enums.h"
@@ -999,7 +1000,9 @@
   if (profile_compilation_info_ == nullptr) {
     return false;
   }
-  bool result = profile_compilation_info_->ContainsMethod(method_ref);
+  // Compile only hot methods; it is the profile saver's job to decide which startup methods to
+  // mark as hot.
+  bool result = profile_compilation_info_->ContainsHotMethod(method_ref);
 
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] "
@@ -2238,7 +2241,7 @@
  public:
   explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
-  void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+  void Visit(size_t class_def_index) OVERRIDE {
     ATRACE_CALL();
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
@@ -2253,89 +2256,123 @@
     Handle<mirror::Class> klass(
         hs.NewHandle(manager_->GetClassLinker()->FindClass(soa.Self(), descriptor, class_loader)));
 
-    if (klass != nullptr && !SkipClass(jclass_loader, dex_file, klass.Get())) {
-      // Only try to initialize classes that were successfully verified.
-      if (klass->IsVerified()) {
-        // Attempt to initialize the class but bail if we either need to initialize the super-class
-        // or static fields.
-        manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
-        if (!klass->IsInitialized()) {
-          // We don't want non-trivial class initialization occurring on multiple threads due to
-          // deadlock problems. For example, a parent class is initialized (holding its lock) that
-          // refers to a sub-class in its static/class initializer causing it to try to acquire the
-          // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
-          // after first initializing its parents, whose locks are acquired. This leads to a
-          // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
-          // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
-          // than use a special Object for the purpose we use the Class of java.lang.Class.
-          Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
-          ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
-          // Attempt to initialize allowing initialization of parent classes but still not static
-          // fields.
+    if (klass != nullptr && !SkipClass(manager_->GetClassLoader(), dex_file, klass.Get())) {
+      TryInitializeClass(klass, class_loader);
+    }
+    // Clear any class not found or verification exceptions.
+    soa.Self()->ClearException();
+  }
+
+  // Tries to initialize klass (its dependencies first) and records a class status for it.
+  void TryInitializeClass(Handle<mirror::Class> klass, Handle<mirror::ClassLoader>& class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    const DexFile& dex_file = klass->GetDexFile();
+    const DexFile::ClassDef* class_def = klass->GetClassDef();
+    const DexFile::TypeId& class_type_id = dex_file.GetTypeId(class_def->class_idx_);
+    const char* descriptor = dex_file.StringDataByIdx(class_type_id.descriptor_idx_);
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    StackHandleScope<3> hs(soa.Self());
+
+    mirror::Class::Status old_status = klass->GetStatus();
+    // Only try to initialize classes that were successfully verified.
+    if (klass->IsVerified()) {
+      // Attempt to initialize the class but bail if we either need to initialize the super-class
+      // or static fields.
+      manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, false);
+      old_status = klass->GetStatus();
+      if (!klass->IsInitialized()) {
+        // We don't want non-trivial class initialization occurring on multiple threads due to
+        // deadlock problems. For example, a parent class is initialized (holding its lock) that
+        // refers to a sub-class in its static/class initializer causing it to try to acquire the
+        // sub-class' lock. While on a second thread the sub-class is initialized (holding its lock)
+        // after first initializing its parents, whose locks are acquired. This leads to a
+        // parent-to-child and a child-to-parent lock ordering and consequent potential deadlock.
+        // We need to use an ObjectLock due to potential suspension in the interpreting code. Rather
+        // than use a special Object for the purpose we use the Class of java.lang.Class.
+        Handle<mirror::Class> h_klass(hs.NewHandle(klass->GetClass()));
+        ObjectLock<mirror::Class> lock(soa.Self(), h_klass);
+        // Attempt to initialize allowing initialization of parent classes but still not static
+        // fields.
+        bool is_superclass_initialized = InitializeDependencies(klass, class_loader, soa.Self());
+        if (is_superclass_initialized) {
           manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, false, true);
-          if (!klass->IsInitialized()) {
+        }
+        old_status = klass->GetStatus();
+        // If superclass cannot be initialized, no need to proceed.
+        if (!klass->IsInitialized() &&
+            is_superclass_initialized &&
+            manager_->GetCompiler()->IsImageClass(descriptor)) {
+          bool can_init_static_fields = false;
+          if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage()) {
             // We need to initialize static fields, we only do this for image classes that aren't
             // marked with the $NoPreloadHolder (which implies this should not be initialized early).
-            bool can_init_static_fields =
-                manager_->GetCompiler()->GetCompilerOptions().IsBootImage() &&
-                manager_->GetCompiler()->IsImageClass(descriptor) &&
-                !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
-            if (can_init_static_fields) {
-              VLOG(compiler) << "Initializing: " << descriptor;
-              // TODO multithreading support. We should ensure the current compilation thread has
-              // exclusive access to the runtime and the transaction. To achieve this, we could use
-              // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
-              // checks in Thread::AssertThreadSuspensionIsAllowable.
-              Runtime* const runtime = Runtime::Current();
-              Transaction transaction;
+            can_init_static_fields = !StringPiece(descriptor).ends_with("$NoPreloadHolder;");
+          } else {
+            can_init_static_fields = manager_->GetCompiler()->GetCompilerOptions().IsAppImage() &&
+                !soa.Self()->IsExceptionPending() &&
+                NoClinitInDependency(klass, soa.Self(), &class_loader);
+            // TODO The checking for clinit can be removed since it's already
+            // checked when init superclass. Currently keep it because it contains
+            // processing of intern strings. Will be removed later when intern strings
+            // and clinit are both initialized.
+          }
 
-              // Run the class initializer in transaction mode.
-              runtime->EnterTransactionMode(&transaction);
-              const mirror::Class::Status old_status = klass->GetStatus();
-              bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
-                                                                           true);
-              // TODO we detach transaction from runtime to indicate we quit the transactional
-              // mode which prevents the GC from visiting objects modified during the transaction.
-              // Ensure GC is not run so don't access freed objects when aborting transaction.
+          if (can_init_static_fields) {
+            VLOG(compiler) << "Initializing: " << descriptor;
+            // TODO multithreading support. We should ensure the current compilation thread has
+            // exclusive access to the runtime and the transaction. To achieve this, we could use
+            // a ReaderWriterMutex but we're holding the mutator lock so we fail mutex sanity
+            // checks in Thread::AssertThreadSuspensionIsAllowable.
+            Runtime* const runtime = Runtime::Current();
+            Transaction transaction;
 
-              {
-                ScopedAssertNoThreadSuspension ants("Transaction end");
-                runtime->ExitTransactionMode();
+            // Run the class initializer in transaction mode.
+            runtime->EnterTransactionMode(&transaction);
+            bool success = manager_->GetClassLinker()->EnsureInitialized(soa.Self(), klass, true,
+                                                                         true);
+            // TODO we detach transaction from runtime to indicate we quit the transactional
+            // mode which prevents the GC from visiting objects modified during the transaction.
+            // Ensure GC is not run so don't access freed objects when aborting transaction.
 
-                if (!success) {
-                  CHECK(soa.Self()->IsExceptionPending());
-                  mirror::Throwable* exception = soa.Self()->GetException();
-                  VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
-                      << exception->Dump();
-                  std::ostream* file_log = manager_->GetCompiler()->
-                      GetCompilerOptions().GetInitFailureOutput();
-                  if (file_log != nullptr) {
-                    *file_log << descriptor << "\n";
-                    *file_log << exception->Dump() << "\n";
-                  }
-                  soa.Self()->ClearException();
-                  transaction.Rollback();
-                  CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
-                }
-              }
+            {
+              ScopedAssertNoThreadSuspension ants("Transaction end");
+              runtime->ExitTransactionMode();
 
               if (!success) {
-                // On failure, still intern strings of static fields and seen in <clinit>, as these
-                // will be created in the zygote. This is separated from the transaction code just
-                // above as we will allocate strings, so must be allowed to suspend.
+                CHECK(soa.Self()->IsExceptionPending());
+                mirror::Throwable* exception = soa.Self()->GetException();
+                VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+                               << exception->Dump();
+                std::ostream* file_log = manager_->GetCompiler()->
+                    GetCompilerOptions().GetInitFailureOutput();
+                if (file_log != nullptr) {
+                  *file_log << descriptor << "\n";
+                  *file_log << exception->Dump() << "\n";
+                }
+                soa.Self()->ClearException();
+                transaction.Rollback();
+                CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+              }
+            }
+
+            if (!success) {
+              // On failure, still intern strings of static fields and seen in <clinit>, as these
+              // will be created in the zygote. This is separated from the transaction code just
+              // above as we will allocate strings, so must be allowed to suspend.
+              if (&klass->GetDexFile() == manager_->GetDexFile()) {
                 InternStrings(klass, class_loader);
               }
             }
           }
-          soa.Self()->AssertNoPendingException();
         }
+        soa.Self()->AssertNoPendingException();
       }
-      // Record the final class status if necessary.
-      ClassReference ref(manager_->GetDexFile(), class_def_index);
-      manager_->GetCompiler()->RecordClassStatus(ref, klass->GetStatus());
     }
-    // Clear any class not found or verification exceptions.
-    soa.Self()->ClearException();
+    // Record the class status captured in old_status (not necessarily the live status).
+    ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
+    // old_status is sampled before the transactional initialization of static encoded fields,
+    // because the static encoded branch wants the recorded status to stay uninitialized.
+    manager_->GetCompiler()->RecordClassStatus(ref, old_status);
   }
 
  private:
@@ -2390,6 +2427,162 @@
     }
   }
 
+  bool NoPotentialInternStrings(Handle<mirror::Class> klass,
+                                Handle<mirror::ClassLoader>* class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache());
+    const DexFile* dex_file = h_dex_cache->GetDexFile();
+    const DexFile::ClassDef* class_def = klass->GetClassDef();
+    annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file,
+                                                                 &h_dex_cache,
+                                                                 class_loader,
+                                                                 manager_->GetClassLinker(),
+                                                                 *class_def);
+    // Scan the encoded static field values of klass's class_def for string-typed entries.
+    const auto jString = annotations::RuntimeEncodedStaticFieldValueIterator::kString;
+    for ( ; value_it.HasNext(); value_it.Next()) {
+      if (value_it.GetValueType() == jString) {
+        // A string-valued encoded static field is a potential intern; we don't want to cache it.
+        return false;
+      }
+    }
+
+    return true;
+  }
+
+  // Resolves m's return and parameter types. Clears the exception and returns false on failure.
+  bool ResolveTypesOfMethods(Thread* self, ArtMethod* m)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    auto rtn_type = m->GetReturnType(true);  // Only null-checked; the call resolves the type.
+    if (rtn_type == nullptr) {
+      self->ClearException();
+      return false;
+    }
+    const DexFile::TypeList* types = m->GetParameterTypeList();
+    if (types != nullptr) {
+      for (uint32_t i = 0; i < types->Size(); ++i) {
+        auto param_type = m->GetClassFromTypeIndex(types->GetTypeItem(i).type_idx_, true);
+        if (param_type == nullptr) {
+          self->ClearException();
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  // Pre-resolve the types mentioned in all method signatures before starting a transaction,
+  // since ResolveType doesn't work in transaction mode.
+  bool PreResolveTypes(Thread* self, const Handle<mirror::Class>& klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    PointerSize pointer_size = manager_->GetClassLinker()->GetImagePointerSize();
+    for (ArtMethod& m : klass->GetMethods(pointer_size)) {
+      if (!ResolveTypesOfMethods(self, &m)) {
+        return false;
+      }
+    }
+    if (klass->IsInterface() || !klass->HasSuperClass()) {
+      return true;
+    }
+    StackHandleScope<1> hs(self);
+    MutableHandle<mirror::Class> super_klass(hs.NewHandle<mirror::Class>(klass->GetSuperClass()));
+    // For every vtable slot of the superclass, resolve both klass's and the superclass's entry.
+    for (int i = super_klass->GetVTableLength() - 1; i >= 0; --i) {
+      ArtMethod* m = klass->GetVTableEntry(i, pointer_size);
+      ArtMethod* super_m = super_klass->GetVTableEntry(i, pointer_size);
+      if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
+        return false;
+      }
+    }
+    for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+      super_klass.Assign(klass->GetIfTable()->GetInterface(i));
+      if (klass->GetClassLoader() != super_klass->GetClassLoader()) {
+        uint32_t num_methods = super_klass->NumVirtualMethods();
+        for (uint32_t j = 0; j < num_methods; ++j) {
+          ArtMethod* m = klass->GetIfTable()->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>(
+              j, pointer_size);
+          ArtMethod* super_m = super_klass->GetVirtualMethod(j, pointer_size);
+          if (!ResolveTypesOfMethods(self, m) || !ResolveTypesOfMethods(self, super_m)) {
+            return false;
+          }
+        }
+      }
+    }
+    return true;
+  }
+
+  // Initializes klass's superclass and direct interfaces (recursively, via TryInitializeClass)
+  // before klass itself is initialized. Interfaces are included because they can contain both
+  // default methods and static encoded fields. Returns false if a dependency is not initialized.
+  bool InitializeDependencies(const Handle<mirror::Class>& klass,
+                              Handle<mirror::ClassLoader> class_loader,
+                              Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (klass->HasSuperClass()) {
+      ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+      if (!handle_scope_super->IsInitialized()) {
+        this->TryInitializeClass(handle_scope_super, class_loader);
+        if (!handle_scope_super->IsInitialized()) {
+          return false;
+        }
+      }
+    }
+    // Every direct interface is tried as well (without a prior IsInitialized() check).
+    uint32_t num_if = klass->NumDirectInterfaces();
+    for (size_t i = 0; i < num_if; i++) {
+      ObjPtr<mirror::Class>
+          interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
+
+      TryInitializeClass(handle_interface, class_loader);
+
+      if (!handle_interface->IsInitialized()) {
+        return false;
+      }
+    }
+    // All dependencies are initialized; the result is whether klass's signature types pre-resolve.
+    return PreResolveTypes(self, klass);
+  }
+
+  // In this phase, classes containing class initializers are ignored. Returns true only if no
+  // clinit appears in klass, its superclass chain, or its interfaces (checked recursively).
+  bool NoClinitInDependency(const Handle<mirror::Class>& klass,
+                            Thread* self,
+                            Handle<mirror::ClassLoader>* class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* clinit =
+        klass->FindClassInitializer(manager_->GetClassLinker()->GetImagePointerSize());
+    if (clinit != nullptr) {
+      VLOG(compiler) << klass->PrettyClass() << ' ' << clinit->PrettyMethod(true);
+      return false;
+    }
+    if (klass->HasSuperClass()) {
+      ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_scope_super(hs.NewHandle(super_class));
+      if (!NoClinitInDependency(handle_scope_super, self, class_loader)) {
+        return false;
+      }
+    }
+
+    uint32_t num_if = klass->NumDirectInterfaces();
+    for (size_t i = 0; i < num_if; i++) {
+      ObjPtr<mirror::Class>
+          interface = mirror::Class::GetDirectInterface(self, klass.Get(), i);
+      StackHandleScope<1> hs(self);
+      Handle<mirror::Class> handle_interface(hs.NewHandle(interface));
+      if (!NoClinitInDependency(handle_interface, self, class_loader)) {
+        return false;
+      }
+    }
+    // No clinit was found; additionally require that klass has no string-typed static values.
+    return NoPotentialInternStrings(klass, class_loader);
+  }
+
   const ParallelCompilationManager* const manager_;
 };
 
@@ -2409,7 +2602,10 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ParallelCompilationManager context(class_linker, jni_class_loader, this, &dex_file, dex_files,
                                      init_thread_pool);
-  if (GetCompilerOptions().IsBootImage()) {
+
+  if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsAppImage()) {
+    // Use a single initialization thread for boot and app images, since transactional mode
+    // doesn't support multithreading yet.
     // TODO: remove this when transactional mode supports multithreading.
     init_thread_count = 1U;
   }
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index e0d97b7..38e7d2c 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -23,7 +23,6 @@
 #include <vector>
 
 #include "arch/instruction_set.h"
-#include "base/arena_allocator.h"
 #include "base/array_ref.h"
 #include "base/bit_utils.h"
 #include "base/mutex.h"
@@ -38,7 +37,6 @@
 #include "method_reference.h"
 #include "mirror/class.h"  // For mirror::Class::Status.
 #include "os.h"
-#include "runtime.h"
 #include "safe_map.h"
 #include "thread_pool.h"
 #include "utils/atomic_method_ref_map.h"
@@ -61,6 +59,7 @@
 class DexCompilationUnit;
 struct InlineIGetIPutData;
 class InstructionSetFeatures;
+class InternTable;
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
@@ -131,10 +130,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!compiled_classes_lock_, !dex_to_dex_references_lock_);
 
-  VerificationResults* GetVerificationResults() const {
-    DCHECK(Runtime::Current()->IsAotCompiler());
-    return verification_results_;
-  }
+  VerificationResults* GetVerificationResults() const;
 
   InstructionSet GetInstructionSet() const {
     return instruction_set_;
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 26ea39f..4b979d8 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -239,8 +239,14 @@
 
     ProfileCompilationInfo info;
     for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
-      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
-      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
+      profile_info_.AddMethodIndex(dex_file->GetLocation(),
+                                   dex_file->GetLocationChecksum(),
+                                   1,
+                                   dex_file->NumMethodIds());
+      profile_info_.AddMethodIndex(dex_file->GetLocation(),
+                                   dex_file->GetLocationChecksum(),
+                                   2,
+                                   dex_file->NumMethodIds());
     }
     return &profile_info_;
   }
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 5a82021..89c2537 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -31,7 +31,7 @@
 
 namespace verifier {
   class VerifierDepsTest;
-}
+}  // namespace verifier
 
 class DexFile;
 
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 7c02384..2ef9fa1 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -670,6 +670,7 @@
                              Elf_Word rodata_size,
                              Elf_Word text_size,
                              Elf_Word bss_size,
+                             Elf_Word bss_methods_offset,
                              Elf_Word bss_roots_offset) {
     std::string soname(elf_file_path);
     size_t directory_separator_pos = soname.rfind('/');
@@ -715,9 +716,18 @@
       Elf_Word bss_index = rodata_index + 1u + (text_size != 0 ? 1u : 0u);
       Elf_Word oatbss = dynstr_.Add("oatbss");
       dynsym_.Add(oatbss, bss_index, bss_address, bss_roots_offset, STB_GLOBAL, STT_OBJECT);
+      DCHECK_LE(bss_methods_offset, bss_roots_offset);
+      DCHECK_LE(bss_roots_offset, bss_size);
+      // Add a symbol marking the start of the methods part of the .bss, if not empty.
+      if (bss_methods_offset != bss_roots_offset) {
+        Elf_Word bss_methods_address = bss_address + bss_methods_offset;
+        Elf_Word bss_methods_size = bss_roots_offset - bss_methods_offset;
+        Elf_Word oatbssroots = dynstr_.Add("oatbssmethods");
+        dynsym_.Add(
+            oatbssroots, bss_index, bss_methods_address, bss_methods_size, STB_GLOBAL, STT_OBJECT);
+      }
       // Add a symbol marking the start of the GC roots part of the .bss, if not empty.
       if (bss_roots_offset != bss_size) {
-        DCHECK_LT(bss_roots_offset, bss_size);
         Elf_Word bss_roots_address = bss_address + bss_roots_offset;
         Elf_Word bss_roots_size = bss_size - bss_roots_offset;
         Elf_Word oatbssroots = dynstr_.Add("oatbssroots");
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index 7baae52..a8a5bc3 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -55,6 +55,7 @@
   virtual void PrepareDynamicSection(size_t rodata_size,
                                      size_t text_size,
                                      size_t bss_size,
+                                     size_t bss_methods_offset,
                                      size_t bss_roots_offset) = 0;
   virtual void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0;
   virtual OutputStream* StartRoData() = 0;
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 28c35e9..5d6dd2e 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -22,7 +22,6 @@
 
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
@@ -34,7 +33,7 @@
 #include "leb128.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_pool.h"
 #include "utils.h"
 
@@ -80,7 +79,7 @@
   const InstructionSetFeatures* instruction_set_features_;
   size_t rodata_section_size_;
   size_t text_section_size_;
-  const ArrayRef<const debug::MethodDebugInfo>& method_infos_;
+  const ArrayRef<const debug::MethodDebugInfo> method_infos_;
   std::vector<uint8_t> result_;
 };
 
@@ -97,6 +96,7 @@
   void PrepareDynamicSection(size_t rodata_size,
                              size_t text_size,
                              size_t bss_size,
+                             size_t bss_methods_offset,
                              size_t bss_roots_offset) OVERRIDE;
   void PrepareDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE;
   OutputStream* StartRoData() OVERRIDE;
@@ -136,15 +136,15 @@
                                                 const CompilerOptions* compiler_options,
                                                 File* elf_file) {
   if (Is64BitInstructionSet(instruction_set)) {
-    return MakeUnique<ElfWriterQuick<ElfTypes64>>(instruction_set,
-                                                  features,
-                                                  compiler_options,
-                                                  elf_file);
+    return std::make_unique<ElfWriterQuick<ElfTypes64>>(instruction_set,
+                                                        features,
+                                                        compiler_options,
+                                                        elf_file);
   } else {
-    return MakeUnique<ElfWriterQuick<ElfTypes32>>(instruction_set,
-                                                  features,
-                                                  compiler_options,
-                                                  elf_file);
+    return std::make_unique<ElfWriterQuick<ElfTypes32>>(instruction_set,
+                                                        features,
+                                                        compiler_options,
+                                                        elf_file);
   }
 }
 
@@ -160,7 +160,8 @@
       rodata_size_(0u),
       text_size_(0u),
       bss_size_(0u),
-      output_stream_(MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file))),
+      output_stream_(
+          std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(elf_file))),
       builder_(new ElfBuilder<ElfTypes>(instruction_set, features, output_stream_.get())) {}
 
 template <typename ElfTypes>
@@ -178,6 +179,7 @@
 void ElfWriterQuick<ElfTypes>::PrepareDynamicSection(size_t rodata_size,
                                                      size_t text_size,
                                                      size_t bss_size,
+                                                     size_t bss_methods_offset,
                                                      size_t bss_roots_offset) {
   DCHECK_EQ(rodata_size_, 0u);
   rodata_size_ = rodata_size;
@@ -189,6 +191,7 @@
                                   rodata_size_,
                                   text_size_,
                                   bss_size_,
+                                  bss_methods_offset,
                                   bss_roots_offset);
 }
 
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index dc880b0..b4777df 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -17,6 +17,7 @@
 #include <memory>
 
 #include "base/arena_allocator.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "class_linker.h"
 #include "common_runtime_test.h"
@@ -170,7 +171,7 @@
   Runtime* r = Runtime::Current();
   r->SetInstructionSet(kRuntimeISA);
   ArtMethod* save_method = r->CreateCalleeSaveMethod();
-  r->SetCalleeSaveMethod(save_method, Runtime::kSaveAllCalleeSaves);
+  r->SetCalleeSaveMethod(save_method, CalleeSaveType::kSaveAllCalleeSaves);
   QuickMethodFrameInfo frame_info = r->GetRuntimeMethodFrameInfo(save_method);
 
   ASSERT_EQ(kStackAlignment, 16U);
diff --git a/compiler/image_test.h b/compiler/image_test.h
index 2f15ff4..3d89757 100644
--- a/compiler/image_test.h
+++ b/compiler/image_test.h
@@ -290,9 +290,9 @@
 
       if (kIsVdexEnabled) {
         for (size_t i = 0, size = vdex_files.size(); i != size; ++i) {
-          std::unique_ptr<BufferedOutputStream> vdex_out(
-              MakeUnique<BufferedOutputStream>(
-                  MakeUnique<FileOutputStream>(vdex_files[i].GetFile())));
+          std::unique_ptr<BufferedOutputStream> vdex_out =
+              std::make_unique<BufferedOutputStream>(
+                  std::make_unique<FileOutputStream>(vdex_files[i].GetFile()));
           oat_writers[i]->WriteVerifierDeps(vdex_out.get(), nullptr);
           oat_writers[i]->WriteChecksumsAndVdexHeader(vdex_out.get());
         }
@@ -311,6 +311,7 @@
         elf_writer->PrepareDynamicSection(rodata_size,
                                           text_size,
                                           oat_writer->GetBssSize(),
+                                          oat_writer->GetBssMethodsOffset(),
                                           oat_writer->GetBssRootsOffset());
 
         writer->UpdateOatFileLayout(i,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 4d6db47..406892e 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -27,6 +27,8 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
 #include "base/logging.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker-inl.h"
@@ -47,7 +49,6 @@
 #include "globals.h"
 #include "image.h"
 #include "imt_conflict_table.h"
-#include "intern_table.h"
 #include "jni_internal.h"
 #include "linear_alloc.h"
 #include "lock_word.h"
@@ -1572,13 +1573,13 @@
   image_methods_[ImageHeader::kImtConflictMethod] = runtime->GetImtConflictMethod();
   image_methods_[ImageHeader::kImtUnimplementedMethod] = runtime->GetImtUnimplementedMethod();
   image_methods_[ImageHeader::kSaveAllCalleeSavesMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves);
+      runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveAllCalleeSaves);
   image_methods_[ImageHeader::kSaveRefsOnlyMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly);
+      runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsOnly);
   image_methods_[ImageHeader::kSaveRefsAndArgsMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
+      runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs);
   image_methods_[ImageHeader::kSaveEverythingMethod] =
-      runtime->GetCalleeSaveMethod(Runtime::kSaveEverything);
+      runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything);
   // Visit image methods first to have the main runtime methods in the first image.
   for (auto* m : image_methods_) {
     CHECK(m != nullptr);
@@ -2482,8 +2483,8 @@
           GetOatAddress(kOatAddressQuickResolutionTrampoline), target_ptr_size_);
     } else {
       bool found_one = false;
-      for (size_t i = 0; i < static_cast<size_t>(Runtime::kLastCalleeSaveType); ++i) {
-        auto idx = static_cast<Runtime::CalleeSaveType>(i);
+      for (size_t i = 0; i < static_cast<size_t>(CalleeSaveType::kLastCalleeSaveType); ++i) {
+        auto idx = static_cast<CalleeSaveType>(i);
         if (runtime->HasCalleeSaveMethod(idx) && runtime->GetCalleeSaveMethod(idx) == orig) {
           found_one = true;
           break;
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index 39113c8..5e2db7d 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -33,9 +33,10 @@
 #include "base/enums.h"
 #include "base/length_prefixed_array.h"
 #include "base/macros.h"
+#include "class_table.h"
 #include "driver/compiler_driver.h"
-#include "gc/space/space.h"
 #include "image.h"
+#include "intern_table.h"
 #include "lock_word.h"
 #include "mem_map.h"
 #include "mirror/dex_cache.h"
@@ -47,6 +48,10 @@
 
 namespace art {
 namespace gc {
+namespace accounting {
+template <size_t kAlignment> class SpaceBitmap;
+typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap;
+}  // namespace accounting
 namespace space {
 class ImageSpace;
 }  // namespace space
@@ -57,7 +62,6 @@
 }  // namespace mirror
 
 class ClassLoaderVisitor;
-class ClassTable;
 class ImtConflictTable;
 
 static constexpr int kInvalidFd = -1;
@@ -103,19 +107,6 @@
 
   ArtMethod* GetImageMethodAddress(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template <typename PtrType>
-  PtrType GetDexCacheArrayElementImageAddress(const DexFile* dex_file, uint32_t offset)
-      const REQUIRES_SHARED(Locks::mutator_lock_) {
-    auto oat_it = dex_file_oat_index_map_.find(dex_file);
-    DCHECK(oat_it != dex_file_oat_index_map_.end());
-    const ImageInfo& image_info = GetImageInfo(oat_it->second);
-    auto it = image_info.dex_cache_array_starts_.find(dex_file);
-    DCHECK(it != image_info.dex_cache_array_starts_.end());
-    return reinterpret_cast<PtrType>(
-        image_info.image_begin_ + image_info.bin_slot_offsets_[kBinDexCacheArray] +
-            it->second + offset);
-  }
-
   size_t GetOatFileOffset(size_t oat_index) const {
     return GetImageInfo(oat_index).oat_offset_;
   }
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index fed1f48..6613541 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -189,18 +189,12 @@
 
 bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
   DCHECK(!method->IsProxyMethod());
+  DCHECK(method->GetDeclaringClass()->IsResolved());
+
   TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
-  StackHandleScope<2> hs(self);
   self->AssertNoPendingException();
   Runtime* runtime = Runtime::Current();
 
-  // Ensure the class is initialized.
-  Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
-  if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-    VLOG(jit) << "JIT failed to initialize " << method->PrettyMethod();
-    return false;
-  }
-
   // Do the compilation.
   bool success = false;
   {
diff --git a/compiler/linker/arm/relative_patcher_arm_base.cc b/compiler/linker/arm/relative_patcher_arm_base.cc
index c1ac230..18ff1c9 100644
--- a/compiler/linker/arm/relative_patcher_arm_base.cc
+++ b/compiler/linker/arm/relative_patcher_arm_base.cc
@@ -16,6 +16,7 @@
 
 #include "linker/arm/relative_patcher_arm_base.h"
 
+#include "base/stl_util.h"
 #include "compiled_method.h"
 #include "linker/output_stream.h"
 #include "oat.h"
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index c033c2d..bc21607 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -54,16 +54,16 @@
 
 inline bool IsAdrpPatch(const LinkerPatch& patch) {
   switch (patch.GetType()) {
-    case LinkerPatch::Type::kMethod:
     case LinkerPatch::Type::kCall:
     case LinkerPatch::Type::kCallRelative:
     case LinkerPatch::Type::kBakerReadBarrierBranch:
       return false;
+    case LinkerPatch::Type::kMethodRelative:
+    case LinkerPatch::Type::kMethodBssEntry:
     case LinkerPatch::Type::kTypeRelative:
     case LinkerPatch::Type::kTypeBssEntry:
     case LinkerPatch::Type::kStringRelative:
     case LinkerPatch::Type::kStringBssEntry:
-    case LinkerPatch::Type::kDexCacheArray:
       return patch.LiteralOffset() == patch.PcInsnOffset();
   }
 }
@@ -250,19 +250,21 @@
     if ((insn & 0xfffffc00) == 0x91000000) {
       // ADD immediate, 64-bit with imm12 == 0 (unset).
       if (!kEmitCompilerReadBarrier) {
-        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
-               patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
+        DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeRelative ||
+               patch.GetType() == LinkerPatch::Type::kStringRelative) << patch.GetType();
       } else {
         // With the read barrier (non-Baker) enabled, it could be kStringBssEntry or kTypeBssEntry.
-        DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
+        DCHECK(patch.GetType() == LinkerPatch::Type::kMethodRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative ||
-               patch.GetType() == LinkerPatch::Type::kStringBssEntry ||
-               patch.GetType() == LinkerPatch::Type::kTypeBssEntry) << patch.GetType();
+               patch.GetType() == LinkerPatch::Type::kStringRelative ||
+               patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
+               patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
       }
       shift = 0u;  // No shift for ADD.
     } else {
       // LDR/STR 32-bit or 64-bit with imm12 == 0 (unset).
-      DCHECK(patch.GetType() == LinkerPatch::Type::kDexCacheArray ||
+      DCHECK(patch.GetType() == LinkerPatch::Type::kMethodBssEntry ||
              patch.GetType() == LinkerPatch::Type::kTypeBssEntry ||
              patch.GetType() == LinkerPatch::Type::kStringBssEntry) << patch.GetType();
       DCHECK_EQ(insn & 0xbfbffc00, 0xb9000000) << std::hex << insn;
@@ -565,10 +567,10 @@
       return false;
     }
 
-    // And since LinkerPatch::Type::kStringRelative is using the result of the ADRP
-    // for an ADD immediate, check for that as well. We generalize a bit to include
-    // ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination or stores
-    // the result to a different register.
+    // And since LinkerPatch::Type::k{Method,Type,String}Relative is using the result
+    // of the ADRP for an ADD immediate, check for that as well. We generalize a bit
+    // to include ADD/ADDS/SUB/SUBS immediate that either uses the ADRP destination
+    // or stores the result to a different register.
     if ((next_insn & 0x1f000000) == 0x11000000 &&
         ((((next_insn >> 5) ^ adrp) & 0x1f) == 0 || ((next_insn ^ adrp) & 0x1f) != 0)) {
       return false;
diff --git a/compiler/linker/method_bss_mapping_encoder.h b/compiler/linker/method_bss_mapping_encoder.h
new file mode 100644
index 0000000..b2922ec
--- /dev/null
+++ b/compiler/linker/method_bss_mapping_encoder.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_LINKER_METHOD_BSS_MAPPING_ENCODER_H_
+#define ART_COMPILER_LINKER_METHOD_BSS_MAPPING_ENCODER_H_
+
+#include "base/enums.h"
+#include "base/logging.h"
+#include "dex_file.h"
+#include "method_bss_mapping.h"
+
+namespace art {
+namespace linker {
+
+// Helper class for building compressed MethodBssMapping entries, one entry at a time.
+class MethodBssMappingEncoder {
+ public:
+  explicit MethodBssMappingEncoder(PointerSize pointer_size)
+      : pointer_size_(static_cast<size_t>(pointer_size)) {
+    entry_.method_index = DexFile::kDexNoIndex16;  // Start with sentinel values.
+    entry_.index_mask = 0u;
+    entry_.bss_offset = static_cast<uint32_t>(-1);
+  }
+
+  // Try to merge the next method_index -> bss_offset mapping into the current entry.
+  // Returns true if merged; returns false and leaves the entry unchanged otherwise.
+  bool TryMerge(uint32_t method_index, uint32_t bss_offset) {
+    DCHECK_NE(method_index, entry_.method_index);
+    if (entry_.bss_offset + pointer_size_ != bss_offset) {
+      return false;  // The new offset is not exactly one pointer past the current one.
+    }
+    uint32_t diff = method_index - entry_.method_index;  // Unsigned; wraps if index is smaller.
+    if (diff > 16u) {
+      return false;  // The index we'd overwrite cannot be represented in the 16-bit mask.
+    }
+    if ((entry_.index_mask & ~(static_cast<uint32_t>(-1) << diff)) != 0u) {
+      return false;  // Some of the low `diff` mask bits are set and would be shifted out.
+    }
+    entry_.method_index = method_index;
+    // Insert the bit indicating the method index we've just overwritten (bit 16 before
+    // the shift) and shift down the bits indicating method indexes before that.
+    entry_.index_mask = dchecked_integral_cast<uint16_t>(
+        (static_cast<uint32_t>(entry_.index_mask) | 0x10000u) >> diff);
+    entry_.bss_offset = bss_offset;
+    return true;
+  }
+
+  void Reset(uint32_t method_index, uint32_t bss_offset) {  // Start a new single-index entry.
+    entry_.method_index = method_index;
+    entry_.index_mask = 0u;
+    entry_.bss_offset = bss_offset;
+  }
+
+  MethodBssMappingEntry GetEntry() {  // Returns a copy of the current entry.
+    return entry_;
+  }
+
+ private:
+  size_t pointer_size_;  // PointerSize as an integer; required bss_offset step in TryMerge().
+  MethodBssMappingEntry entry_;  // The entry being built.
+};
+
+}  // namespace linker
+}  // namespace art
+
+#endif  // ART_COMPILER_LINKER_METHOD_BSS_MAPPING_ENCODER_H_
diff --git a/compiler/linker/method_bss_mapping_encoder_test.cc b/compiler/linker/method_bss_mapping_encoder_test.cc
new file mode 100644
index 0000000..1240389
--- /dev/null
+++ b/compiler/linker/method_bss_mapping_encoder_test.cc
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "method_bss_mapping_encoder.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+namespace linker {
+
+TEST(MethodBssMappingEncoder, TryMerge) {
+  for (PointerSize pointer_size : {PointerSize::k32, PointerSize::k64}) {
+    size_t raw_pointer_size = static_cast<size_t>(pointer_size);
+    MethodBssMappingEncoder encoder(pointer_size);
+    encoder.Reset(1u, 0u);  // Start with method index 1 at bss_offset 0.
+    ASSERT_FALSE(encoder.TryMerge(5u, raw_pointer_size + 1));       // Wrong bss_offset difference.
+    ASSERT_FALSE(encoder.TryMerge(18u, raw_pointer_size));          // Method index out of range.
+    ASSERT_TRUE(encoder.TryMerge(5u, raw_pointer_size));
+    ASSERT_TRUE(encoder.GetEntry().CoversIndex(1u));
+    ASSERT_TRUE(encoder.GetEntry().CoversIndex(5u));
+    ASSERT_FALSE(encoder.GetEntry().CoversIndex(17u));
+    ASSERT_FALSE(encoder.TryMerge(17u, 2 * raw_pointer_size + 1));  // Wrong bss_offset difference.
+    ASSERT_FALSE(encoder.TryMerge(18u, 2 * raw_pointer_size));      // Method index out of range.
+    ASSERT_TRUE(encoder.TryMerge(17u, 2 * raw_pointer_size));
+    ASSERT_TRUE(encoder.GetEntry().CoversIndex(1u));
+    ASSERT_TRUE(encoder.GetEntry().CoversIndex(5u));
+    ASSERT_TRUE(encoder.GetEntry().CoversIndex(17u));
+    ASSERT_EQ(0u, encoder.GetEntry().GetBssOffset(1u, raw_pointer_size));
+    ASSERT_EQ(raw_pointer_size, encoder.GetEntry().GetBssOffset(5u, raw_pointer_size));
+    ASSERT_EQ(2 * raw_pointer_size, encoder.GetEntry().GetBssOffset(17u, raw_pointer_size));
+    ASSERT_EQ(0x0011u, encoder.GetEntry().index_mask);  // Bit 0 is index 1, bit 4 is index 5.
+    ASSERT_FALSE(encoder.TryMerge(18u, 3 * raw_pointer_size));      // Method index out of range.
+  }
+}
+
+}  // namespace linker
+}  // namespace art
diff --git a/compiler/linker/mips/relative_patcher_mips.cc b/compiler/linker/mips/relative_patcher_mips.cc
index 8da530f..d99d237 100644
--- a/compiler/linker/mips/relative_patcher_mips.cc
+++ b/compiler/linker/mips/relative_patcher_mips.cc
@@ -50,7 +50,6 @@
   uint32_t anchor_literal_offset = patch.PcInsnOffset();
   uint32_t literal_offset = patch.LiteralOffset();
   uint32_t literal_low_offset;
-  bool dex_cache_array = (patch.GetType() == LinkerPatch::Type::kDexCacheArray);
 
   // Perform basic sanity checks and initialize `literal_low_offset` to point
   // to the instruction containing the 16 least significant bits of the
@@ -72,16 +71,8 @@
     DCHECK_GE(code->size(), 16u);
     DCHECK_LE(literal_offset, code->size() - 12u);
     DCHECK_GE(literal_offset, 4u);
-    // The NAL instruction may not precede immediately as the PC+0 value may
-    // come from HMipsComputeBaseMethodAddress.
-    if (dex_cache_array) {
-      DCHECK_EQ(literal_offset + 4u, anchor_literal_offset);
-      // NAL
-      DCHECK_EQ((*code)[literal_offset - 4], 0x00);
-      DCHECK_EQ((*code)[literal_offset - 3], 0x00);
-      DCHECK_EQ((*code)[literal_offset - 2], 0x10);
-      DCHECK_EQ((*code)[literal_offset - 1], 0x04);
-    }
+    // The NAL instruction does not precede immediately as the PC+0
+    // comes from HMipsComputeBaseMethodAddress.
     // LUI reg, offset_high
     DCHECK_EQ((*code)[literal_offset + 0], 0x34);
     DCHECK_EQ((*code)[literal_offset + 1], 0x12);
@@ -90,10 +81,6 @@
     // ADDU reg, reg, reg2
     DCHECK_EQ((*code)[literal_offset + 4], 0x21);
     DCHECK_EQ(((*code)[literal_offset + 5] & 0x07), 0x00);
-    if (dex_cache_array) {
-      // reg2 is either RA or from HMipsComputeBaseMethodAddress.
-      DCHECK_EQ(((*code)[literal_offset + 6] & 0x1F), 0x1F);
-    }
     DCHECK_EQ(((*code)[literal_offset + 7] & 0xFC), 0x00);
     // instr reg(s), offset_low
     DCHECK_EQ((*code)[literal_offset + 8], 0x78);
@@ -104,9 +91,6 @@
   // Apply patch.
   uint32_t anchor_offset = patch_offset - literal_offset + anchor_literal_offset;
   uint32_t diff = target_offset - anchor_offset;
-  if (dex_cache_array && !is_r6) {
-    diff += kDexCacheArrayLwOffset;
-  }
   diff += (diff & 0x8000) << 1;  // Account for sign extension in "instr reg(s), offset_low".
 
   // LUI reg, offset_high / AUIPC reg, offset_high
diff --git a/compiler/linker/mips/relative_patcher_mips.h b/compiler/linker/mips/relative_patcher_mips.h
index 852a345..0b74bd3 100644
--- a/compiler/linker/mips/relative_patcher_mips.h
+++ b/compiler/linker/mips/relative_patcher_mips.h
@@ -46,9 +46,6 @@
                                    uint32_t patch_offset) OVERRIDE;
 
  private:
-  // We'll maximize the range of a single load instruction for dex cache array accesses
-  // by aligning offset -32768 with the offset of the first used element.
-  static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
   bool is_r6;
 
   DISALLOW_COPY_AND_ASSIGN(MipsRelativePatcher);
diff --git a/compiler/linker/mips/relative_patcher_mips_test.cc b/compiler/linker/mips/relative_patcher_mips_test.cc
index 961b312..49af7c6 100644
--- a/compiler/linker/mips/relative_patcher_mips_test.cc
+++ b/compiler/linker/mips/relative_patcher_mips_test.cc
@@ -61,7 +61,6 @@
   ASSERT_TRUE(result.first);
 
   uint32_t diff = target_offset - (result.second + kAnchorOffset);
-  CHECK_NE(patches[0].GetType(), LinkerPatch::Type::kDexCacheArray);
   diff += (diff & 0x8000) << 1;  // Account for sign extension in addiu.
 
   const uint8_t expected_code[] = {
diff --git a/compiler/linker/output_stream_test.cc b/compiler/linker/output_stream_test.cc
index 84c76f2..09fef29 100644
--- a/compiler/linker/output_stream_test.cc
+++ b/compiler/linker/output_stream_test.cc
@@ -19,7 +19,6 @@
 
 #include "base/unix_file/fd_file.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "buffered_output_stream.h"
 #include "common_runtime_test.h"
 
@@ -79,7 +78,7 @@
 TEST_F(OutputStreamTest, Buffered) {
   ScratchFile tmp;
   {
-    BufferedOutputStream buffered_output_stream(MakeUnique<FileOutputStream>(tmp.GetFile()));
+    BufferedOutputStream buffered_output_stream(std::make_unique<FileOutputStream>(tmp.GetFile()));
     SetOutputStream(buffered_output_stream);
     GenerateTestOutput();
   }
@@ -125,7 +124,7 @@
     bool flush_called;
   };
 
-  std::unique_ptr<CheckingOutputStream> cos = MakeUnique<CheckingOutputStream>();
+  std::unique_ptr<CheckingOutputStream> cos = std::make_unique<CheckingOutputStream>();
   CheckingOutputStream* checking_output_stream = cos.get();
   BufferedOutputStream buffered(std::move(cos));
   ASSERT_FALSE(checking_output_stream->flush_called);
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 1578c0c..55d0bd9 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -19,6 +19,7 @@
 #include "arch/instruction_set_features.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "common_compiler_test.h"
@@ -220,11 +221,12 @@
     elf_writer->PrepareDynamicSection(rodata_size,
                                       text_size,
                                       oat_writer.GetBssSize(),
+                                      oat_writer.GetBssMethodsOffset(),
                                       oat_writer.GetBssRootsOffset());
 
     if (kIsVdexEnabled) {
-      std::unique_ptr<BufferedOutputStream> vdex_out(
-            MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+      std::unique_ptr<BufferedOutputStream> vdex_out =
+            std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file));
       if (!oat_writer.WriteVerifierDeps(vdex_out.get(), nullptr)) {
         return false;
       }
@@ -483,7 +485,7 @@
 TEST_F(OatTest, OatHeaderSizeCheck) {
   // If this test is failing and you have to update these constants,
   // it is time to update OatHeader::kOatVersion
-  EXPECT_EQ(72U, sizeof(OatHeader));
+  EXPECT_EQ(76U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
   EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 5091c0b..59daf5a 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -22,7 +22,7 @@
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method-inl.h"
 #include "base/allocator.h"
-#include "base/bit_vector.h"
+#include "base/bit_vector-inl.h"
 #include "base/enums.h"
 #include "base/file_magic.h"
 #include "base/stl_util.h"
@@ -33,7 +33,7 @@
 #include "dex/verification_results.h"
 #include "dex_file-inl.h"
 #include "dexlayout.h"
-#include "driver/compiler_driver.h"
+#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space.h"
@@ -41,6 +41,7 @@
 #include "image_writer.h"
 #include "linker/buffered_output_stream.h"
 #include "linker/file_output_stream.h"
+#include "linker/method_bss_mapping_encoder.h"
 #include "linker/multi_oat_relative_patcher.h"
 #include "linker/output_stream.h"
 #include "mirror/array.h"
@@ -230,12 +231,14 @@
     return dex_file_location_data_;
   }
 
-  void ReserveClassOffsets(OatWriter* oat_writer);
-
   size_t SizeOf() const;
   bool Write(OatWriter* oat_writer, OutputStream* out) const;
   bool WriteClassOffsets(OatWriter* oat_writer, OutputStream* out);
 
+  size_t GetClassOffsetsRawSize() const {
+    return class_offsets_.size() * sizeof(class_offsets_[0]);
+  }
+
   // The source of the dex file.
   DexFileSource source_;
 
@@ -256,15 +259,12 @@
   uint32_t dex_file_offset_;
   uint32_t class_offsets_offset_;
   uint32_t lookup_table_offset_;
+  uint32_t method_bss_mapping_offset_;
 
   // Data to write to a separate section.
   dchecked_vector<uint32_t> class_offsets_;
 
  private:
-  size_t GetClassOffsetsRawSize() const {
-    return class_offsets_.size() * sizeof(class_offsets_[0]);
-  }
-
   DISALLOW_COPY_AND_ASSIGN(OatDexFile);
 };
 
@@ -294,7 +294,10 @@
     oat_size_(0u),
     bss_start_(0u),
     bss_size_(0u),
+    bss_methods_offset_(0u),
     bss_roots_offset_(0u),
+    bss_method_entry_references_(),
+    bss_method_entries_(),
     bss_type_entries_(),
     bss_string_entries_(),
     oat_data_offset_(0u),
@@ -331,6 +334,7 @@
     size_oat_dex_file_offset_(0),
     size_oat_dex_file_class_offsets_offset_(0),
     size_oat_dex_file_lookup_table_offset_(0),
+    size_oat_dex_file_method_bss_mapping_offset_(0),
     size_oat_lookup_table_alignment_(0),
     size_oat_lookup_table_(0),
     size_oat_class_offsets_alignment_(0),
@@ -339,6 +343,7 @@
     size_oat_class_status_(0),
     size_oat_class_method_bitmaps_(0),
     size_oat_class_method_offsets_(0),
+    size_method_bss_mappings_(0u),
     relative_patcher_(nullptr),
     absolute_patch_locations_(),
     profile_compilation_info_(info) {
@@ -502,17 +507,16 @@
     // Reserve space for Vdex header and checksums.
     vdex_size_ = sizeof(VdexFile::Header) + oat_dex_files_.size() * sizeof(VdexFile::VdexChecksum);
   }
-  size_t oat_data_offset = InitOatHeader(instruction_set,
-                                        instruction_set_features,
-                                        dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
-                                        key_value_store);
-  oat_size_ = InitOatDexFiles(oat_data_offset);
+  oat_size_ = InitOatHeader(instruction_set,
+                            instruction_set_features,
+                            dchecked_integral_cast<uint32_t>(oat_dex_files_.size()),
+                            key_value_store);
 
   ChecksumUpdatingOutputStream checksum_updating_rodata(oat_rodata, oat_header_.get());
 
   if (kIsVdexEnabled) {
-    std::unique_ptr<BufferedOutputStream> vdex_out(
-        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+    std::unique_ptr<BufferedOutputStream> vdex_out =
+        std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file));
     // Write DEX files into VDEX, mmap and open them.
     if (!WriteDexFiles(vdex_out.get(), vdex_file, update_input_vdex) ||
         !OpenDexFiles(vdex_file, verify, &dex_files_map, &dex_files)) {
@@ -539,16 +543,6 @@
     return false;
   }
 
-  // Reserve space for class offsets in OAT and update class_offsets_offset_.
-  for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    oat_dex_file.ReserveClassOffsets(this);
-  }
-
-  // Write OatDexFiles into OAT. Needs to be done last, once offsets are collected.
-  if (!WriteOatDexFiles(&checksum_updating_rodata)) {
-    return false;
-  }
-
   *opened_dex_files_map = std::move(dex_files_map);
   *opened_dex_files = std::move(dex_files);
   write_state_ = WriteState::kPrepareLayout;
@@ -567,16 +561,34 @@
   InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
   CHECK_EQ(instruction_set, oat_header_->GetInstructionSet());
 
+  {
+    TimingLogger::ScopedTiming split("InitBssLayout", timings_);
+    InitBssLayout(instruction_set);
+  }
+
   uint32_t offset = oat_size_;
   {
+    TimingLogger::ScopedTiming split("InitClassOffsets", timings_);
+    offset = InitClassOffsets(offset);
+  }
+  {
     TimingLogger::ScopedTiming split("InitOatClasses", timings_);
     offset = InitOatClasses(offset);
   }
   {
+    TimingLogger::ScopedTiming split("InitMethodBssMappings", timings_);
+    offset = InitMethodBssMappings(offset);
+  }
+  {
     TimingLogger::ScopedTiming split("InitOatMaps", timings_);
     offset = InitOatMaps(offset);
   }
   {
+    TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
+    oat_header_->SetOatDexFilesOffset(offset);
+    offset = InitOatDexFiles(offset);
+  }
+  {
     TimingLogger::ScopedTiming split("InitOatCode", timings_);
     offset = InitOatCode(offset);
   }
@@ -585,11 +597,7 @@
     offset = InitOatCodeDexFiles(offset);
   }
   oat_size_ = offset;
-
-  {
-    TimingLogger::ScopedTiming split("InitBssLayout", timings_);
-    InitBssLayout(instruction_set);
-  }
+  bss_start_ = (bss_size_ != 0u) ? RoundUp(oat_size_, kPageSize) : 0u;
 
   CHECK_EQ(dex_files_->size(), oat_dex_files_.size());
   if (compiling_boot_image_) {
@@ -606,11 +614,10 @@
 class OatWriter::DexMethodVisitor {
  public:
   DexMethodVisitor(OatWriter* writer, size_t offset)
-    : writer_(writer),
-      offset_(offset),
-      dex_file_(nullptr),
-      class_def_index_(DexFile::kDexNoIndex) {
-  }
+      : writer_(writer),
+        offset_(offset),
+        dex_file_(nullptr),
+        class_def_index_(DexFile::kDexNoIndex) {}
 
   virtual bool StartClass(const DexFile* dex_file, size_t class_def_index) {
     DCHECK(dex_file_ == nullptr);
@@ -650,19 +657,18 @@
 class OatWriter::OatDexMethodVisitor : public DexMethodVisitor {
  public:
   OatDexMethodVisitor(OatWriter* writer, size_t offset)
-    : DexMethodVisitor(writer, offset),
-      oat_class_index_(0u),
-      method_offsets_index_(0u) {
-  }
+      : DexMethodVisitor(writer, offset),
+        oat_class_index_(0u),
+        method_offsets_index_(0u) {}
 
-  bool StartClass(const DexFile* dex_file, size_t class_def_index) {
+  bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE {
     DexMethodVisitor::StartClass(dex_file, class_def_index);
     DCHECK_LT(oat_class_index_, writer_->oat_classes_.size());
     method_offsets_index_ = 0u;
     return true;
   }
 
-  bool EndClass() {
+  bool EndClass() OVERRIDE {
     ++oat_class_index_;
     return DexMethodVisitor::EndClass();
   }
@@ -672,21 +678,61 @@
   size_t method_offsets_index_;
 };
 
+class OatWriter::InitBssLayoutMethodVisitor : public DexMethodVisitor {
+ public:
+  explicit InitBssLayoutMethodVisitor(OatWriter* writer)
+      : DexMethodVisitor(writer, /* offset */ 0u) {}  // The offset is not used by this visitor.
+
+  bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED,
+                   const ClassDataItemIterator& it) OVERRIDE {
+    // Look for patches with .bss references and prepare maps with placeholders for their offsets.
+    CompiledMethod* compiled_method = writer_->compiler_driver_->GetCompiledMethod(
+        MethodReference(dex_file_, it.GetMemberIndex()));
+    if (compiled_method != nullptr) {  // Nothing to do without a CompiledMethod.
+      for (const LinkerPatch& patch : compiled_method->GetPatches()) {
+        if (patch.GetType() == LinkerPatch::Type::kMethodBssEntry) {
+          MethodReference target_method = patch.TargetMethod();
+          auto refs_it = writer_->bss_method_entry_references_.find(target_method.dex_file);
+          if (refs_it == writer_->bss_method_entry_references_.end()) {
+            refs_it = writer_->bss_method_entry_references_.Put(
+                target_method.dex_file,
+                BitVector(target_method.dex_file->NumMethodIds(),  // One bit per method id.
+                          /* expandable */ false,
+                          Allocator::GetMallocAllocator()));
+            refs_it->second.ClearAllBits();  // Start with no methods marked.
+          }
+          refs_it->second.SetBit(target_method.dex_method_index);  // Mark as referenced.
+          writer_->bss_method_entries_.Overwrite(target_method, /* placeholder */ 0u);
+        } else if (patch.GetType() == LinkerPatch::Type::kTypeBssEntry) {
+          TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex());
+          writer_->bss_type_entries_.Overwrite(ref, /* placeholder */ 0u);
+        } else if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
+          StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
+          writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u);
+        }
+      }
+    }
+    return true;
+  }
+};
+
 class OatWriter::InitOatClassesMethodVisitor : public DexMethodVisitor {
  public:
   InitOatClassesMethodVisitor(OatWriter* writer, size_t offset)
-    : DexMethodVisitor(writer, offset),
-      compiled_methods_(),
-      num_non_null_compiled_methods_(0u) {
+      : DexMethodVisitor(writer, offset),
+        compiled_methods_(),
+        num_non_null_compiled_methods_(0u) {
     size_t num_classes = 0u;
     for (const OatDexFile& oat_dex_file : writer_->oat_dex_files_) {
       num_classes += oat_dex_file.class_offsets_.size();
     }
     writer_->oat_classes_.reserve(num_classes);
     compiled_methods_.reserve(256u);
+    // If there are any classes, the class offsets allocation aligns the offset.
+    DCHECK(num_classes == 0u || IsAligned<4u>(offset));
   }
 
-  bool StartClass(const DexFile* dex_file, size_t class_def_index) {
+  bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE {
     DexMethodVisitor::StartClass(dex_file, class_def_index);
     compiled_methods_.clear();
     num_non_null_compiled_methods_ = 0u;
@@ -694,7 +740,7 @@
   }
 
   bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED,
-                   const ClassDataItemIterator& it) {
+                   const ClassDataItemIterator& it) OVERRIDE {
     // Fill in the compiled_methods_ array for methods that have a
     // CompiledMethod. We track the number of non-null entries in
     // num_non_null_compiled_methods_ since we only want to allocate
@@ -704,12 +750,12 @@
         writer_->compiler_driver_->GetCompiledMethod(MethodReference(dex_file_, method_idx));
     compiled_methods_.push_back(compiled_method);
     if (compiled_method != nullptr) {
-        ++num_non_null_compiled_methods_;
+      ++num_non_null_compiled_methods_;
     }
     return true;
   }
 
-  bool EndClass() {
+  bool EndClass() OVERRIDE {
     ClassReference class_ref(dex_file_, class_def_index_);
     mirror::Class::Status status;
     bool found = writer_->compiler_driver_->GetCompiledClass(class_ref, &status);
@@ -740,14 +786,14 @@
 class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
  public:
   InitCodeMethodVisitor(OatWriter* writer, size_t offset, size_t quickening_info_offset)
-    : OatDexMethodVisitor(writer, offset),
-      debuggable_(writer->GetCompilerDriver()->GetCompilerOptions().GetDebuggable()),
-      current_quickening_info_offset_(quickening_info_offset) {
+      : OatDexMethodVisitor(writer, offset),
+        debuggable_(writer->GetCompilerDriver()->GetCompilerOptions().GetDebuggable()),
+        current_quickening_info_offset_(quickening_info_offset) {
     writer_->absolute_patch_locations_.reserve(
         writer_->compiler_driver_->GetNonRelativeLinkerPatchCount());
   }
 
-  bool EndClass() {
+  bool EndClass() OVERRIDE {
     OatDexMethodVisitor::EndClass();
     if (oat_class_index_ == writer_->oat_classes_.size()) {
       offset_ = writer_->relative_patcher_->ReserveSpaceEnd(offset_);
@@ -755,7 +801,7 @@
     return true;
   }
 
-  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
@@ -858,14 +904,6 @@
             if (!patch.IsPcRelative()) {
               writer_->absolute_patch_locations_.push_back(base_loc + patch.LiteralOffset());
             }
-            if (patch.GetType() == LinkerPatch::Type::kTypeBssEntry) {
-              TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex());
-              writer_->bss_type_entries_.Overwrite(ref, /* placeholder */ 0u);
-            }
-            if (patch.GetType() == LinkerPatch::Type::kStringBssEntry) {
-              StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
-              writer_->bss_string_entries_.Overwrite(ref, /* placeholder */ 0u);
-            }
           }
         }
       }
@@ -950,11 +988,10 @@
 class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
  public:
   InitMapMethodVisitor(OatWriter* writer, size_t offset)
-    : OatDexMethodVisitor(writer, offset) {
-  }
+      : OatDexMethodVisitor(writer, offset) {}
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+      OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
@@ -997,7 +1034,7 @@
   InitMethodInfoVisitor(OatWriter* writer, size_t offset) : OatDexMethodVisitor(writer, offset) {}
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+      OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
@@ -1035,18 +1072,17 @@
   InitImageMethodVisitor(OatWriter* writer,
                          size_t offset,
                          const std::vector<const DexFile*>* dex_files)
-    : OatDexMethodVisitor(writer, offset),
-      pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())),
-      dex_files_(dex_files),
-      class_linker_(Runtime::Current()->GetClassLinker()) {
-    }
+      : OatDexMethodVisitor(writer, offset),
+        pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())),
+        dex_files_(dex_files),
+        class_linker_(Runtime::Current()->GetClassLinker()) {}
 
   // Handle copied methods here. Copy pointer to quick code from
   // an origin method to a copied method only if they are
   // in the same oat file. If the origin and the copied methods are
   // in different oat files don't touch the copied method.
   // References to other oat files are not supported yet.
-  bool StartClass(const DexFile* dex_file, size_t class_def_index)
+  bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
     // Skip classes that are not in the image.
@@ -1085,7 +1121,7 @@
     return true;
   }
 
-  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Skip methods that are not in the image.
     if (!IsImageClass()) {
@@ -1131,8 +1167,7 @@
       // Should already have been resolved by the compiler, just peek into the dex cache.
       // It may not be resolved if the class failed to verify, in this case, don't set the
       // entrypoint. This is not fatal since the dex cache will contain a resolution method.
-      method = dex_cache->GetResolvedMethod(it.GetMemberIndex(),
-          class_linker_->GetImagePointerSize());
+      method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), pointer_size_);
     }
     if (method != nullptr &&
         compiled_method != nullptr &&
@@ -1171,7 +1206,7 @@
     }
   }
 
- protected:
+ private:
   const PointerSize pointer_size_;
   const std::vector<const DexFile*>* dex_files_;
   ClassLinker* const class_linker_;
@@ -1182,14 +1217,15 @@
  public:
   WriteCodeMethodVisitor(OatWriter* writer, OutputStream* out, const size_t file_offset,
                          size_t relative_offset) SHARED_LOCK_FUNCTION(Locks::mutator_lock_)
-    : OatDexMethodVisitor(writer, relative_offset),
-      class_loader_(writer->HasImage() ? writer->image_writer_->GetClassLoader() : nullptr),
-      out_(out),
-      file_offset_(file_offset),
-      soa_(Thread::Current()),
-      no_thread_suspension_("OatWriter patching"),
-      class_linker_(Runtime::Current()->GetClassLinker()),
-      dex_cache_(nullptr) {
+      : OatDexMethodVisitor(writer, relative_offset),
+        pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())),
+        class_loader_(writer->HasImage() ? writer->image_writer_->GetClassLoader() : nullptr),
+        out_(out),
+        file_offset_(file_offset),
+        soa_(Thread::Current()),
+        no_thread_suspension_("OatWriter patching"),
+        class_linker_(Runtime::Current()->GetClassLinker()),
+        dex_cache_(nullptr) {
     patched_code_.reserve(16 * KB);
     if (writer_->HasBootImage()) {
       // If we're creating the image, the address space must be ready so that we can apply patches.
@@ -1200,7 +1236,7 @@
   ~WriteCodeMethodVisitor() UNLOCK_FUNCTION(Locks::mutator_lock_) {
   }
 
-  bool StartClass(const DexFile* dex_file, size_t class_def_index)
+  bool StartClass(const DexFile* dex_file, size_t class_def_index) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     OatDexMethodVisitor::StartClass(dex_file, class_def_index);
     if (dex_cache_ == nullptr || dex_cache_->GetDexFile() != dex_file) {
@@ -1210,7 +1246,7 @@
     return true;
   }
 
-  bool EndClass() REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool EndClass() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     bool result = OatDexMethodVisitor::EndClass();
     if (oat_class_index_ == writer_->oat_classes_.size()) {
       DCHECK(result);  // OatDexMethodVisitor::EndClass() never fails.
@@ -1223,7 +1259,7 @@
     return result;
   }
 
-  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
@@ -1275,6 +1311,15 @@
           for (const LinkerPatch& patch : compiled_method->GetPatches()) {
             uint32_t literal_offset = patch.LiteralOffset();
             switch (patch.GetType()) {
+              case LinkerPatch::Type::kMethodBssEntry: {
+                uint32_t target_offset =
+                    writer_->bss_start_ + writer_->bss_method_entries_.Get(patch.TargetMethod());
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
+                break;
+              }
               case LinkerPatch::Type::kCallRelative: {
                 // NOTE: Relative calls across oat files are not supported.
                 uint32_t target_offset = GetTargetOffset(patch);
@@ -1284,14 +1329,6 @@
                                                       target_offset);
                 break;
               }
-              case LinkerPatch::Type::kDexCacheArray: {
-                uint32_t target_offset = GetDexCacheOffset(patch);
-                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
-                                                                     patch,
-                                                                     offset_ + literal_offset,
-                                                                     target_offset);
-                break;
-              }
               case LinkerPatch::Type::kStringRelative: {
                 uint32_t target_offset = GetTargetObjectOffset(GetTargetString(patch));
                 writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
@@ -1302,7 +1339,8 @@
               }
               case LinkerPatch::Type::kStringBssEntry: {
                 StringReference ref(patch.TargetStringDexFile(), patch.TargetStringIndex());
-                uint32_t target_offset = writer_->bss_string_entries_.Get(ref);
+                uint32_t target_offset =
+                    writer_->bss_start_ + writer_->bss_string_entries_.Get(ref);
                 writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
                                                                      patch,
                                                                      offset_ + literal_offset,
@@ -1319,7 +1357,7 @@
               }
               case LinkerPatch::Type::kTypeBssEntry: {
                 TypeReference ref(patch.TargetTypeDexFile(), patch.TargetTypeIndex());
-                uint32_t target_offset = writer_->bss_type_entries_.Get(ref);
+                uint32_t target_offset = writer_->bss_start_ + writer_->bss_type_entries_.Get(ref);
                 writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
                                                                      patch,
                                                                      offset_ + literal_offset,
@@ -1331,9 +1369,12 @@
                 PatchCodeAddress(&patched_code_, literal_offset, target_offset);
                 break;
               }
-              case LinkerPatch::Type::kMethod: {
-                ArtMethod* method = GetTargetMethod(patch);
-                PatchMethodAddress(&patched_code_, literal_offset, method);
+              case LinkerPatch::Type::kMethodRelative: {
+                uint32_t target_offset = GetTargetMethodOffset(GetTargetMethod(patch));
+                writer_->relative_patcher_->PatchPcRelativeReference(&patched_code_,
+                                                                     patch,
+                                                                     offset_ + literal_offset,
+                                                                     target_offset);
                 break;
               }
               case LinkerPatch::Type::kBakerReadBarrierBranch: {
@@ -1365,6 +1406,7 @@
   }
 
  private:
+  const PointerSize pointer_size_;
   ObjPtr<mirror::ClassLoader> class_loader_;
   OutputStream* const out_;
   const size_t file_offset_;
@@ -1385,8 +1427,7 @@
     ObjPtr<mirror::DexCache> dex_cache =
         (dex_file_ == ref.dex_file) ? dex_cache_ : class_linker_->FindDexCache(
             Thread::Current(), *ref.dex_file);
-    ArtMethod* method = dex_cache->GetResolvedMethod(
-        ref.dex_method_index, class_linker_->GetImagePointerSize());
+    ArtMethod* method = dex_cache->GetResolvedMethod(ref.dex_method_index, pointer_size_);
     CHECK(method != nullptr);
     return method;
   }
@@ -1398,9 +1439,8 @@
     if (UNLIKELY(target_offset == 0)) {
       ArtMethod* target = GetTargetMethod(patch);
       DCHECK(target != nullptr);
-      PointerSize size =
-          GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet());
-      const void* oat_code_offset = target->GetEntryPointFromQuickCompiledCodePtrSize(size);
+      const void* oat_code_offset =
+          target->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
       if (oat_code_offset != 0) {
         DCHECK(!writer_->HasBootImage());
         DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickResolutionStub(oat_code_offset));
@@ -1444,17 +1484,15 @@
     return string;
   }
 
-  uint32_t GetDexCacheOffset(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (writer_->HasBootImage()) {
-      uintptr_t element = writer_->image_writer_->GetDexCacheArrayElementImageAddress<uintptr_t>(
-          patch.TargetDexCacheDexFile(), patch.TargetDexCacheElementOffset());
-      size_t oat_index = writer_->image_writer_->GetOatIndexForDexCache(dex_cache_);
-      uintptr_t oat_data = writer_->image_writer_->GetOatDataBegin(oat_index);
-      return element - oat_data;
-    } else {
-      size_t start = writer_->dex_cache_arrays_offsets_.Get(patch.TargetDexCacheDexFile());
-      return start + patch.TargetDexCacheElementOffset();
-    }
+  // Returns the distance from the oat data begin of the oat file for `dex_file_` to the
+  // image address of `method`. DCHECKs that a boot image is being written.
+  uint32_t GetTargetMethodOffset(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(writer_->HasBootImage());
+    method = writer_->image_writer_->GetImageMethodAddress(method);
+    size_t oat_index = writer_->image_writer_->GetOatIndexForDexFile(dex_file_);
+    uintptr_t oat_data_begin = writer_->image_writer_->GetOatDataBegin(oat_index);
+    // TODO: Clean up offset types. The target offset must be treated as signed.
+    return static_cast<uint32_t>(reinterpret_cast<uintptr_t>(method) - oat_data_begin);
   }
 
   uint32_t GetTargetObjectOffset(mirror::Object* object) REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1486,34 +1522,6 @@
     data[3] = (address >> 24) & 0xffu;
   }
 
-  void PatchMethodAddress(std::vector<uint8_t>* code, uint32_t offset, ArtMethod* method)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (writer_->HasBootImage()) {
-      method = writer_->image_writer_->GetImageMethodAddress(method);
-    } else if (kIsDebugBuild) {
-      // NOTE: We're using linker patches for app->boot references when the image can
-      // be relocated and therefore we need to emit .oat_patches. We're not using this
-      // for app->app references, so check that the method is an image method.
-      std::vector<gc::space::ImageSpace*> image_spaces =
-          Runtime::Current()->GetHeap()->GetBootImageSpaces();
-      bool contains_method = false;
-      for (gc::space::ImageSpace* image_space : image_spaces) {
-        size_t method_offset = reinterpret_cast<const uint8_t*>(method) - image_space->Begin();
-        contains_method |=
-            image_space->GetImageHeader().GetMethodsSection().Contains(method_offset);
-      }
-      CHECK(contains_method);
-    }
-    // Note: We only patch targeting ArtMethods in image which is in the low 4gb.
-    uint32_t address = PointerToLowMemUInt32(method);
-    DCHECK_LE(offset + 4, code->size());
-    uint8_t* data = &(*code)[offset];
-    data[0] = address & 0xffu;
-    data[1] = (address >> 8) & 0xffu;
-    data[2] = (address >> 16) & 0xffu;
-    data[3] = (address >> 24) & 0xffu;
-  }
-
   void PatchCodeAddress(std::vector<uint8_t>* code, uint32_t offset, uint32_t target_offset)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     uint32_t address = target_offset;
@@ -1541,12 +1549,11 @@
                         OutputStream* out,
                         const size_t file_offset,
                         size_t relative_offset)
-    : OatDexMethodVisitor(writer, relative_offset),
-      out_(out),
-      file_offset_(file_offset) {
-  }
+      : OatDexMethodVisitor(writer, relative_offset),
+        out_(out),
+        file_offset_(file_offset) {}
 
-  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE {
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
@@ -1605,11 +1612,11 @@
                          OutputStream* out,
                          const size_t file_offset,
                          size_t relative_offset)
-    : OatDexMethodVisitor(writer, relative_offset),
-      out_(out),
-      file_offset_(file_offset) {}
+      : OatDexMethodVisitor(writer, relative_offset),
+        out_(out),
+        file_offset_(file_offset) {}
 
-  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) OVERRIDE {
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
@@ -1714,12 +1721,19 @@
   return oat_header_->GetHeaderSize();
 }
 
-size_t OatWriter::InitOatDexFiles(size_t offset) {
-  TimingLogger::ScopedTiming split("InitOatDexFiles", timings_);
-  // Initialize offsets of dex files.
+// Assigns a 4-byte aligned `class_offsets_offset_` to every dex file that has a non-empty
+// class offsets table. Returns the offset past the last reserved table.
+size_t OatWriter::InitClassOffsets(size_t offset) {
+  // Reserve space for class offsets in OAT and update class_offsets_offset_.
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
-    oat_dex_file.offset_ = offset;
-    offset += oat_dex_file.SizeOf();
+    DCHECK_EQ(oat_dex_file.class_offsets_offset_, 0u);
+    if (!oat_dex_file.class_offsets_.empty()) {
+      // Class offsets are required to be 4 byte aligned.
+      offset = RoundUp(offset, 4u);
+      oat_dex_file.class_offsets_offset_ = offset;
+      offset += oat_dex_file.GetClassOffsetsRawSize();
+      DCHECK_ALIGNED(offset, 4u);
+    }
   }
   return offset;
 }
@@ -1764,6 +1776,56 @@
   return offset;
 }
 
+// Assigns `method_bss_mapping_offset_` for every dex file in `*dex_files_` that has
+// method .bss entry references and reserves space for its MethodBssMapping.
+// Takes the current `offset` and returns the offset past the reserved space.
+size_t OatWriter::InitMethodBssMappings(size_t offset) {
+  size_t number_of_dex_files = 0u;
+  for (size_t i = 0, size = dex_files_->size(); i != size; ++i) {
+    const DexFile* dex_file = (*dex_files_)[i];
+    auto it = bss_method_entry_references_.find(dex_file);
+    if (it != bss_method_entry_references_.end()) {
+      const BitVector& method_indexes = it->second;
+      ++number_of_dex_files;
+      // If there are any classes, the class offsets allocation aligns the offset
+      // and we cannot have method bss mappings without class offsets.
+      static_assert(alignof(MethodBssMapping) == 4u, "MethodBssMapping alignment check.");
+      DCHECK_ALIGNED(offset, 4u);
+      oat_dex_files_[i].method_bss_mapping_offset_ = offset;
+
+      // Run the encoder only to count the mapping entries that will be emitted: a new
+      // entry is started whenever an index cannot be merged into the current one.
+      linker::MethodBssMappingEncoder encoder(
+          GetInstructionSetPointerSize(oat_header_->GetInstructionSet()));
+      size_t number_of_entries = 0u;
+      bool first_index = true;
+      for (uint32_t method_index : method_indexes.Indexes()) {
+        uint32_t bss_offset = bss_method_entries_.Get(MethodReference(dex_file, method_index));
+        if (first_index || !encoder.TryMerge(method_index, bss_offset)) {
+          encoder.Reset(method_index, bss_offset);
+          ++number_of_entries;
+          first_index = false;
+        }
+      }
+      DCHECK_NE(number_of_entries, 0u);
+      offset += MethodBssMapping::ComputeSize(number_of_entries);
+    }
+  }
+  // Check that all dex files targeted by method bss entries are in `*dex_files_`.
+  CHECK_EQ(number_of_dex_files, bss_method_entry_references_.size());
+  return offset;
+}
+
+// Assigns each OatDexFile its `offset_`, advancing by `SizeOf()`; returns the end offset.
+size_t OatWriter::InitOatDexFiles(size_t offset) {
+  // Initialize offsets of oat dex files.
+  for (OatDexFile& oat_dex_file : oat_dex_files_) {
+    oat_dex_file.offset_ = offset;
+    offset += oat_dex_file.SizeOf();
+  }
+  return offset;
+}
+
 size_t OatWriter::InitOatCode(size_t offset) {
   // calculate the offsets within OatHeader to executable code
   size_t old_offset = offset;
@@ -1822,38 +1878,55 @@
 }
 
 void OatWriter::InitBssLayout(InstructionSet instruction_set) {
+  // NOTE(review): the visitor presumably records the method/type/string .bss entries
+  // referenced by the compiled code; confirm against InitBssLayoutMethodVisitor.
+  {
+    InitBssLayoutMethodVisitor visitor(this);
+    bool success = VisitDexMethods(&visitor);
+    DCHECK(success);
+  }
+
+  DCHECK_EQ(bss_size_, 0u);
   if (HasBootImage()) {
     DCHECK(bss_string_entries_.empty());
-    if (bss_type_entries_.empty()) {
+    if (bss_method_entries_.empty() && bss_type_entries_.empty()) {
       // Nothing to put to the .bss section.
       return;
     }
   }
 
   // Allocate space for app dex cache arrays in the .bss section.
-  bss_start_ = RoundUp(oat_size_, kPageSize);
-  bss_size_ = 0u;
+  PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
   if (!HasBootImage()) {
-    PointerSize pointer_size = GetInstructionSetPointerSize(instruction_set);
     for (const DexFile* dex_file : *dex_files_) {
-      dex_cache_arrays_offsets_.Put(dex_file, bss_start_ + bss_size_);
       DexCacheArraysLayout layout(pointer_size, dex_file);
       bss_size_ += layout.Size();
     }
   }
 
+  // Entry offsets recorded below are relative to the start of .bss (the running
+  // `bss_size_`); code that patches references adds `bss_start_` to them.
+  bss_methods_offset_ = bss_size_;
+
+  // Prepare offsets for .bss ArtMethod entries.
+  for (auto& entry : bss_method_entries_) {
+    DCHECK_EQ(entry.second, 0u);
+    entry.second = bss_size_;
+    bss_size_ += static_cast<size_t>(pointer_size);
+  }
+
   bss_roots_offset_ = bss_size_;
 
   // Prepare offsets for .bss Class entries.
   for (auto& entry : bss_type_entries_) {
     DCHECK_EQ(entry.second, 0u);
-    entry.second = bss_start_ + bss_size_;
+    entry.second = bss_size_;
     bss_size_ += sizeof(GcRoot<mirror::Class>);
   }
   // Prepare offsets for .bss String entries.
   for (auto& entry : bss_string_entries_) {
     DCHECK_EQ(entry.second, 0u);
-    entry.second = bss_start_ + bss_size_;
+    entry.second = bss_size_;
     bss_size_ += sizeof(GcRoot<mirror::String>);
   }
 }
@@ -1861,30 +1930,49 @@
 bool OatWriter::WriteRodata(OutputStream* out) {
   CHECK(write_state_ == WriteState::kWriteRoData);
 
+  // Offsets below are relative to the start of the oat data (`file_offset`). Each
+  // Write*() helper returns the updated relative offset, or 0 on failure.
+  size_t file_offset = oat_data_offset_;
+  off_t current_offset = out->Seek(0, kSeekCurrent);
+  if (current_offset == static_cast<off_t>(-1)) {
+    PLOG(ERROR) << "Failed to retrieve current position in " << out->GetLocation();
+    // Bail out like every other failure path; a -1 position would corrupt all offsets.
+    return false;
+  }
+  DCHECK_GE(static_cast<size_t>(current_offset), file_offset + oat_header_->GetHeaderSize());
+  size_t relative_offset = current_offset - file_offset;
+
   // Wrap out to update checksum with each write.
   ChecksumUpdatingOutputStream checksum_updating_out(out, oat_header_.get());
   out = &checksum_updating_out;
 
-  if (!WriteClassOffsets(out)) {
-    LOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
+  relative_offset = WriteClassOffsets(out, file_offset, relative_offset);
+  if (relative_offset == 0) {
+    PLOG(ERROR) << "Failed to write class offsets to " << out->GetLocation();
     return false;
   }
 
-  if (!WriteClasses(out)) {
-    LOG(ERROR) << "Failed to write classes to " << out->GetLocation();
+  relative_offset = WriteClasses(out, file_offset, relative_offset);
+  if (relative_offset == 0) {
+    PLOG(ERROR) << "Failed to write classes to " << out->GetLocation();
     return false;
   }
 
-  off_t tables_end_offset = out->Seek(0, kSeekCurrent);
-  if (tables_end_offset == static_cast<off_t>(-1)) {
-    LOG(ERROR) << "Failed to get oat code position in " << out->GetLocation();
+  relative_offset = WriteMethodBssMappings(out, file_offset, relative_offset);
+  if (relative_offset == 0) {
+    PLOG(ERROR) << "Failed to write method bss mappings to " << out->GetLocation();
     return false;
   }
-  size_t file_offset = oat_data_offset_;
-  size_t relative_offset = static_cast<size_t>(tables_end_offset) - file_offset;
+
   relative_offset = WriteMaps(out, file_offset, relative_offset);
   if (relative_offset == 0) {
-    LOG(ERROR) << "Failed to write oat code to " << out->GetLocation();
+    PLOG(ERROR) << "Failed to write oat code to " << out->GetLocation();
+    return false;
+  }
+
+  relative_offset = WriteOatDexFiles(out, file_offset, relative_offset);
+  if (relative_offset == 0) {
+    PLOG(ERROR) << "Failed to write oat dex information to " << out->GetLocation();
     return false;
   }
 
@@ -1907,12 +1991,12 @@
 class OatWriter::WriteQuickeningInfoMethodVisitor : public DexMethodVisitor {
  public:
   WriteQuickeningInfoMethodVisitor(OatWriter* writer, OutputStream* out, uint32_t offset)
-    : DexMethodVisitor(writer, offset),
-      out_(out),
-      written_bytes_(0u) {}
+      : DexMethodVisitor(writer, offset),
+        out_(out),
+        written_bytes_(0u) {}
 
   bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED,
-                   const ClassDataItemIterator& it) {
+                   const ClassDataItemIterator& it) OVERRIDE {
     if (it.GetMethodCodeItem() == nullptr) {
       // No CodeItem. Native or abstract method.
       return true;
@@ -2108,6 +2192,7 @@
     DO_STAT(size_oat_dex_file_offset_);
     DO_STAT(size_oat_dex_file_class_offsets_offset_);
     DO_STAT(size_oat_dex_file_lookup_table_offset_);
+    DO_STAT(size_oat_dex_file_method_bss_mapping_offset_);
     DO_STAT(size_oat_lookup_table_alignment_);
     DO_STAT(size_oat_lookup_table_);
     DO_STAT(size_oat_class_offsets_alignment_);
@@ -2116,6 +2201,7 @@
     DO_STAT(size_oat_class_status_);
     DO_STAT(size_oat_class_method_bitmaps_);
     DO_STAT(size_oat_class_method_offsets_);
+    DO_STAT(size_method_bss_mappings_);
     #undef DO_STAT
 
     VLOG(compiler) << "size_total=" << PrettySize(size_total) << " (" << size_total << "B)";
@@ -2188,35 +2274,45 @@
   return true;
 }
 
-bool OatWriter::WriteClassOffsets(OutputStream* out) {
+// Writes the class offsets of every dex file with a non-zero `class_offsets_offset_`,
+// padding to 4-byte alignment first. Returns the updated `relative_offset`, or 0 on failure.
+size_t OatWriter::WriteClassOffsets(OutputStream* out, size_t file_offset, size_t relative_offset) {
   for (OatDexFile& oat_dex_file : oat_dex_files_) {
     if (oat_dex_file.class_offsets_offset_ != 0u) {
-      uint32_t expected_offset = oat_data_offset_ + oat_dex_file.class_offsets_offset_;
-      off_t actual_offset = out->Seek(expected_offset, kSeekSet);
-      if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-        PLOG(ERROR) << "Failed to seek to oat class offsets section. Actual: " << actual_offset
-                    << " Expected: " << expected_offset << " File: " << oat_dex_file.GetLocation();
-        return false;
+      // Class offsets are required to be 4 byte aligned.
+      if (UNLIKELY(!IsAligned<4u>(relative_offset))) {
+        size_t padding_size =  RoundUp(relative_offset, 4u) - relative_offset;
+        if (!WriteUpTo16BytesAlignment(out, padding_size, &size_oat_class_offsets_alignment_)) {
+          return 0u;
+        }
+        relative_offset += padding_size;
       }
+      DCHECK_OFFSET();
       if (!oat_dex_file.WriteClassOffsets(this, out)) {
-        return false;
+        return 0u;
       }
+      relative_offset += oat_dex_file.GetClassOffsetsRawSize();
     }
   }
-  return true;
+  return relative_offset;
 }
 
-bool OatWriter::WriteClasses(OutputStream* out) {
+// Writes all OatClass records in order.
+// Returns the updated `relative_offset`, or 0 on failure.
+size_t OatWriter::WriteClasses(OutputStream* out, size_t file_offset, size_t relative_offset) {
   for (OatClass& oat_class : oat_classes_) {
+    // If there are any classes, the class offsets allocation aligns the offset.
+    DCHECK_ALIGNED(relative_offset, 4u);
+    DCHECK_OFFSET();
     if (!oat_class.Write(this, out, oat_data_offset_)) {
-      PLOG(ERROR) << "Failed to write oat methods information to " << out->GetLocation();
-      return false;
+      return 0u;
     }
+    relative_offset += oat_class.SizeOf();
   }
-  return true;
+  return relative_offset;
 }
 
-size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) {
+size_t OatWriter::WriteMaps(OutputStream* out, size_t file_offset, size_t relative_offset) {
   {
     size_t vmap_tables_offset = relative_offset;
     WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
@@ -2239,7 +2331,92 @@
   return relative_offset;
 }
 
-size_t OatWriter::WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset) {
+// Encodes and writes the MethodBssMapping of each dex file that has method .bss entry
+// references; each is DCHECKed to land at its `method_bss_mapping_offset_`.
+// Returns the updated `relative_offset`, or 0 on failure.
+size_t OatWriter::WriteMethodBssMappings(OutputStream* out,
+                                         size_t file_offset,
+                                         size_t relative_offset) {
+  TimingLogger::ScopedTiming split("WriteMethodBssMappings", timings_);
+
+  for (size_t i = 0, size = dex_files_->size(); i != size; ++i) {
+    const DexFile* dex_file = (*dex_files_)[i];
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    auto it = bss_method_entry_references_.find(dex_file);
+    if (it != bss_method_entry_references_.end()) {
+      const BitVector& method_indexes = it->second;
+      // If there are any classes, the class offsets allocation aligns the offset
+      // and we cannot have method bss mappings without class offsets.
+      static_assert(alignof(MethodBssMapping) == sizeof(uint32_t),
+                    "MethodBssMapping alignment check.");
+      DCHECK_ALIGNED(relative_offset, sizeof(uint32_t));
+
+      linker::MethodBssMappingEncoder encoder(
+          GetInstructionSetPointerSize(oat_header_->GetInstructionSet()));
+      // Allocate a sufficiently large MethodBssMapping.
+      size_t number_of_method_indexes = method_indexes.NumSetBits();
+      DCHECK_NE(number_of_method_indexes, 0u);
+      size_t max_mappings_size = MethodBssMapping::ComputeSize(number_of_method_indexes);
+      DCHECK_ALIGNED(max_mappings_size, sizeof(uint32_t));
+      std::unique_ptr<uint32_t[]> storage(new uint32_t[max_mappings_size / sizeof(uint32_t)]);
+      MethodBssMapping* mappings = new(storage.get()) MethodBssMapping(number_of_method_indexes);
+      mappings->ClearPadding();
+      // Encode the MethodBssMapping.
+      auto init_it = mappings->begin();
+      bool first_index = true;
+      for (uint32_t method_index : method_indexes.Indexes()) {
+        size_t bss_offset = bss_method_entries_.Get(MethodReference(dex_file, method_index));
+        if (first_index) {
+          first_index = false;
+          encoder.Reset(method_index, bss_offset);
+        } else if (!encoder.TryMerge(method_index, bss_offset)) {
+          *init_it = encoder.GetEntry();
+          ++init_it;
+          encoder.Reset(method_index, bss_offset);
+        }
+      }
+      // Store the last entry and shrink the mapping to the actual size.
+      *init_it = encoder.GetEntry();
+      ++init_it;
+      DCHECK(init_it <= mappings->end());
+      mappings->SetSize(std::distance(mappings->begin(), init_it));
+      size_t mappings_size = MethodBssMapping::ComputeSize(mappings->size());
+
+      DCHECK_EQ(relative_offset, oat_dex_file->method_bss_mapping_offset_);
+      DCHECK_OFFSET();
+      if (!out->WriteFully(storage.get(), mappings_size)) {
+        return 0u;
+      }
+      size_method_bss_mappings_ += mappings_size;
+      relative_offset += mappings_size;
+    } else {
+      DCHECK_EQ(0u, oat_dex_file->method_bss_mapping_offset_);
+    }
+  }
+  return relative_offset;
+}
+
+// Writes every OatDexFile record; each is DCHECKed to start at its precomputed `offset_`.
+// Returns the updated `relative_offset`, or 0 on failure.
+size_t OatWriter::WriteOatDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset) {
+  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
+
+  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
+    OatDexFile* oat_dex_file = &oat_dex_files_[i];
+    DCHECK_EQ(relative_offset, oat_dex_file->offset_);
+    DCHECK_OFFSET();
+
+    // Write OatDexFile.
+    if (!oat_dex_file->Write(this, out)) {
+      return 0u;
+    }
+    relative_offset += oat_dex_file->SizeOf();
+  }
+
+  return relative_offset;
+}
+
+size_t OatWriter::WriteCode(OutputStream* out, size_t file_offset, size_t relative_offset) {
   if (compiler_driver_->GetCompilerOptions().IsBootImage()) {
     InstructionSet instruction_set = compiler_driver_->GetInstructionSet();
 
@@ -2269,7 +2441,7 @@
 }
 
 size_t OatWriter::WriteCodeDexFiles(OutputStream* out,
-                                    const size_t file_offset,
+                                    size_t file_offset,
                                     size_t relative_offset) {
   #define VISIT(VisitorType)                                              \
     do {                                                                  \
@@ -2683,50 +2855,6 @@
   return true;
 }
 
-bool OatWriter::WriteOatDexFiles(OutputStream* rodata) {
-  TimingLogger::ScopedTiming split("WriteOatDexFiles", timings_);
-
-  off_t initial_offset = rodata->Seek(0, kSeekCurrent);
-  if (initial_offset == static_cast<off_t>(-1)) {
-    LOG(ERROR) << "Failed to get current position in " << rodata->GetLocation();
-    return false;
-  }
-
-  // Seek to the start of OatDexFiles, i.e. to the end of the OatHeader.  If there are
-  // no OatDexFiles, no data is actually written to .rodata before WriteHeader() and
-  // this Seek() ensures that we reserve the space for OatHeader in .rodata.
-  DCHECK(oat_dex_files_.empty() || oat_dex_files_[0u].offset_ == oat_header_->GetHeaderSize());
-  uint32_t expected_offset = oat_data_offset_ + oat_header_->GetHeaderSize();
-  off_t actual_offset = rodata->Seek(expected_offset, kSeekSet);
-  if (static_cast<uint32_t>(actual_offset) != expected_offset) {
-    PLOG(ERROR) << "Failed to seek to OatDexFile table section. Actual: " << actual_offset
-                << " Expected: " << expected_offset << " File: " << rodata->GetLocation();
-    return false;
-  }
-
-  for (size_t i = 0, size = oat_dex_files_.size(); i != size; ++i) {
-    OatDexFile* oat_dex_file = &oat_dex_files_[i];
-
-    DCHECK_EQ(oat_data_offset_ + oat_dex_file->offset_,
-              static_cast<size_t>(rodata->Seek(0, kSeekCurrent)));
-
-    // Write OatDexFile.
-    if (!oat_dex_file->Write(this, rodata)) {
-      PLOG(ERROR) << "Failed to write oat dex information to " << rodata->GetLocation();
-      return false;
-    }
-  }
-
-  // Seek back to the initial position.
-  if (rodata->Seek(initial_offset, kSeekSet) != initial_offset) {
-    PLOG(ERROR) << "Failed to seek to initial position. Actual: " << actual_offset
-                << " Expected: " << initial_offset << " File: " << rodata->GetLocation();
-    return false;
-  }
-
-  return true;
-}
-
 bool OatWriter::OpenDexFiles(
     File* file,
     bool verify,
@@ -2945,14 +3073,18 @@
 }
 
 bool OatWriter::WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta) {
+  return WriteUpTo16BytesAlignment(out, aligned_code_delta, &size_code_alignment_);
+}
+
+bool OatWriter::WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat) {
   static const uint8_t kPadding[] = {
       0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u
   };
-  DCHECK_LE(aligned_code_delta, sizeof(kPadding));
-  if (UNLIKELY(!out->WriteFully(kPadding, aligned_code_delta))) {
+  DCHECK_LE(size, sizeof(kPadding));
+  if (UNLIKELY(!out->WriteFully(kPadding, size))) {
     return false;
   }
-  size_code_alignment_ += aligned_code_delta;
+  *stat += size;
   return true;
 }
 
@@ -2981,6 +3113,7 @@
       dex_file_offset_(0u),
       class_offsets_offset_(0u),
       lookup_table_offset_(0u),
+      method_bss_mapping_offset_(0u),
       class_offsets_() {
 }
 
@@ -2990,19 +3123,8 @@
           + sizeof(dex_file_location_checksum_)
           + sizeof(dex_file_offset_)
           + sizeof(class_offsets_offset_)
-          + sizeof(lookup_table_offset_);
-}
-
-void OatWriter::OatDexFile::ReserveClassOffsets(OatWriter* oat_writer) {
-  DCHECK_EQ(class_offsets_offset_, 0u);
-  if (!class_offsets_.empty()) {
-    // Class offsets are required to be 4 byte aligned.
-    size_t initial_offset = oat_writer->oat_size_;
-    size_t offset = RoundUp(initial_offset, 4);
-    oat_writer->size_oat_class_offsets_alignment_ += offset - initial_offset;
-    class_offsets_offset_ = offset;
-    oat_writer->oat_size_ = offset + GetClassOffsetsRawSize();
-  }
+          + sizeof(lookup_table_offset_)
+          + sizeof(method_bss_mapping_offset_);
 }
 
 bool OatWriter::OatDexFile::Write(OatWriter* oat_writer, OutputStream* out) const {
@@ -3045,6 +3167,12 @@
   }
   oat_writer->size_oat_dex_file_lookup_table_offset_ += sizeof(lookup_table_offset_);
 
+  if (!out->WriteFully(&method_bss_mapping_offset_, sizeof(method_bss_mapping_offset_))) {
+    PLOG(ERROR) << "Failed to write method bss mapping offset to " << out->GetLocation();
+    return false;
+  }
+  oat_writer->size_oat_dex_file_method_bss_mapping_offset_ += sizeof(method_bss_mapping_offset_);
+
   return true;
 }
 
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index e778f75..9217701 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -31,7 +31,7 @@
 #include "os.h"
 #include "safe_map.h"
 #include "string_reference.h"
-#include "utils/type_reference.h"
+#include "type_reference.h"
 
 namespace art {
 
@@ -60,11 +60,6 @@
 
 // OatHeader         variable length with count of D OatDexFiles
 //
-// OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
-// OatDexFile[1]
-// ...
-// OatDexFile[D]
-//
 // TypeLookupTable[0] one descriptor to class def index hash table for each OatDexFile.
 // TypeLookupTable[1]
 // ...
@@ -80,20 +75,25 @@
 // ...
 // OatClass[C]
 //
-// GcMap             one variable sized blob with GC map.
-// GcMap             GC maps are deduplicated.
+// MethodBssMapping  one variable sized MethodBssMapping for each dex file, optional.
+// MethodBssMapping
 // ...
-// GcMap
+// MethodBssMapping
 //
-// VmapTable         one variable sized VmapTable blob (quick compiler only).
+// VmapTable         one variable sized VmapTable blob (CodeInfo or QuickeningInfo).
 // VmapTable         VmapTables are deduplicated.
 // ...
 // VmapTable
 //
-// MappingTable      one variable sized blob with MappingTable (quick compiler only).
-// MappingTable      MappingTables are deduplicated.
+// MethodInfo        one variable sized blob with MethodInfo.
+// MethodInfo        MethodInfos are deduplicated.
 // ...
-// MappingTable
+// MethodInfo
+//
+// OatDexFile[0]     one variable sized OatDexFile with offsets to Dex and OatClasses
+// OatDexFile[1]
+// ...
+// OatDexFile[D]
 //
 // padding           if necessary so that the following code will be page aligned
 //
@@ -217,6 +217,10 @@
     return bss_size_;
   }
 
+  size_t GetBssMethodsOffset() const {
+    return bss_methods_offset_;
+  }
+
   size_t GetBssRootsOffset() const {
     return bss_roots_offset_;
   }
@@ -251,6 +255,7 @@
   // to actually write it.
   class DexMethodVisitor;
   class OatDexMethodVisitor;
+  class InitBssLayoutMethodVisitor;
   class InitOatClassesMethodVisitor;
   class InitCodeMethodVisitor;
   class InitMapMethodVisitor;
@@ -295,26 +300,30 @@
                        const InstructionSetFeatures* instruction_set_features,
                        uint32_t num_dex_files,
                        SafeMap<std::string, std::string>* key_value_store);
-  size_t InitOatDexFiles(size_t offset);
+  size_t InitClassOffsets(size_t offset);
   size_t InitOatClasses(size_t offset);
   size_t InitOatMaps(size_t offset);
+  size_t InitMethodBssMappings(size_t offset);
+  size_t InitOatDexFiles(size_t offset);
   size_t InitOatCode(size_t offset);
   size_t InitOatCodeDexFiles(size_t offset);
   void InitBssLayout(InstructionSet instruction_set);
 
-  bool WriteClassOffsets(OutputStream* out);
-  bool WriteClasses(OutputStream* out);
-  size_t WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset);
-  size_t WriteCode(OutputStream* out, const size_t file_offset, size_t relative_offset);
-  size_t WriteCodeDexFiles(OutputStream* out, const size_t file_offset, size_t relative_offset);
+  size_t WriteClassOffsets(OutputStream* out, size_t file_offset, size_t relative_offset);
+  size_t WriteClasses(OutputStream* out, size_t file_offset, size_t relative_offset);
+  size_t WriteMaps(OutputStream* out, size_t file_offset, size_t relative_offset);
+  size_t WriteMethodBssMappings(OutputStream* out, size_t file_offset, size_t relative_offset);
+  size_t WriteOatDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset);
+  size_t WriteCode(OutputStream* out, size_t file_offset, size_t relative_offset);
+  size_t WriteCodeDexFiles(OutputStream* out, size_t file_offset, size_t relative_offset);
 
   bool RecordOatDataOffset(OutputStream* out);
   bool ReadDexFileHeader(File* oat_file, OatDexFile* oat_dex_file);
   bool ValidateDexFileHeader(const uint8_t* raw_header, const char* location);
-  bool WriteOatDexFiles(OutputStream* oat_rodata);
   bool WriteTypeLookupTables(OutputStream* oat_rodata,
                              const std::vector<std::unique_ptr<const DexFile>>& opened_dex_files);
   bool WriteCodeAlignment(OutputStream* out, uint32_t aligned_code_delta);
+  bool WriteUpTo16BytesAlignment(OutputStream* out, uint32_t size, uint32_t* stat);
   void SetMultiOatRelativePatcherAdjustment();
   void CloseSources();
 
@@ -368,9 +377,20 @@
   // The size of the required .bss section holding the DexCache data and GC roots.
   size_t bss_size_;
 
+  // The offset of the methods in .bss section.
+  size_t bss_methods_offset_;
+
   // The offset of the GC roots in .bss section.
   size_t bss_roots_offset_;
 
+  // Map for recording references to ArtMethod entries in .bss.
+  SafeMap<const DexFile*, BitVector> bss_method_entry_references_;
+
+  // Map for allocating ArtMethod entries in .bss. Indexed by MethodReference for the target
+  // method in the dex file with the "method reference value comparator" for deduplication.
+  // The value is the target offset for patching, starting at `bss_start_ + bss_methods_offset_`.
+  SafeMap<MethodReference, size_t, MethodReferenceValueComparator> bss_method_entries_;
+
   // Map for allocating Class entries in .bss. Indexed by TypeReference for the source
   // type in the dex file with the "type value comparator" for deduplication. The value
   // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`.
@@ -381,10 +401,6 @@
   // is the target offset for patching, starting at `bss_start_ + bss_roots_offset_`.
   SafeMap<StringReference, size_t, StringReferenceValueComparator> bss_string_entries_;
 
-  // Offsets of the dex cache arrays for each app dex file. For the
-  // boot image, this information is provided by the ImageWriter.
-  SafeMap<const DexFile*, size_t> dex_cache_arrays_offsets_;  // DexFiles not owned.
-
   // Offset of the oat data from the start of the mmapped region of the elf file.
   size_t oat_data_offset_;
 
@@ -434,6 +450,7 @@
   uint32_t size_oat_dex_file_offset_;
   uint32_t size_oat_dex_file_class_offsets_offset_;
   uint32_t size_oat_dex_file_lookup_table_offset_;
+  uint32_t size_oat_dex_file_method_bss_mapping_offset_;
   uint32_t size_oat_lookup_table_alignment_;
   uint32_t size_oat_lookup_table_;
   uint32_t size_oat_class_offsets_alignment_;
@@ -442,6 +459,7 @@
   uint32_t size_oat_class_status_;
   uint32_t size_oat_class_method_bitmaps_;
   uint32_t size_oat_class_method_offsets_;
+  uint32_t size_method_bss_mappings_;
 
   // The helper for processing relative patches is external so that we can patch across oat files.
   linker::MultiOatRelativePatcher* relative_patcher_;
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index cb6e14b..a949c33 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@
   void RunBCE() {
     graph_->BuildDominatorTree();
 
-    InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
+    InstructionSimplifier(graph_, /* codegen */ nullptr, /* driver */ nullptr).Run();
 
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 65f3c72..93234f9 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -41,6 +41,8 @@
 #include "code_generator_mips64.h"
 #endif
 
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "bytecode_utils.h"
 #include "class_linker.h"
 #include "compiled_method.h"
@@ -58,7 +60,7 @@
 #include "parallel_move_resolver.h"
 #include "ssa_liveness_analysis.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/assembler.h"
 
 namespace art {
@@ -337,7 +339,7 @@
       case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
         locations->SetInAt(call->GetSpecialInputIndex(), visitor->GetMethodLocation());
         break;
-      case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
+      case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
         locations->AddTemp(visitor->GetMethodLocation());
         locations->SetInAt(call->GetSpecialInputIndex(), Location::RequiresRegister());
         break;
@@ -350,6 +352,34 @@
   }
 }
 
+void CodeGenerator::GenerateInvokeStaticOrDirectRuntimeCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
+  MoveConstant(temp, invoke->GetDexMethodIndex());
+  // Call the runtime through the WithAccessCheck invoke trampoline for the invoke type.
+  // The access check is unnecessary but we do not want to introduce
+  // extra entrypoints for the codegens that do not support some
+  // invoke type and fall back to the runtime call.
+
+  // Initialize to anything to silence compiler warnings.
+  QuickEntrypointEnum entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck;
+  switch (invoke->GetInvokeType()) {
+    case kStatic:
+      entrypoint = kQuickInvokeStaticTrampolineWithAccessCheck;
+      break;
+    case kDirect:
+      entrypoint = kQuickInvokeDirectTrampolineWithAccessCheck;
+      break;
+    case kSuper:
+      entrypoint = kQuickInvokeSuperTrampolineWithAccessCheck;
+      break;
+    case kVirtual:  // Virtual and interface invokes are a fatal error in this helper.
+    case kInterface:
+      LOG(FATAL) << "Unexpected invoke type: " << invoke->GetInvokeType();
+      UNREACHABLE();
+  }
+
+  InvokeRuntime(entrypoint, invoke, invoke->GetDexPc(), slow_path);
+}
 void CodeGenerator::GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke) {
   MoveConstant(invoke->GetLocations()->GetTemp(0), invoke->GetDexMethodIndex());
 
@@ -508,7 +538,7 @@
 void CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(HLoadClass* cls,
                                                               Location runtime_type_index_location,
                                                               Location runtime_return_location) {
-  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
   DCHECK_EQ(cls->InputCount(), 1u);
   LocationSummary* locations = new (cls->GetBlock()->GetGraph()->GetArena()) LocationSummary(
       cls, LocationSummary::kCallOnMainOnly);
@@ -518,7 +548,7 @@
 }
 
 void CodeGenerator::GenerateLoadClassRuntimeCall(HLoadClass* cls) {
-  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(cls->GetLoadKind(), HLoadClass::LoadKind::kRuntimeCall);
   LocationSummary* locations = cls->GetLocations();
   MoveConstant(locations->GetTemp(0), cls->GetTypeIndex().index_);
   if (cls->NeedsAccessCheck()) {
@@ -557,6 +587,9 @@
 }
 
 void CodeGenerator::AllocateLocations(HInstruction* instruction) {
+  for (HEnvironment* env = instruction->GetEnvironment(); env != nullptr; env = env->GetParent()) {
+    env->AllocateLocations();
+  }
   instruction->Accept(GetLocationBuilder());
   DCHECK(CheckTypeConsistency(instruction));
   LocationSummary* locations = instruction->GetLocations();
@@ -1400,20 +1433,6 @@
   locations->AddTemp(Location::RequiresRegister());
 }
 
-uint32_t CodeGenerator::GetReferenceSlowFlagOffset() const {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
-  DCHECK(klass->IsInitialized());
-  return klass->GetSlowPathFlagOffset().Uint32Value();
-}
-
-uint32_t CodeGenerator::GetReferenceDisableFlagOffset() const {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
-  DCHECK(klass->IsInitialized());
-  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
-}
-
 void CodeGenerator::EmitJitRoots(uint8_t* code,
                                  Handle<mirror::ObjectArray<mirror::Object>> roots,
                                  const uint8_t* roots_data) {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 9ef692a..7bf43f7 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -31,10 +31,11 @@
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
 #include "read_barrier_option.h"
+#include "stack.h"
 #include "stack_map_stream.h"
 #include "string_reference.h"
+#include "type_reference.h"
 #include "utils/label.h"
-#include "utils/type_reference.h"
 
 namespace art {
 
@@ -495,6 +496,8 @@
   static void CreateCommonInvokeLocationSummary(
       HInvoke* invoke, InvokeDexCallingConventionVisitor* visitor);
 
+  void GenerateInvokeStaticOrDirectRuntimeCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path);
   void GenerateInvokeUnresolvedRuntimeCall(HInvokeUnresolved* invoke);
 
   void GenerateInvokePolymorphicCall(HInvokePolymorphic* invoke);
@@ -541,7 +544,7 @@
       case HLoadString::LoadKind::kBssEntry:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnSlowPath;
-      case HLoadString::LoadKind::kDexCacheViaMethod:
+      case HLoadString::LoadKind::kRuntimeCall:
         DCHECK(load->NeedsEnvironment());
         return LocationSummary::kCallOnMainOnly;
       case HLoadString::LoadKind::kJitTableAddress:
@@ -563,18 +566,17 @@
       HInvokeStaticOrDirect* invoke) = 0;
 
   // Generate a call to a static or direct method.
-  virtual void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) = 0;
+  virtual void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
   // Generate a call to a virtual method.
-  virtual void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) = 0;
+  virtual void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) = 0;
 
   // Copy the result of a call into the given target.
   virtual void MoveFromReturnRegister(Location trg, Primitive::Type type) = 0;
 
   virtual void GenerateNop() = 0;
 
-  uint32_t GetReferenceSlowFlagOffset() const;
-  uint32_t GetReferenceDisableFlagOffset() const;
-
   static QuickEntrypointEnum GetArrayAllocationEntrypoint(Handle<mirror::Class> array_klass);
 
  protected:
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 713d370..e4efbef 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -19,6 +19,8 @@
 #include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
@@ -47,7 +49,6 @@
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }
 
-static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = R0;
 
 static constexpr Register kCoreAlwaysSpillRegister = R5;
@@ -1656,6 +1657,34 @@
   }
 }
 
+// Folds `x <u 1`, `x >=u 1`, `x > -1` and `x <= -1` into the equivalent comparison with 0.
+// On a match updates `*condition`/`*opposite` and returns 0; otherwise returns `value`.
+static int64_t AdjustConstantForCondition(int64_t value,
+                                          IfCondition* condition,
+                                          IfCondition* opposite) {
+  if (value == 1 && *condition == kCondB) {
+    *condition = kCondEQ;
+    *opposite = kCondNE;
+    return 0;
+  }
+  if (value == 1 && *condition == kCondAE) {
+    *condition = kCondNE;
+    *opposite = kCondEQ;
+    return 0;
+  }
+  if (value == -1 && *condition == kCondGT) {
+    *condition = kCondGE;
+    *opposite = kCondLT;
+    return 0;
+  }
+  if (value == -1 && *condition == kCondLE) {
+    *condition = kCondLT;
+    *opposite = kCondGE;
+    return 0;
+  }
+  return value;
+}
+
 static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
                                                                 bool invert,
                                                                 CodeGeneratorARM* codegen) {
@@ -1669,7 +1698,7 @@
     std::swap(cond, opposite);
   }
 
-  std::pair<Condition, Condition> ret;
+  std::pair<Condition, Condition> ret(EQ, NE);
   const Location left = locations->InAt(0);
   const Location right = locations->InAt(1);
 
@@ -1677,7 +1706,38 @@
 
   const Register left_high = left.AsRegisterPairHigh<Register>();
   const Register left_low = left.AsRegisterPairLow<Register>();
-  int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+  int64_t value = AdjustConstantForCondition(right.GetConstant()->AsLongConstant()->GetValue(),
+                                             &cond,
+                                             &opposite);
+
+  // Comparisons against 0 are common enough to deserve special attention.
+  if (value == 0) {
+    switch (cond) {
+      case kCondNE:
+      // x > 0 iff x != 0 when the comparison is unsigned.
+      case kCondA:
+        ret = std::make_pair(NE, EQ);
+        FALLTHROUGH_INTENDED;
+      case kCondEQ:
+      // x <= 0 iff x == 0 when the comparison is unsigned.
+      case kCondBE:
+        __ orrs(IP, left_low, ShifterOperand(left_high));
+        return ret;
+      case kCondLT:
+      case kCondGE:
+        __ cmp(left_high, ShifterOperand(0));
+        return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      // Trivially true or false.
+      case kCondB:
+        ret = std::make_pair(NE, EQ);
+        FALLTHROUGH_INTENDED;
+      case kCondAE:
+        __ cmp(left_low, ShifterOperand(left_low));
+        return ret;
+      default:
+        break;
+    }
+  }
 
   switch (cond) {
     case kCondEQ:
@@ -1837,10 +1897,14 @@
 static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
   if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
     const LocationSummary* const locations = condition->GetLocations();
-    const IfCondition c = condition->GetCondition();
 
     if (locations->InAt(1).IsConstant()) {
-      const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+      IfCondition c = condition->GetCondition();
+      IfCondition opposite = condition->GetOppositeCondition();
+      const int64_t value = AdjustConstantForCondition(
+          Int64FromConstant(locations->InAt(1).GetConstant()),
+          &c,
+          &opposite);
       ShifterOperand so;
 
       if (c < kCondLT || c > kCondGE) {
@@ -1848,9 +1912,11 @@
         // we check that the least significant half of the first input to be compared
         // is in a low register (the other half is read outside an IT block), and
         // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
-        // encoding can be used.
-        if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
-            !IsUint<8>(Low32Bits(value))) {
+        // encoding can be used; 0 is always handled, no matter what registers are
+        // used by the first input.
+        if (value != 0 &&
+            (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+             !IsUint<8>(Low32Bits(value)))) {
           return false;
         }
       } else if (c == kCondLE || c == kCondGT) {
@@ -1877,6 +1943,329 @@
   return true;
 }
 
+static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARM* codegen) {
+  DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
+  // Materializes `cond` as 0 or 1 in its output register from GenerateTest()'s condition pair.
+  const Register out = cond->GetLocations()->Out().AsRegister<Register>();
+  const auto condition = GenerateTest(cond, false, codegen);
+  // Preset the result to 0; kCcKeep presumably leaves the condition flags intact (confirm).
+  __ mov(out, ShifterOperand(0), AL, kCcKeep);
+  // Low register: conditional move of 1 in an IT block. Otherwise: branch over the load of 1.
+  if (ArmAssembler::IsLowRegister(out)) {
+    __ it(condition.first);
+    __ mov(out, ShifterOperand(1), condition.first);
+  } else {
+    Label done_label;
+    Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+
+    __ b(final_label, condition.second);
+    __ LoadImmediate(out, 1);
+
+    if (done_label.IsLinked()) {
+      __ Bind(&done_label);
+    }
+  }
+}
+
+static void GenerateEqualLong(HCondition* cond, CodeGeneratorARM* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+  // 64-bit ==/!= as 0/1: subtract each half, OR the two differences, then test against 0.
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const Register out = locations->Out().AsRegister<Register>();
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+  Register left_high = left.AsRegisterPairHigh<Register>();
+  Register left_low = left.AsRegisterPairLow<Register>();
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+    const int64_t value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+                                                     &condition,
+                                                     &opposite);
+    int32_t value_high = -High32Bits(value);  // Negated so AddConstant() subtracts.
+    int32_t value_low = -Low32Bits(value);
+
+    // The output uses Location::kNoOutputOverlap.
+    if (out == left_high) {
+      std::swap(left_low, left_high);
+      std::swap(value_low, value_high);
+    }
+    // `out` is written first, hence the swap above when it aliases left_high.
+    __ AddConstant(out, left_low, value_low);
+    __ AddConstant(IP, left_high, value_high);
+  } else {
+    DCHECK(right.IsRegisterPair());
+    __ sub(IP, left_high, ShifterOperand(right.AsRegisterPairHigh<Register>()));
+    __ sub(out, left_low, ShifterOperand(right.AsRegisterPairLow<Register>()));
+  }
+
+  // Need to check after calling AdjustConstantForCondition().
+  DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
+  // NE in a low register: flag-setting OR, then conditionally replace the result with 1.
+  if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
+    __ orrs(out, out, ShifterOperand(IP));
+    __ it(NE);
+    __ mov(out, ShifterOperand(1), NE);
+  } else {
+    __ orr(out, out, ShifterOperand(IP));
+    codegen->GenerateConditionWithZero(condition, out, out, IP);
+  }
+}
+
+static void GenerateLongComparesAndJumps(HCondition* cond,
+                                         Label* true_label,
+                                         Label* false_label,
+                                         CodeGeneratorARM* codegen) {
+  LocationSummary* locations = cond->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+  IfCondition if_cond = cond->GetCondition();
+  // Compares high words (branching when decisive), then low words; ends with b true_label.
+  Register left_high = left.AsRegisterPairHigh<Register>();
+  Register left_low = left.AsRegisterPairLow<Register>();
+  IfCondition true_high_cond = if_cond;
+  IfCondition false_high_cond = cond->GetOppositeCondition();
+  Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
+
+  // Set the conditions for the test, remembering that == needs to be
+  // decided using the low words.
+  switch (if_cond) {
+    case kCondEQ:
+    case kCondNE:
+      // Nothing to do.
+      break;
+    case kCondLT:
+      false_high_cond = kCondGT;
+      break;
+    case kCondLE:
+      true_high_cond = kCondLT;
+      break;
+    case kCondGT:
+      false_high_cond = kCondLT;
+      break;
+    case kCondGE:
+      true_high_cond = kCondGT;
+      break;
+    case kCondB:
+      false_high_cond = kCondA;
+      break;
+    case kCondBE:
+      true_high_cond = kCondB;
+      break;
+    case kCondA:
+      false_high_cond = kCondB;
+      break;
+    case kCondAE:
+      true_high_cond = kCondA;
+      break;
+  }
+  if (right.IsConstant()) {
+    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+    int32_t val_low = Low32Bits(value);
+    int32_t val_high = High32Bits(value);
+
+    __ CmpConstant(left_high, val_high);
+    if (if_cond == kCondNE) {
+      __ b(true_label, ARMCondition(true_high_cond));
+    } else if (if_cond == kCondEQ) {
+      __ b(false_label, ARMCondition(false_high_cond));
+    } else {
+      __ b(true_label, ARMCondition(true_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
+    }
+    // Must be equal high, so compare the lows.
+    __ CmpConstant(left_low, val_low);
+  } else {
+    Register right_high = right.AsRegisterPairHigh<Register>();
+    Register right_low = right.AsRegisterPairLow<Register>();
+    // Same branch pattern as the constant case, with register operands.
+    __ cmp(left_high, ShifterOperand(right_high));
+    if (if_cond == kCondNE) {
+      __ b(true_label, ARMCondition(true_high_cond));
+    } else if (if_cond == kCondEQ) {
+      __ b(false_label, ARMCondition(false_high_cond));
+    } else {
+      __ b(true_label, ARMCondition(true_high_cond));
+      __ b(false_label, ARMCondition(false_high_cond));
+    }
+    // Must be equal high, so compare the lows.
+    __ cmp(left_low, ShifterOperand(right_low));
+  }
+  // The last comparison might be unsigned.
+  // TODO: optimize cases where this is always true/false
+  __ b(true_label, final_condition);
+}
+
+static void GenerateConditionLong(HCondition* cond, CodeGeneratorARM* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+  // Materializes a 64-bit comparison as 0/1 in the output register, special cases first.
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const Register out = locations->Out().AsRegister<Register>();
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+                                   &condition,
+                                   &opposite) == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (ArmAssembler::IsLowRegister(out)) {
+            // We only care if both input registers are 0 or not.
+            __ orrs(out,
+                    left.AsRegisterPairLow<Register>(),
+                    ShifterOperand(left.AsRegisterPairHigh<Register>()));
+            __ it(NE);
+            __ mov(out, ShifterOperand(1), NE);
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+          // We only care if both input registers are 0 or not.
+          __ orr(out,
+                 left.AsRegisterPairLow<Register>(),
+                 ShifterOperand(left.AsRegisterPairHigh<Register>()));
+          codegen->GenerateConditionWithZero(condition, out, out);
+          return;
+        case kCondLT:
+        case kCondGE:
+          // We only care about the sign bit.
+          FALLTHROUGH_INTENDED;
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, left.AsRegisterPairHigh<Register>());
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+  // `condition` may have been rewritten by AdjustConstantForCondition() above.
+  if ((condition == kCondEQ || condition == kCondNE) &&
+      // If `out` is a low register, then the GenerateConditionGeneric()
+      // function generates a shorter code sequence that is still branchless.
+      (!ArmAssembler::IsLowRegister(out) || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+    GenerateEqualLong(cond, codegen);
+    return;
+  }
+
+  if (CanGenerateTest(cond, codegen->GetAssembler())) {
+    GenerateConditionGeneric(cond, codegen);
+    return;
+  }
+
+  // Convert the jumps into the result.
+  Label done_label;
+  Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+  Label true_label, false_label;
+  // The emitted compare sequence falls through to the false case when no branch is taken.
+  GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen);
+
+  // False case: result = 0.
+  __ Bind(&false_label);
+  __ mov(out, ShifterOperand(0));
+  __ b(final_label);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ mov(out, ShifterOperand(1));
+
+  if (done_label.IsLinked()) {
+    __ Bind(&done_label);
+  }
+}
+
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARM* codegen) {
+  const Primitive::Type type = cond->GetLeft()->GetType();
+  // Materializes an integral or reference comparison as 0/1 in the output register.
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    GenerateConditionLong(cond, codegen);
+    return;
+  }
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  Register in = locations->InAt(0).AsRegister<Register>();
+  const Register out = locations->Out().AsRegister<Register>();
+  const Location right = cond->GetLocations()->InAt(1);
+  int64_t value;  // Only assigned and read when `right` is a constant.
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    value = AdjustConstantForCondition(Int64FromConstant(right.GetConstant()),
+                                       &condition,
+                                       &opposite);
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (value == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (ArmAssembler::IsLowRegister(out) && out == in) {
+            __ cmp(out, ShifterOperand(0));
+            __ it(NE);
+            __ mov(out, ShifterOperand(1), NE);
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+        case kCondLT:
+        case kCondGE:
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, in);
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;
+      }
+    }
+  }
+  // ==/!=: subtract the operands and test the difference against zero.
+  if (condition == kCondEQ || condition == kCondNE) {
+    ShifterOperand operand;
+
+    if (right.IsConstant()) {
+      operand = ShifterOperand(value);
+    } else if (out == right.AsRegister<Register>()) {
+      // Avoid 32-bit instructions if possible.
+      operand = ShifterOperand(in);
+      in = right.AsRegister<Register>();
+    } else {
+      operand = ShifterOperand(right.AsRegister<Register>());
+    }
+
+    if (condition == kCondNE && ArmAssembler::IsLowRegister(out)) {
+      __ subs(out, in, operand);
+      __ it(NE);
+      __ mov(out, ShifterOperand(1), NE);
+    } else {
+      __ sub(out, in, operand);
+      codegen->GenerateConditionWithZero(condition, out, out);
+    }
+
+    return;
+  }
+  // All remaining conditions go through GenerateConditionGeneric().
+  GenerateConditionGeneric(cond, codegen);
+}
+
 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
   const Primitive::Type type = constant->GetType();
   bool ret = false;
@@ -2008,10 +2397,11 @@
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2479,89 +2869,6 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
-                                                               Label* true_label,
-                                                               Label* false_label) {
-  LocationSummary* locations = cond->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-  IfCondition if_cond = cond->GetCondition();
-
-  Register left_high = left.AsRegisterPairHigh<Register>();
-  Register left_low = left.AsRegisterPairLow<Register>();
-  IfCondition true_high_cond = if_cond;
-  IfCondition false_high_cond = cond->GetOppositeCondition();
-  Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
-
-  // Set the conditions for the test, remembering that == needs to be
-  // decided using the low words.
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      // Nothing to do.
-      break;
-    case kCondLT:
-      false_high_cond = kCondGT;
-      break;
-    case kCondLE:
-      true_high_cond = kCondLT;
-      break;
-    case kCondGT:
-      false_high_cond = kCondLT;
-      break;
-    case kCondGE:
-      true_high_cond = kCondGT;
-      break;
-    case kCondB:
-      false_high_cond = kCondA;
-      break;
-    case kCondBE:
-      true_high_cond = kCondB;
-      break;
-    case kCondA:
-      false_high_cond = kCondB;
-      break;
-    case kCondAE:
-      true_high_cond = kCondA;
-      break;
-  }
-  if (right.IsConstant()) {
-    int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
-    int32_t val_low = Low32Bits(value);
-    int32_t val_high = High32Bits(value);
-
-    __ CmpConstant(left_high, val_high);
-    if (if_cond == kCondNE) {
-      __ b(true_label, ARMCondition(true_high_cond));
-    } else if (if_cond == kCondEQ) {
-      __ b(false_label, ARMCondition(false_high_cond));
-    } else {
-      __ b(true_label, ARMCondition(true_high_cond));
-      __ b(false_label, ARMCondition(false_high_cond));
-    }
-    // Must be equal high, so compare the lows.
-    __ CmpConstant(left_low, val_low);
-  } else {
-    Register right_high = right.AsRegisterPairHigh<Register>();
-    Register right_low = right.AsRegisterPairLow<Register>();
-
-    __ cmp(left_high, ShifterOperand(right_high));
-    if (if_cond == kCondNE) {
-      __ b(true_label, ARMCondition(true_high_cond));
-    } else if (if_cond == kCondEQ) {
-      __ b(false_label, ARMCondition(false_high_cond));
-    } else {
-      __ b(true_label, ARMCondition(true_high_cond));
-      __ b(false_label, ARMCondition(false_high_cond));
-    }
-    // Must be equal high, so compare the lows.
-    __ cmp(left_low, ShifterOperand(right_low));
-  }
-  // The last comparison might be unsigned.
-  // TODO: optimize cases where this is always true/false
-  __ b(true_label, final_condition);
-}
-
 void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
                                                                Label* true_target_in,
                                                                Label* false_target_in) {
@@ -2596,7 +2903,7 @@
   Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
   DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
-  GenerateLongComparesAndJumps(condition, true_target, false_target);
+  GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_);
 
   if (false_target != &fallthrough_target) {
     __ b(false_target);
@@ -2911,6 +3218,80 @@
   __ nop();
 }
 
+// Sets `out` to 1 if `in <condition> 0` holds and to 0 otherwise. `temp` is an optional extra
+// register needed by some conditions; when it is omitted (kNoRegister), `out` is used instead,
+// or the IP scratch register for kCondNE/kCondA when `out` and `in` are the same register.
+void CodeGeneratorARM::GenerateConditionWithZero(IfCondition condition,
+                                                 Register out,
+                                                 Register in,
+                                                 Register temp) {
+  switch (condition) {
+    case kCondEQ:
+    // x <= 0 iff x == 0 when the comparison is unsigned.
+    case kCondBE:
+      if (temp == kNoRegister || (ArmAssembler::IsLowRegister(out) && out != in)) {
+        temp = out;
+      }
+
+      // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
+      // different as well.
+      if (ArmAssembler::IsLowRegister(in) && ArmAssembler::IsLowRegister(temp) && in != temp) {
+        // temp = - in; only 0 sets the carry flag.
+        __ rsbs(temp, in, ShifterOperand(0));
+
+        if (out == in) {
+          std::swap(in, temp);
+        }
+
+        // out = - in + in + carry = carry
+        __ adc(out, temp, ShifterOperand(in));
+      } else {
+        // If `in` is 0, then it has 32 leading zeros, and fewer than that otherwise.
+        __ clz(out, in);
+        // Any number less than 32 logically shifted right by 5 bits results in 0;
+        // the same operation on 32 yields 1.
+        __ Lsr(out, out, 5);
+      }
+
+      break;
+    case kCondNE:
+    // x > 0 iff x != 0 when the comparison is unsigned.
+    case kCondA:
+      if (out == in) {
+        if (temp == kNoRegister || in == temp) {
+          temp = IP;
+        }
+      } else if (temp == kNoRegister || !ArmAssembler::IsLowRegister(temp)) {
+        temp = out;
+      }
+
+      // temp = in - 1; only 0 does not set the carry flag.
+      __ subs(temp, in, ShifterOperand(1));
+      // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
+      __ sbc(out, in, ShifterOperand(temp));
+      break;
+    case kCondGE:
+      __ mvn(out, ShifterOperand(in));
+      in = out;
+      FALLTHROUGH_INTENDED;
+    case kCondLT:
+      // We only care about the sign bit.
+      __ Lsr(out, in, 31);
+      break;
+    case kCondAE:
+      // Trivially true.
+      __ mov(out, ShifterOperand(1));
+      break;
+    case kCondB:
+      // Trivially false.
+      __ mov(out, ShifterOperand(0));
+      break;
+    default:
+      LOG(FATAL) << "Unexpected condition " << condition;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARM::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -2947,48 +3328,48 @@
     return;
   }
 
-  const Register out = cond->GetLocations()->Out().AsRegister<Register>();
+  const Primitive::Type type = cond->GetLeft()->GetType();
 
-  if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
-    const auto condition = GenerateTest(cond, false, codegen_);
-
-    __ it(condition.first);
-    __ mov(out, ShifterOperand(1), condition.first);
-    __ it(condition.second);
-    __ mov(out, ShifterOperand(0), condition.second);
+  if (Primitive::IsFloatingPointType(type)) {
+    GenerateConditionGeneric(cond, codegen_);
     return;
   }
 
-  // Convert the jumps into the result.
-  Label done_label;
-  Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
 
-  if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
-    Label true_label, false_label;
+  const IfCondition condition = cond->GetCondition();
 
-    GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+  // Conditions with a single boolean input, or with two boolean inputs but a comparison other
+  // than equality/inequality, result from instruction simplifier transformations and are handled
+  // as regular conditions with integral inputs.
+  if (type == Primitive::kPrimBoolean &&
+      cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+      (condition == kCondEQ || condition == kCondNE)) {
+    const LocationSummary* const locations = cond->GetLocations();
+    Register left = locations->InAt(0).AsRegister<Register>();
+    const Register out = locations->Out().AsRegister<Register>();
+    const Location right_loc = locations->InAt(1);
 
-    // False case: result = 0.
-    __ Bind(&false_label);
-    __ LoadImmediate(out, 0);
-    __ b(final_label);
+    // The constant case is handled by the instruction simplifier.
+    DCHECK(!right_loc.IsConstant());
 
-    // True case: result = 1.
-    __ Bind(&true_label);
-    __ LoadImmediate(out, 1);
-  } else {
-    DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+    Register right = right_loc.AsRegister<Register>();
 
-    const auto condition = GenerateTest(cond, false, codegen_);
+    // Avoid 32-bit instructions if possible.
+    if (out == right) {
+      std::swap(left, right);
+    }
 
-    __ mov(out, ShifterOperand(0), AL, kCcKeep);
-    __ b(final_label, condition.second);
-    __ LoadImmediate(out, 1);
+    __ eor(out, left, ShifterOperand(right));
+
+    if (condition == kCondEQ) {
+      __ eor(out, out, ShifterOperand(1));
+    }
+
+    return;
   }
 
-  if (done_label.IsLinked()) {
-    __ Bind(&done_label);
-  }
+  GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -3174,18 +3555,10 @@
 
   IntrinsicLocationsBuilderARM intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
-      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
-    }
     return;
   }
 
   HandleInvoke(invoke);
-
-  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
-  if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARM* codegen) {
@@ -3209,7 +3582,6 @@
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderARM::HandleInvoke(HInvoke* invoke) {
@@ -3233,7 +3605,6 @@
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderARM::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -6757,7 +7128,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -6765,7 +7136,7 @@
 
 void LocationsBuilderARM::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -6818,7 +7189,7 @@
 // move.
 void InstructionCodeGeneratorARM::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -6890,7 +7261,7 @@
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -6952,7 +7323,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -6962,7 +7333,7 @@
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(R0));
   } else {
     locations->SetOut(Location::RequiresRegister());
@@ -7049,7 +7420,7 @@
   }
 
   // TODO: Consider re-adding the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
@@ -8566,7 +8937,8 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather that trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  if (slow_path != nullptr && slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
     int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
     __ LoadFromOffset(kLoadWord, temp, SP, stack_offset);
     return temp;
@@ -8574,8 +8946,8 @@
   return location.AsRegister<Register>();
 }
 
-Location CodeGeneratorARM::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                                  Location temp) {
+void CodeGeneratorARM::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -8588,47 +8960,39 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      Register temp_reg = temp.AsRegister<Register>();
+      PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(temp_reg, temp_reg, ShifterOperand(PC));
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadImmediate(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HArmDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
-      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
-                                                                temp.AsRegister<Register>());
-      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
-      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+      Register temp_reg = temp.AsRegister<Register>();
+      PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(temp_reg, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(temp_reg, temp_reg, ShifterOperand(PC));
+      __ LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset */ 0);
       break;
     }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register method_reg;
-      Register reg = temp.AsRegister<Register>();
-      if (current_method.IsRegister()) {
-        method_reg = current_method.AsRegister<Register>();
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ LoadFromOffset(kLoadWord, reg, SP, kCurrentMethodStackOffset);
-      }
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      __ LoadFromOffset(kLoadWord,
-                        reg,
-                        method_reg,
-                        ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ LoadFromOffset(kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
-  return callee_method;
-}
-
-void CodeGeneratorARM::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -8643,11 +9007,13 @@
       __ blx(LR);
       break;
   }
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 
   DCHECK(!IsLeafMethod());
 }
 
-void CodeGeneratorARM::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+void CodeGeneratorARM::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
   Register temp = temp_location.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
@@ -8678,11 +9044,21 @@
   __ LoadFromOffset(kLoadWord, LR, temp, entry_point);
   // LR();
   __ blx(LR);
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }
 
-CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
+}
+
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeTypePatch(
@@ -8695,9 +9071,9 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
-CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorARM::PcRelativePatchInfo* CodeGeneratorARM::NewPcRelativePatch(
@@ -8758,24 +9134,28 @@
 void CodeGeneratorARM::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
@@ -8912,23 +9292,6 @@
   }
 }
 
-void LocationsBuilderARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
-  locations->SetOut(Location::RequiresRegister());
-}
-
-void InstructionCodeGeneratorARM::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  Register base_reg = base->GetLocations()->Out().AsRegister<Register>();
-  CodeGeneratorARM::PcRelativePatchInfo* labels =
-      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-  __ BindTrackedLabel(&labels->movw_label);
-  __ movw(base_reg, /* placeholder */ 0u);
-  __ BindTrackedLabel(&labels->movt_label);
-  __ movt(base_reg, /* placeholder */ 0u);
-  __ BindTrackedLabel(&labels->add_pc_label);
-  __ add(base_reg, base_reg, ShifterOperand(PC));
-}
-
 void CodeGeneratorARM::MoveFromReturnRegister(Location trg, Primitive::Type type) {
   if (!trg.IsValid()) {
     DCHECK_EQ(type, Primitive::kPrimVoid);
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 47e6be5..9280e63 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -24,8 +24,8 @@
 #include "nodes.h"
 #include "string_reference.h"
 #include "parallel_move_resolver.h"
+#include "type_reference.h"
 #include "utils/arm/assembler_thumb2.h"
-#include "utils/type_reference.h"
 
 namespace art {
 namespace arm {
@@ -299,7 +299,6 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
-  void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -456,9 +455,10 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
@@ -482,12 +482,12 @@
     Label add_pc_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
+  PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
-  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                       uint32_t element_offset);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
 
   // Add a new baker read barrier patch and return the label to be bound
   // before the BNE instruction.
@@ -623,6 +623,14 @@
   void GenerateImplicitNullCheck(HNullCheck* instruction) OVERRIDE;
   void GenerateExplicitNullCheck(HNullCheck* instruction) OVERRIDE;
 
+  // Sets `out` to 1 if `in <condition> 0` holds and to 0 otherwise. `temp` is an optional extra
+  // register needed by some conditions; when it is omitted (kNoRegister), `out` is used instead,
+  // or the IP scratch register for kCondNE/kCondA when `out` and `in` are the same register.
+  void GenerateConditionWithZero(IfCondition condition,
+                                 Register out,
+                                 Register in,
+                                 Register temp = kNoRegister);
+
  private:
   Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke, Register temp);
 
@@ -660,14 +668,16 @@
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
-  // PC-relative patch info for each HArmDexCacheArraysBase.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7ff100d..34397e6 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -19,6 +19,8 @@
 #include "arch/arm64/asm_support_arm64.h"
 #include "arch/arm64/instruction_set_features_arm64.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
@@ -29,6 +31,7 @@
 #include "linker/arm64/relative_patcher_arm64.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "lock_word.h"
 #include "offsets.h"
 #include "thread.h"
 #include "utils/arm64/assembler_arm64.h"
@@ -77,7 +80,6 @@
 using helpers::WRegisterFrom;
 using helpers::XRegisterFrom;
 
-static constexpr int kCurrentMethodStackOffset = 0;
 // The compare/jump sequence will generate about (1.5 * num_entries + 3) instructions. While jump
 // table version generates 7 instructions and num_entries literals. Compare/jump sequence will
 // generates less code/data with a small num_entries.
@@ -1448,10 +1450,11 @@
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4495,8 +4498,8 @@
   return desired_dispatch_info;
 }
 
-Location CodeGeneratorARM64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                                    Location temp) {
+void CodeGeneratorARM64::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   // Make sure that ArtMethod* is passed in kArtMethodRegister as per the calling convention.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
@@ -4510,56 +4513,48 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      // Add ADRP with its PC-relative method patch.
+      vixl::aarch64::Label* adrp_label = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
+      // Add ADD with its PC-relative method patch.
+      vixl::aarch64::Label* add_label =
+          NewPcRelativeMethodPatch(invoke->GetTargetMethod(), adrp_label);
+      EmitAddPlaceholder(add_label, XRegisterFrom(temp), XRegisterFrom(temp));
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       // Load method address from literal pool.
       __ Ldr(XRegisterFrom(temp), DeduplicateUint64Literal(invoke->GetMethodAddress()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
       // Add ADRP with its PC-relative DexCache access patch.
-      const DexFile& dex_file = invoke->GetDexFileForPcRelativeDexCache();
-      uint32_t element_offset = invoke->GetDexCacheArrayOffset();
-      vixl::aarch64::Label* adrp_label = NewPcRelativeDexCacheArrayPatch(dex_file, element_offset);
+      MethodReference target_method(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
+      vixl::aarch64::Label* adrp_label = NewMethodBssEntryPatch(target_method);
       EmitAdrpPlaceholder(adrp_label, XRegisterFrom(temp));
       // Add LDR with its PC-relative DexCache access patch.
       vixl::aarch64::Label* ldr_label =
-          NewPcRelativeDexCacheArrayPatch(dex_file, element_offset, adrp_label);
+          NewMethodBssEntryPatch(target_method, adrp_label);
       EmitLdrOffsetPlaceholder(ldr_label, XRegisterFrom(temp), XRegisterFrom(temp));
       break;
     }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register reg = XRegisterFrom(temp);
-      Register method_reg;
-      if (current_method.IsRegister()) {
-        method_reg = XRegisterFrom(current_method);
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ Ldr(reg.X(), MemOperand(sp, kCurrentMethodStackOffset));
-      }
-
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      __ Ldr(reg.X(),
-             MemOperand(method_reg.X(),
-                        ArtMethod::DexCacheResolvedMethodsOffset(kArm64PointerSize).Int32Value()));
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-    __ Ldr(reg.X(), MemOperand(reg.X(), GetCachePointerOffset(index_in_cache)));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
-  return callee_method;
-}
-
-void CodeGeneratorARM64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
-  // All registers are assumed to be correctly set up.
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Bl(&frame_entry_label_);
+      {
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+        ExactAssemblyScope eas(GetVIXLAssembler(),
+                               kInstructionSize,
+                               CodeBufferCheckScope::kExactSize);
+        __ bl(&frame_entry_label_);
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+      }
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // LR = callee_method->entry_point_from_quick_compiled_code_;
@@ -4567,14 +4562,13 @@
           XRegisterFrom(callee_method),
           ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize).Int32Value()));
       {
-        // To ensure that the pc position is recorded immediately after the `blr` instruction
-        // BLR must be the last instruction emitted in this function.
-        // Recording the pc will occur right after returning from this function.
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
         ExactAssemblyScope eas(GetVIXLAssembler(),
                                kInstructionSize,
                                CodeBufferCheckScope::kExactSize);
         // lr()
         __ blr(lr);
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
       }
       break;
   }
@@ -4582,7 +4576,8 @@
   DCHECK(!IsLeafMethod());
 }
 
-void CodeGeneratorARM64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
+void CodeGeneratorARM64::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   // Use the calling convention instead of the location of the receiver, as
   // intrinsics may have put the receiver in a different register. In the intrinsics
   // slow path, the arguments have been moved to the right place, so here we are
@@ -4616,12 +4611,11 @@
   // lr = temp->GetEntryPoint();
   __ Ldr(lr, MemOperand(temp, entry_point.SizeValue()));
   {
-    // To ensure that the pc position is recorded immediately after the `blr` instruction
-    // BLR should be the last instruction emitted in this function.
-    // Recording the pc will occur right after returning from this function.
+    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
     ExactAssemblyScope eas(GetVIXLAssembler(), kInstructionSize, CodeBufferCheckScope::kExactSize);
     // lr();
     __ blr(lr);
+    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
   }
 }
 
@@ -4633,12 +4627,22 @@
   codegen_->GenerateInvokePolymorphicCall(invoke);
 }
 
-vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
-    const DexFile& dex_file,
-    dex::StringIndex string_index,
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeMethodPatch(
+    MethodReference target_method,
     vixl::aarch64::Label* adrp_label) {
-  return
-      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            adrp_label,
+                            &pc_relative_method_patches_);
+}
+
+vixl::aarch64::Label* CodeGeneratorARM64::NewMethodBssEntryPatch(  // Records a .bss method patch.
+    MethodReference target_method,  // Provides the dex file and method index of the entry.
+    vixl::aarch64::Label* adrp_label) {  // Null for the ADRP itself, else the ADRP's label.
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            adrp_label,
+                            &method_bss_entry_patches_);  // Emitted as MethodBssEntryPatch.
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeTypePatch(
@@ -4655,11 +4659,12 @@
   return NewPcRelativePatch(dex_file, type_index.index_, adrp_label, &type_bss_entry_patches_);
 }
 
-vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeDexCacheArrayPatch(
+vixl::aarch64::Label* CodeGeneratorARM64::NewPcRelativeStringPatch(
     const DexFile& dex_file,
-    uint32_t element_offset,
+    dex::StringIndex string_index,
     vixl::aarch64::Label* adrp_label) {
-  return NewPcRelativePatch(dex_file, element_offset, adrp_label, &pc_relative_dex_cache_patches_);
+  return
+      NewPcRelativePatch(dex_file, string_index.index_, adrp_label, &pc_relative_string_patches_);
 }
 
 vixl::aarch64::Label* CodeGeneratorARM64::NewBakerReadBarrierPatch(uint32_t custom_data) {
@@ -4683,7 +4688,7 @@
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
     uint64_t address) {
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address));
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
@@ -4746,28 +4751,28 @@
 void CodeGeneratorARM64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
-      pc_relative_string_patches_.size() +
+      pc_relative_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
+      pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
-  for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
-    linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
-                                                              &info.target_dex_file,
-                                                              info.pc_insn_label->GetLocation(),
-                                                              info.offset_or_index));
-  }
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
@@ -4777,9 +4782,8 @@
   DCHECK_EQ(size, linker_patches->size());
 }
 
-vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value,
-                                                                      Uint32ToLiteralMap* map) {
-  return map->GetOrCreate(
+vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateUint32Literal(uint32_t value) {
+  return uint32_literals_.GetOrCreate(
       value,
       [this, value]() { return __ CreateLiteralDestroyedWithPool<uint32_t>(value); });
 }
@@ -4805,7 +4809,6 @@
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void InstructionCodeGeneratorARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -4818,7 +4821,6 @@
   EmissionCheckScope guard(GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 HLoadClass::LoadKind CodeGeneratorARM64::GetSupportedLoadClassKind(
@@ -4837,7 +4839,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -4845,7 +4847,7 @@
 
 void LocationsBuilderARM64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -4890,7 +4892,7 @@
 // move.
 void InstructionCodeGeneratorARM64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -4972,7 +4974,7 @@
                               read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -5028,7 +5030,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -5037,7 +5039,7 @@
 void LocationsBuilderARM64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
   } else {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 56444dc..d9c49d1 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -25,8 +25,8 @@
 #include "nodes.h"
 #include "parallel_move_resolver.h"
 #include "string_reference.h"
+#include "type_reference.h"
 #include "utils/arm64/assembler_arm64.h"
-#include "utils/type_reference.h"
 
 // TODO(VIXL): Make VIXL compile with -Wshadow.
 #pragma GCC diagnostic push
@@ -540,23 +540,30 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
     UNIMPLEMENTED(FATAL);
   }
 
-  // Add a new PC-relative string patch for an instruction and return the label
+  // Add a new PC-relative method patch for an instruction and return the label
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
   // to the associated ADRP patch label).
-  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                 dex::StringIndex string_index,
+  vixl::aarch64::Label* NewPcRelativeMethodPatch(MethodReference target_method,
                                                  vixl::aarch64::Label* adrp_label = nullptr);
 
+  // Add a new .bss entry method patch for an instruction and return
+  // the label to be bound before the instruction. The instruction will be
+  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
+  // pointing to the associated ADRP patch label).
+  vixl::aarch64::Label* NewMethodBssEntryPatch(MethodReference target_method,
+                                               vixl::aarch64::Label* adrp_label = nullptr);
+
   // Add a new PC-relative type patch for an instruction and return the label
   // to be bound before the instruction. The instruction will be either the
   // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
@@ -573,14 +580,13 @@
                                              dex::TypeIndex type_index,
                                              vixl::aarch64::Label* adrp_label = nullptr);
 
-  // Add a new PC-relative dex cache array patch for an instruction and return
-  // the label to be bound before the instruction. The instruction will be
-  // either the ADRP (pass `adrp_label = null`) or the LDR (pass `adrp_label`
-  // pointing to the associated ADRP patch label).
-  vixl::aarch64::Label* NewPcRelativeDexCacheArrayPatch(
-      const DexFile& dex_file,
-      uint32_t element_offset,
-      vixl::aarch64::Label* adrp_label = nullptr);
+  // Add a new PC-relative string patch for an instruction and return the label
+  // to be bound before the instruction. The instruction will be either the
+  // ADRP (pass `adrp_label = null`) or the ADD (pass `adrp_label` pointing
+  // to the associated ADRP patch label).
+  vixl::aarch64::Label* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                 dex::StringIndex string_index,
+                                                 vixl::aarch64::Label* adrp_label = nullptr);
 
   // Add a new baker read barrier patch and return the label to be bound
   // before the CBNZ instruction.
@@ -733,8 +739,7 @@
                                         vixl::aarch64::Literal<uint32_t>*,
                                         TypeReferenceValueComparator>;
 
-  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value,
-                                                             Uint32ToLiteralMap* map);
+  vixl::aarch64::Literal<uint32_t>* DeduplicateUint32Literal(uint32_t value);
   vixl::aarch64::Literal<uint64_t>* DeduplicateUint64Literal(uint64_t value);
 
   // The PcRelativePatchInfo is used for PC-relative addressing of dex cache arrays
@@ -785,14 +790,16 @@
   Uint32ToLiteralMap uint32_literals_;
   // Deduplication map for 64-bit literals, used for non-patchable method address or method code.
   Uint64ToLiteralMap uint64_literals_;
-  // PC-relative DexCache access info.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 015e6dd..c6bd871 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -19,6 +19,8 @@
 #include "arch/arm/asm_support_arm.h"
 #include "arch/arm/instruction_set_features_arm.h"
 #include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "code_generator_utils.h"
 #include "common_arm.h"
 #include "compiled_method.h"
@@ -76,7 +78,6 @@
 // Use a local definition to prevent copying mistakes.
 static constexpr size_t kArmWordSize = static_cast<size_t>(kArmPointerSize);
 static constexpr size_t kArmBitsPerWord = kArmWordSize * kBitsPerByte;
-static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 // Reference load (except object array loads) is using LDR Rt, [Rn, #offset] which can handle
@@ -1775,6 +1776,34 @@
   }
 }
 
+static int64_t AdjustConstantForCondition(int64_t value,  // Maps tests vs +/-1 to tests vs 0.
+                                          IfCondition* condition,  // In/out: may be rewritten.
+                                          IfCondition* opposite) {  // In/out: kept as inverse.
+  if (value == 1) {  // Unsigned: x < 1 iff x == 0, and x >= 1 iff x != 0.
+    if (*condition == kCondB) {
+      value = 0;
+      *condition = kCondEQ;
+      *opposite = kCondNE;
+    } else if (*condition == kCondAE) {
+      value = 0;
+      *condition = kCondNE;
+      *opposite = kCondEQ;
+    }
+  } else if (value == -1) {  // Signed: x > -1 iff x >= 0, and x <= -1 iff x < 0.
+    if (*condition == kCondGT) {
+      value = 0;
+      *condition = kCondGE;
+      *opposite = kCondLT;
+    } else if (*condition == kCondLE) {
+      value = 0;
+      *condition = kCondLT;
+      *opposite = kCondGE;
+    }
+  }
+
+  return value;  // 0 after a rewrite; otherwise the constant that was passed in.
+}
+
 static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
     HCondition* condition,
     bool invert,
@@ -1797,7 +1826,37 @@
 
   const vixl32::Register left_high = HighRegisterFrom(left);
   const vixl32::Register left_low = LowRegisterFrom(left);
-  int64_t value = Int64ConstantFrom(right);
+  int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right), &cond, &opposite);
+  UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+  // Comparisons against 0 are common enough to deserve special attention.
+  if (value == 0) {
+    switch (cond) {
+      case kCondNE:
+      // x > 0 iff x != 0 when the comparison is unsigned.
+      case kCondA:
+        ret = std::make_pair(ne, eq);
+        FALLTHROUGH_INTENDED;
+      case kCondEQ:
+      // x <= 0 iff x == 0 when the comparison is unsigned.
+      case kCondBE:
+        __ Orrs(temps.Acquire(), left_low, left_high);
+        return ret;
+      case kCondLT:
+      case kCondGE:
+        __ Cmp(left_high, 0);
+        return std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      // Trivially true or false.
+      case kCondB:
+        ret = std::make_pair(ne, eq);
+        FALLTHROUGH_INTENDED;
+      case kCondAE:
+        __ Cmp(left_low, left_low);
+        return ret;
+      default:
+        break;
+    }
+  }
 
   switch (cond) {
     case kCondEQ:
@@ -1842,8 +1901,6 @@
       FALLTHROUGH_INTENDED;
     case kCondGE:
     case kCondLT: {
-      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
-
       __ Cmp(left_low, Low32Bits(value));
       __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
       ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
@@ -1961,18 +2018,22 @@
 static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
   if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
     const LocationSummary* const locations = condition->GetLocations();
-    const IfCondition c = condition->GetCondition();
 
     if (locations->InAt(1).IsConstant()) {
-      const int64_t value = Int64ConstantFrom(locations->InAt(1));
+      IfCondition c = condition->GetCondition();
+      IfCondition opposite = condition->GetOppositeCondition();
+      const int64_t value =
+          AdjustConstantForCondition(Int64ConstantFrom(locations->InAt(1)), &c, &opposite);
 
       if (c < kCondLT || c > kCondGE) {
         // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
         // we check that the least significant half of the first input to be compared
         // is in a low register (the other half is read outside an IT block), and
         // the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
-        // encoding can be used.
-        if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+        // encoding can be used; 0 is always handled, no matter what registers are
+        // used by the first input.
+        if (value != 0 &&
+            (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value)))) {
           return false;
         }
       // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
@@ -1991,6 +2052,354 @@
   return true;
 }
 
+static void GenerateConditionGeneric(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+  DCHECK(CanGenerateTest(cond, codegen->GetAssembler()));
+
+  const vixl32::Register out = OutputRegister(cond);  // Ends up 1 if the test passes, else 0.
+  const auto condition = GenerateTest(cond, false, codegen);  // (test, opposite) ARM conditions.
+
+  __ Mov(LeaveFlags, out, 0);  // Default result; LeaveFlags keeps the test's flags intact.
+
+  if (out.IsLow()) {
+    // We use the scope because of the IT block that follows.
+    ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                             2 * vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+
+    __ it(condition.first);
+    __ mov(condition.first, out, 1);  // Executed only when the test passes.
+  } else {  // High register: presumably no 16-bit MOV for the IT form, so branch instead.
+    vixl32::Label done_label;
+    vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+
+    __ B(condition.second, final_label, /* far_target */ false);  // Test failed: keep 0.
+    __ Mov(out, 1);
+
+    if (done_label.IsReferenced()) {  // Bind only if something branches to it.
+      __ Bind(&done_label);
+    }
+  }
+}
+
+static void GenerateEqualLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {  // 64-bit ==/!=.
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const vixl32::Register out = OutputRegister(cond);
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+  vixl32::Register left_high = HighRegisterFrom(left);
+  vixl32::Register left_low = LowRegisterFrom(left);
+  vixl32::Register temp;  // Scratch for one half's difference; `out` holds the other.
+  UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+    const int64_t value = AdjustConstantForCondition(Int64ConstantFrom(right),
+                                                     &condition,
+                                                     &opposite);
+    Operand right_high = High32Bits(value);
+    Operand right_low = Low32Bits(value);
+
+    // The output uses Location::kNoOutputOverlap.
+    if (out.Is(left_high)) {  // Swap halves so the first Sub reads `out` before writing it.
+      std::swap(left_low, left_high);
+      std::swap(right_low, right_high);
+    }
+
+    __ Sub(out, left_low, right_low);
+    temp = temps.Acquire();
+    __ Sub(temp, left_high, right_high);
+  } else {
+    DCHECK(right.IsRegisterPair());
+    temp = temps.Acquire();
+    __ Sub(temp, left_high, HighRegisterFrom(right));  // Runs before `out` is overwritten.
+    __ Sub(out, left_low, LowRegisterFrom(right));
+  }
+
+  // Need to check after calling AdjustConstantForCondition().
+  DCHECK(condition == kCondEQ || condition == kCondNE) << condition;
+
+  if (condition == kCondNE && out.IsLow()) {
+    __ Orrs(out, out, temp);  // Flags: ne iff either half differed.
+
+    // We use the scope because of the IT block that follows.
+    ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                             2 * vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+
+    __ it(ne);
+    __ mov(ne, out, 1);  // Normalize any non-zero difference to 1.
+  } else {
+    __ Orr(out, out, temp);  // Zero iff both halves were equal.
+    codegen->GenerateConditionWithZero(condition, out, out, temp);
+  }
+}
+
+static void GenerateLongComparesAndJumps(HCondition* cond,  // 64-bit compare via two 32-bit Cmps.
+                                         vixl32::Label* true_label,  // Target when cond holds.
+                                         vixl32::Label* false_label,  // Taken if high words fail.
+                                         CodeGeneratorARMVIXL* codegen,
+                                         bool is_far_target = true) {  // Passed to every B().
+  LocationSummary* locations = cond->GetLocations();
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+  IfCondition if_cond = cond->GetCondition();
+
+  vixl32::Register left_high = HighRegisterFrom(left);
+  vixl32::Register left_low = LowRegisterFrom(left);
+  IfCondition true_high_cond = if_cond;  // High-word outcome that proves the result true.
+  IfCondition false_high_cond = cond->GetOppositeCondition();  // High-word outcome proving false.
+  vixl32::Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
+
+  // Set the conditions for the test, remembering that == needs to be
+  // decided using the low words.
+  switch (if_cond) {
+    case kCondEQ:
+    case kCondNE:
+      // Nothing to do.
+      break;
+    case kCondLT:
+      false_high_cond = kCondGT;  // high > refutes <; equal high words stay undecided.
+      break;
+    case kCondLE:
+      true_high_cond = kCondLT;  // high < proves <=; equal high words stay undecided.
+      break;
+    case kCondGT:
+      false_high_cond = kCondLT;
+      break;
+    case kCondGE:
+      true_high_cond = kCondGT;
+      break;
+    case kCondB:
+      false_high_cond = kCondA;
+      break;
+    case kCondBE:
+      true_high_cond = kCondB;
+      break;
+    case kCondA:
+      false_high_cond = kCondB;
+      break;
+    case kCondAE:
+      true_high_cond = kCondA;
+      break;
+  }
+  if (right.IsConstant()) {
+    int64_t value = Int64ConstantFrom(right);
+    int32_t val_low = Low32Bits(value);
+    int32_t val_high = High32Bits(value);
+
+    __ Cmp(left_high, val_high);
+    if (if_cond == kCondNE) {  // Differing high words already decide !=.
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+    } else if (if_cond == kCondEQ) {  // Differing high words already refute ==.
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    } else {  // Strict high-word order decides either way; equality falls through.
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    }
+    // Must be equal high, so compare the lows.
+    __ Cmp(left_low, val_low);
+  } else {
+    vixl32::Register right_high = HighRegisterFrom(right);
+    vixl32::Register right_low = LowRegisterFrom(right);
+
+    __ Cmp(left_high, right_high);  // Same high-word decision sequence as the constant case.
+    if (if_cond == kCondNE) {
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+    } else if (if_cond == kCondEQ) {
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    } else {
+      __ B(ARMCondition(true_high_cond), true_label, is_far_target);
+      __ B(ARMCondition(false_high_cond), false_label, is_far_target);
+    }
+    // Must be equal high, so compare the lows.
+    __ Cmp(left_low, right_low);
+  }
+  // The last comparison might be unsigned.
+  // TODO: optimize cases where this is always true/false
+  __ B(final_condition, true_label, is_far_target);  // No branch if false: falls through.
+}
+
+static void GenerateConditionLong(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(cond->GetLeft()->GetType(), Primitive::kPrimLong);
+  // Emits the code for a condition on long inputs, trying special cases first.
+  const LocationSummary* const locations = cond->GetLocations();
+  IfCondition condition = cond->GetCondition();
+  const vixl32::Register out = OutputRegister(cond);
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite) == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (out.IsLow()) {
+            // The long is zero iff the OR of its low and high words is zero.
+            __ Orrs(out, LowRegisterFrom(left), HighRegisterFrom(left));
+
+            // Exact-size scope for the IT block that follows (two 16-bit instructions).
+            ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                                     2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                     CodeBufferCheckScope::kExactSize);
+
+            __ it(ne);
+            __ mov(ne, out, 1);  // A non-zero OR result becomes 1; zero is already 0.
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+          // The long is zero iff the OR of its low and high words is zero.
+          __ Orr(out, LowRegisterFrom(left), HighRegisterFrom(left));
+          codegen->GenerateConditionWithZero(condition, out, out);
+          return;
+        case kCondLT:
+        case kCondGE:
+          // The sign of the long is the sign bit of its high word.
+          FALLTHROUGH_INTENDED;
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, HighRegisterFrom(left));
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;  // No shortcut here; continue with the general sequences below.
+      }
+    }
+  }
+
+  if ((condition == kCondEQ || condition == kCondNE) &&
+      // If `out` is a low register, then the GenerateConditionGeneric()
+      // function generates a shorter code sequence that is still branchless.
+      (!out.IsLow() || !CanGenerateTest(cond, codegen->GetAssembler()))) {
+    GenerateEqualLong(cond, codegen);
+    return;
+  }
+
+  if (CanGenerateTest(cond, codegen->GetAssembler())) {
+    GenerateConditionGeneric(cond, codegen);
+    return;
+  }
+
+  // Fallback: branch on the comparison, then materialize 0 or 1 in `out`.
+  vixl32::Label done_label;
+  vixl32::Label* const final_label = codegen->GetFinalLabel(cond, &done_label);
+  vixl32::Label true_label, false_label;
+
+  GenerateLongComparesAndJumps(cond, &true_label, &false_label, codegen, /* is_far_target */ false);
+
+  // False case: result = 0.
+  __ Bind(&false_label);
+  __ Mov(out, 0);
+  __ B(final_label);
+
+  // True case: result = 1.
+  __ Bind(&true_label);
+  __ Mov(out, 1);
+
+  if (done_label.IsReferenced()) {
+    __ Bind(&done_label);
+  }
+}
+
+static void GenerateConditionIntegralOrNonPrimitive(HCondition* cond, CodeGeneratorARMVIXL* codegen) {
+  const Primitive::Type type = cond->GetLeft()->GetType();
+  // Emits the code for a condition whose left input is integral or non-primitive (kPrimNot).
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    GenerateConditionLong(cond, codegen);
+    return;
+  }
+
+  IfCondition condition = cond->GetCondition();
+  vixl32::Register in = InputRegisterAt(cond, 0);
+  const vixl32::Register out = OutputRegister(cond);
+  const Location right = cond->GetLocations()->InAt(1);
+  int64_t value;  // Only set, and only read, when `right` is a constant.
+
+  if (right.IsConstant()) {
+    IfCondition opposite = cond->GetOppositeCondition();
+
+    value = AdjustConstantForCondition(Int64ConstantFrom(right), &condition, &opposite);
+
+    // Comparisons against 0 are common enough to deserve special attention.
+    if (value == 0) {
+      switch (condition) {
+        case kCondNE:
+        case kCondA:
+          if (out.IsLow() && out.Is(in)) {
+            __ Cmp(out, 0);
+
+            // Exact-size scope for the IT block that follows (two 16-bit instructions).
+            ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                                     2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                     CodeBufferCheckScope::kExactSize);
+
+            __ it(ne);
+            __ mov(ne, out, 1);  // `out` is `in`: non-zero becomes 1; zero is already 0.
+            return;
+          }
+
+          FALLTHROUGH_INTENDED;
+        case kCondEQ:
+        case kCondBE:
+        case kCondLT:
+        case kCondGE:
+        case kCondAE:
+        case kCondB:
+          codegen->GenerateConditionWithZero(condition, out, in);
+          return;
+        case kCondLE:
+        case kCondGT:
+        default:
+          break;  // No shortcut here; continue with the general sequences below.
+      }
+    }
+  }
+
+  if (condition == kCondEQ || condition == kCondNE) {
+    Operand operand(0);
+
+    if (right.IsConstant()) {
+      operand = Operand::From(value);
+    } else if (out.Is(RegisterFrom(right))) {
+      // `out` is `right`: swap the operands (harmless for ==/!=) to avoid 32-bit instructions.
+      operand = InputOperandAt(cond, 0);
+      in = RegisterFrom(right);
+    } else {
+      operand = InputOperandAt(cond, 1);
+    }
+
+    if (condition == kCondNE && out.IsLow()) {
+      __ Subs(out, in, operand);
+
+      // Exact-size scope for the IT block that follows (two 16-bit instructions).
+      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                               2 * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      __ it(ne);
+      __ mov(ne, out, 1);  // A non-zero difference becomes 1; zero is already 0.
+    } else {
+      __ Sub(out, in, operand);
+      codegen->GenerateConditionWithZero(condition, out, out);
+    }
+
+    return;
+  }
+
+  GenerateConditionGeneric(cond, codegen);
+}
+
 static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
   const Primitive::Type type = constant->GetType();
   bool ret = false;
@@ -2092,10 +2501,11 @@
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       baker_read_barrier_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -2547,92 +2957,10 @@
 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
-                                                                   vixl32::Label* true_label,
-                                                                   vixl32::Label* false_label) {
-  LocationSummary* locations = cond->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-  IfCondition if_cond = cond->GetCondition();
-
-  vixl32::Register left_high = HighRegisterFrom(left);
-  vixl32::Register left_low = LowRegisterFrom(left);
-  IfCondition true_high_cond = if_cond;
-  IfCondition false_high_cond = cond->GetOppositeCondition();
-  vixl32::Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
-
-  // Set the conditions for the test, remembering that == needs to be
-  // decided using the low words.
-  switch (if_cond) {
-    case kCondEQ:
-    case kCondNE:
-      // Nothing to do.
-      break;
-    case kCondLT:
-      false_high_cond = kCondGT;
-      break;
-    case kCondLE:
-      true_high_cond = kCondLT;
-      break;
-    case kCondGT:
-      false_high_cond = kCondLT;
-      break;
-    case kCondGE:
-      true_high_cond = kCondGT;
-      break;
-    case kCondB:
-      false_high_cond = kCondA;
-      break;
-    case kCondBE:
-      true_high_cond = kCondB;
-      break;
-    case kCondA:
-      false_high_cond = kCondB;
-      break;
-    case kCondAE:
-      true_high_cond = kCondA;
-      break;
-  }
-  if (right.IsConstant()) {
-    int64_t value = Int64ConstantFrom(right);
-    int32_t val_low = Low32Bits(value);
-    int32_t val_high = High32Bits(value);
-
-    __ Cmp(left_high, val_high);
-    if (if_cond == kCondNE) {
-      __ B(ARMCondition(true_high_cond), true_label);
-    } else if (if_cond == kCondEQ) {
-      __ B(ARMCondition(false_high_cond), false_label);
-    } else {
-      __ B(ARMCondition(true_high_cond), true_label);
-      __ B(ARMCondition(false_high_cond), false_label);
-    }
-    // Must be equal high, so compare the lows.
-    __ Cmp(left_low, val_low);
-  } else {
-    vixl32::Register right_high = HighRegisterFrom(right);
-    vixl32::Register right_low = LowRegisterFrom(right);
-
-    __ Cmp(left_high, right_high);
-    if (if_cond == kCondNE) {
-      __ B(ARMCondition(true_high_cond), true_label);
-    } else if (if_cond == kCondEQ) {
-      __ B(ARMCondition(false_high_cond), false_label);
-    } else {
-      __ B(ARMCondition(true_high_cond), true_label);
-      __ B(ARMCondition(false_high_cond), false_label);
-    }
-    // Must be equal high, so compare the lows.
-    __ Cmp(left_low, right_low);
-  }
-  // The last comparison might be unsigned.
-  // TODO: optimize cases where this is always true/false
-  __ B(final_condition, true_label);
-}
-
 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
                                                                    vixl32::Label* true_target_in,
-                                                                   vixl32::Label* false_target_in) {
+                                                                   vixl32::Label* false_target_in,
+                                                                   bool is_far_target) {
   if (CanGenerateTest(condition, codegen_->GetAssembler())) {
     vixl32::Label* non_fallthrough_target;
     bool invert;
@@ -2648,7 +2976,7 @@
 
     const auto cond = GenerateTest(condition, invert, codegen_);
 
-    __ B(cond.first, non_fallthrough_target);
+    __ B(cond.first, non_fallthrough_target, is_far_target);
 
     if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
       __ B(false_target_in);
@@ -2664,7 +2992,7 @@
   vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
 
   DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
-  GenerateLongComparesAndJumps(condition, true_target, false_target);
+  GenerateLongComparesAndJumps(condition, true_target, false_target, codegen_, is_far_target);
 
   if (false_target != &fallthrough) {
     __ B(false_target);
@@ -2732,7 +3060,7 @@
     // the HCondition, generate the comparison directly.
     Primitive::Type type = condition->InputAt(0)->GetType();
     if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
-      GenerateCompareTestAndBranch(condition, true_target, false_target);
+      GenerateCompareTestAndBranch(condition, true_target, false_target, far_target);
       return;
     }
 
@@ -2751,14 +3079,14 @@
 
     if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
       if (arm_cond.Is(eq)) {
-        __ CompareAndBranchIfZero(left, non_fallthrough_target);
+        __ CompareAndBranchIfZero(left, non_fallthrough_target, far_target);
       } else {
         DCHECK(arm_cond.Is(ne));
-        __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+        __ CompareAndBranchIfNonZero(left, non_fallthrough_target, far_target);
       }
     } else {
       __ Cmp(left, right);
-      __ B(arm_cond, non_fallthrough_target);
+      __ B(arm_cond, non_fallthrough_target, far_target);
     }
   }
 
@@ -2975,6 +3303,83 @@
   __ Nop();
 }
 
+// Sets `out` to 1 if `in` compared with zero satisfies `condition`, and to 0 otherwise.
+// `temp` is an optional extra temporary register that is used for some conditions;
+// when it is not valid, `out` or a scratch register is used instead.
+void CodeGeneratorARMVIXL::GenerateConditionWithZero(IfCondition condition,
+                                                     vixl32::Register out,
+                                                     vixl32::Register in,
+                                                     vixl32::Register temp) {
+  switch (condition) {
+    case kCondEQ:
+    // x <= 0 iff x == 0 when the comparison is unsigned.
+    case kCondBE:
+      if (!temp.IsValid() || (out.IsLow() && !out.Is(in))) {
+        temp = out;
+      }
+
+      // Avoid 32-bit instructions if possible; note that `in` and `temp` must be
+      // different as well.
+      if (in.IsLow() && temp.IsLow() && !in.Is(temp)) {
+        // temp = - in; only 0 sets the carry flag.
+        __ Rsbs(temp, in, 0);
+
+        if (out.Is(in)) {
+          std::swap(in, temp);
+        }
+
+        // out = - in + in + carry = carry
+        __ Adc(out, temp, in);
+      } else {
+        // If `in` is 0, then it has 32 leading zeros, and fewer than that otherwise.
+        __ Clz(out, in);
+        // Any number less than 32 logically shifted right by 5 bits results in 0;
+        // the same operation on 32 yields 1.
+        __ Lsr(out, out, 5);
+      }
+
+      break;
+    case kCondNE:
+    // x > 0 iff x != 0 when the comparison is unsigned.
+    case kCondA: {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+
+      if (out.Is(in)) {
+        if (!temp.IsValid() || in.Is(temp)) {
+          temp = temps.Acquire();  // `temp` must not alias `in`, which Sbc reads below.
+        }
+      } else if (!temp.IsValid() || !temp.IsLow()) {
+        temp = out;
+      }
+
+      // temp = in - 1; only 0 does not set the carry flag.
+      __ Subs(temp, in, 1);
+      // out = in + ~temp + carry = in + (-(in - 1) - 1) + carry = in - in + 1 - 1 + carry = carry
+      __ Sbc(out, in, temp);
+      break;
+    }
+    case kCondGE:
+      __ Mvn(out, in);  // in >= 0 iff the sign bit of ~in is set.
+      in = out;
+      FALLTHROUGH_INTENDED;
+    case kCondLT:
+      // We only care about the sign bit.
+      __ Lsr(out, in, 31);
+      break;
+    case kCondAE:
+      // Unsigned x >= 0 is trivially true.
+      __ Mov(out, 1);
+      break;
+    case kCondB:
+      // Unsigned x < 0 is trivially false.
+      __ Mov(out, 0);
+      break;
+    default:
+      LOG(FATAL) << "Unexpected condition " << condition;
+      UNREACHABLE();
+  }
+}
+
 void LocationsBuilderARMVIXL::HandleCondition(HCondition* cond) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
@@ -3011,52 +3416,47 @@
     return;
   }
 
-  const vixl32::Register out = OutputRegister(cond);
+  const Primitive::Type type = cond->GetLeft()->GetType();
 
-  if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
-    const auto condition = GenerateTest(cond, false, codegen_);
-    // We use the scope because of the IT block that follows.
-    ExactAssemblyScope guard(GetVIXLAssembler(),
-                             4 * vixl32::k16BitT32InstructionSizeInBytes,
-                             CodeBufferCheckScope::kExactSize);
-
-    __ it(condition.first);
-    __ mov(condition.first, out, 1);
-    __ it(condition.second);
-    __ mov(condition.second, out, 0);
+  if (Primitive::IsFloatingPointType(type)) {
+    GenerateConditionGeneric(cond, codegen_);
     return;
   }
 
-  // Convert the jumps into the result.
-  vixl32::Label done_label;
-  vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
 
-  if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
-    vixl32::Label true_label, false_label;
+  const IfCondition condition = cond->GetCondition();
 
-    GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+  // A condition with only one boolean input, or with two boolean inputs that is neither an
+  // equality nor an inequality, results from transformations done by the instruction simplifier
+  // and is handled as a regular condition with integral inputs.
+  if (type == Primitive::kPrimBoolean &&
+      cond->GetRight()->GetType() == Primitive::kPrimBoolean &&
+      (condition == kCondEQ || condition == kCondNE)) {
+    vixl32::Register left = InputRegisterAt(cond, 0);
+    const vixl32::Register out = OutputRegister(cond);
+    const Location right_loc = cond->GetLocations()->InAt(1);
 
-    // False case: result = 0.
-    __ Bind(&false_label);
-    __ Mov(out, 0);
-    __ B(final_label);
+    // The constant case is handled by the instruction simplifier.
+    DCHECK(!right_loc.IsConstant());
 
-    // True case: result = 1.
-    __ Bind(&true_label);
-    __ Mov(out, 1);
-  } else {
-    DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+    vixl32::Register right = RegisterFrom(right_loc);
 
-    const auto condition = GenerateTest(cond, false, codegen_);
+    // Avoid 32-bit instructions if possible.
+    if (out.Is(right)) {
+      std::swap(left, right);
+    }
 
-    __ Mov(LeaveFlags, out, 0);
-    __ B(condition.second, final_label, /* far_target */ false);
-    __ Mov(out, 1);
+    __ Eor(out, left, right);
+
+    if (condition == kCondEQ) {
+      __ Eor(out, out, 1);
+    }
+
+    return;
   }
 
-  if (done_label.IsReferenced()) {
-    __ Bind(&done_label);
-  }
+  GenerateConditionIntegralOrNonPrimitive(cond, codegen_);
 }
 
 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -3244,18 +3644,10 @@
 
   IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
-      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
-    }
     return;
   }
 
   HandleInvoke(invoke);
-
-  // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
-  if (invoke->HasPcRelativeDexCache()) {
-    invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
-  }
 }
 
 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
@@ -3279,7 +3671,6 @@
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
@@ -3302,7 +3693,6 @@
   }
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
   DCHECK(!codegen_->IsLeafMethod());
 }
 
@@ -6853,7 +7243,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -6861,7 +7251,7 @@
 
 void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConventionARMVIXL calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -6914,7 +7304,7 @@
 // move.
 void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -6976,7 +7366,7 @@
       GenerateGcRootFieldLoad(cls, out_loc, out, /* offset */ 0, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7045,7 +7435,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -7055,7 +7445,7 @@
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(LocationFrom(r0));
   } else {
     locations->SetOut(Location::RequiresRegister());
@@ -7133,7 +7523,7 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConventionARMVIXL calling_convention;
   __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
@@ -8720,8 +9110,8 @@
   return RegisterFrom(location);
 }
 
-Location CodeGeneratorARMVIXL::GenerateCalleeMethodStaticOrDirectCall(
-    HInvokeStaticOrDirect* invoke, Location temp) {
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -8734,53 +9124,40 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      PcRelativePatchInfo* labels = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      vixl32::Register temp_reg = RegisterFrom(temp);
+      EmitMovwMovtPlaceholder(labels, temp_reg);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ Mov(RegisterFrom(temp), Operand::From(invoke->GetMethodAddress()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      HArmDexCacheArraysBase* base =
-          invoke->InputAt(invoke->GetSpecialInputIndex())->AsArmDexCacheArraysBase();
-      vixl32::Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke, RegisterFrom(temp));
-      int32_t offset = invoke->GetDexCacheArrayOffset() - base->GetElementOffset();
-      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(temp), base_reg, offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+      PcRelativePatchInfo* labels = NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
+      vixl32::Register temp_reg = RegisterFrom(temp);
+      EmitMovwMovtPlaceholder(labels, temp_reg);
+      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, temp_reg, /* offset*/ 0);
       break;
     }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      vixl32::Register method_reg;
-      vixl32::Register reg = RegisterFrom(temp);
-      if (current_method.IsRegister()) {
-        method_reg = RegisterFrom(current_method);
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, kCurrentMethodStackOffset);
-      }
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      GetAssembler()->LoadFromOffset(
-          kLoadWord,
-          reg,
-          method_reg,
-          ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      GetAssembler()->LoadFromOffset(
-          kLoadWord, reg, reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
-  return callee_method;
-}
-
-void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                      Location temp) {
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
-      __ Bl(GetFrameEntryLabel());
+      {
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+        ExactAssemblyScope aas(GetVIXLAssembler(),
+                               vixl32::k32BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+        __ bl(GetFrameEntryLabel());
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+      }
       break;
     case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
       // LR = callee_method->entry_point_from_quick_compiled_code_
@@ -8790,12 +9167,14 @@
             RegisterFrom(callee_method),
             ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
       {
+        // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
         // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
         ExactAssemblyScope aas(GetVIXLAssembler(),
                                vixl32::k16BitT32InstructionSizeInBytes,
                                CodeBufferCheckScope::kExactSize);
         // LR()
         __ blx(lr);
+        RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
       }
       break;
   }
@@ -8803,7 +9182,8 @@
   DCHECK(!IsLeafMethod());
 }
 
-void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+void CodeGeneratorARMVIXL::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
   vixl32::Register temp = RegisterFrom(temp_location);
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
@@ -8839,20 +9219,30 @@
   GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // LR = temp->GetEntryPoint();
   GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
-  // LR();
-  // This `blx` *must* be the *last* instruction generated by this stub, so that calls to
-  // `RecordPcInfo()` immediately following record the correct pc. Use a scope to help guarantee
-  // that.
-  // blx in T32 has only 16bit encoding that's why a stricter check for the scope is used.
-  ExactAssemblyScope aas(GetVIXLAssembler(),
-                         vixl32::k16BitT32InstructionSizeInBytes,
-                         CodeBufferCheckScope::kExactSize);
-  __ blx(lr);
+  {
+    // Use a scope to help guarantee that `RecordPcInfo()` records the correct pc.
+    // blx in T32 has only a 16-bit encoding, which is why a stricter check for the scope is used.
+    ExactAssemblyScope aas(GetVIXLAssembler(),
+                           vixl32::k16BitT32InstructionSizeInBytes,
+                           CodeBufferCheckScope::kExactSize);
+    // LR();
+    __ blx(lr);
+    RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+  }
 }
 
-CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
+}
+
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
 }
 
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeTypePatch(
@@ -8865,9 +9255,9 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
-CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorARMVIXL::PcRelativePatchInfo* CodeGeneratorARMVIXL::NewPcRelativePatch(
@@ -8933,24 +9323,28 @@
 void CodeGeneratorARMVIXL::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_dex_cache_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_method_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * method_bss_entry_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
+      /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       baker_read_barrier_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   for (const BakerReadBarrierPatchInfo& info : baker_read_barrier_patches_) {
@@ -9100,17 +9494,6 @@
     }
   }
 }
-void LocationsBuilderARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
-  locations->SetOut(Location::RequiresRegister());
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitArmDexCacheArraysBase(HArmDexCacheArraysBase* base) {
-  vixl32::Register base_reg = OutputRegister(base);
-  CodeGeneratorARMVIXL::PcRelativePatchInfo* labels =
-      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-  codegen_->EmitMovwMovtPlaceholder(labels, base_reg);
-}
 
 // Copy the result of a call into the given target.
 void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index daba9bf..805a3f4 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -24,8 +24,8 @@
 #include "nodes.h"
 #include "string_reference.h"
 #include "parallel_move_resolver.h"
+#include "type_reference.h"
 #include "utils/arm/assembler_arm_vixl.h"
-#include "utils/type_reference.h"
 
 // TODO(VIXL): make vixl clean wrt -Wshadow.
 #pragma GCC diagnostic push
@@ -400,10 +400,8 @@
                              bool far_target = true);
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     vixl::aarch32::Label* true_target,
-                                    vixl::aarch32::Label* false_target);
-  void GenerateLongComparesAndJumps(HCondition* cond,
-                                    vixl::aarch32::Label* true_label,
-                                    vixl::aarch32::Label* false_label);
+                                    vixl::aarch32::Label* false_target,
+                                    bool is_far_target = true);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
@@ -540,9 +538,10 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg, Primitive::Type type) OVERRIDE;
 
@@ -566,12 +565,12 @@
     vixl::aarch32::Label add_pc_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
+  PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
-  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                       uint32_t element_offset);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
 
   // Add a new baker read barrier patch and return the label to be bound
   // before the BNE instruction.
@@ -716,6 +715,14 @@
   void EmitMovwMovtPlaceholder(CodeGeneratorARMVIXL::PcRelativePatchInfo* labels,
                                vixl::aarch32::Register out);
 
+  // `temp` is an extra temporary register that is used for some conditions;
+  // callers may not specify it, in which case the method will use a scratch
+  // register instead.
+  void GenerateConditionWithZero(IfCondition condition,
+                                 vixl::aarch32::Register out,
+                                 vixl::aarch32::Register in,
+                                 vixl::aarch32::Register temp = vixl32::Register());
+
  private:
   vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
                                                                 vixl::aarch32::Register temp);
@@ -758,14 +765,16 @@
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
-  // PC-relative patch info for each HArmDexCacheArraysBase.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
   // Baker read barrier patch info.
   ArenaDeque<BakerReadBarrierPatchInfo> baker_read_barrier_patches_;
 
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 95be3d7..b39d412 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -40,10 +40,6 @@
 static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr Register kMethodRegisterArgument = A0;
 
-// We'll maximize the range of a single load instruction for dex cache array accesses
-// by aligning offset -32768 with the offset of the first used element.
-static constexpr uint32_t kDexCacheArrayLwOffset = 0x8000;
-
 Location MipsReturnLocation(Primitive::Type return_type) {
   switch (return_type) {
     case Primitive::kPrimBoolean:
@@ -1060,10 +1056,11 @@
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1601,31 +1598,44 @@
 void CodeGeneratorMIPS::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
-      pc_relative_string_patches_.size() +
+      pc_relative_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      pc_relative_string_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
 
-CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
+}
+
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeTypePatch(
@@ -1638,9 +1648,9 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
-CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorMIPS::PcRelativePatchInfo* CodeGeneratorMIPS::NewPcRelativePatch(
@@ -1662,6 +1672,7 @@
 void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
                                                              Register out,
                                                              Register base) {
+  DCHECK_NE(out, base);
   if (GetInstructionSetFeatures().IsR6()) {
     DCHECK_EQ(base, ZERO);
     __ Bind(&info->high_label);
@@ -6947,7 +6958,7 @@
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
   bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
-  bool has_extra_input = invoke->HasPcRelativeDexCache() && !is_r6;
+  bool has_extra_input = invoke->HasPcRelativeMethodLoadKind() && !is_r6;
 
   IntrinsicLocationsBuilderMIPS intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
@@ -6987,7 +6998,7 @@
     HLoadString::LoadKind desired_string_load_kind) {
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
-  // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
+  // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods
   // with irreducible loops.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
   bool is_r6 = GetInstructionSetFeatures().IsR6();
@@ -7003,12 +7014,12 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = false;
       break;
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       fallback_load = false;
       break;
   }
   if (fallback_load) {
-    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+    desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall;
   }
   return desired_string_load_kind;
 }
@@ -7017,6 +7028,8 @@
     HLoadClass::LoadKind desired_class_load_kind) {
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
+  // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods
+  // with irreducible loops.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
   bool is_r6 = GetInstructionSetFeatures().IsR6();
   bool fallback_load = has_irreducible_loops && !is_r6;
@@ -7037,12 +7050,12 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       fallback_load = false;
       break;
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       fallback_load = false;
       break;
   }
   if (fallback_load) {
-    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+    desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall;
   }
   return desired_class_load_kind;
 }
@@ -7080,30 +7093,34 @@
   HInvokeStaticOrDirect::DispatchInfo dispatch_info = desired_dispatch_info;
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
+  // TODO: Create as many HMipsComputeBaseMethodAddress instructions as needed for methods
+  // with irreducible loops.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
   bool is_r6 = GetInstructionSetFeatures().IsR6();
   bool fallback_load = has_irreducible_loops && !is_r6;
   switch (dispatch_info.method_load_kind) {
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry:
       break;
     default:
       fallback_load = false;
       break;
   }
   if (fallback_load) {
-    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod;
+    dispatch_info.method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall;
     dispatch_info.method_load_data = 0;
   }
   return dispatch_info;
 }
 
-void CodeGeneratorMIPS::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+void CodeGeneratorMIPS::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   // All registers are assumed to be correctly set up per the calling convention.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
   HInvokeStaticOrDirect::CodePtrLocation code_ptr_location = invoke->GetCodePtrLocation();
   bool is_r6 = GetInstructionSetFeatures().IsR6();
-  Register base_reg = (invoke->HasPcRelativeDexCache() && !is_r6)
+  Register base_reg = (invoke->HasPcRelativeMethodLoadKind() && !is_r6)
       ? GetInvokeStaticOrDirectExtraParameter(invoke, temp.AsRegister<Register>())
       : ZERO;
 
@@ -7121,53 +7138,32 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      PcRelativePatchInfo* info = NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      bool reordering = __ SetReorder(false);
+      Register temp_reg = temp.AsRegister<Register>();
+      EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg);
+      __ Addiu(temp_reg, TMP, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadConst32(temp.AsRegister<Register>(), invoke->GetMethodAddress());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      if (is_r6) {
-        uint32_t offset = invoke->GetDexCacheArrayOffset();
-        CodeGeneratorMIPS::PcRelativePatchInfo* info =
-            NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset);
-        bool reordering = __ SetReorder(false);
-        EmitPcRelativeAddressPlaceholderHigh(info, TMP, ZERO);
-        __ Lw(temp.AsRegister<Register>(), TMP, /* placeholder */ 0x5678);
-        __ SetReorder(reordering);
-      } else {
-        HMipsDexCacheArraysBase* base =
-            invoke->InputAt(invoke->GetSpecialInputIndex())->AsMipsDexCacheArraysBase();
-        int32_t offset =
-            invoke->GetDexCacheArrayOffset() - base->GetElementOffset() - kDexCacheArrayLwOffset;
-        __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), base_reg, offset);
-      }
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+      PcRelativePatchInfo* info = NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
+      Register temp_reg = temp.AsRegister<Register>();
+      bool reordering = __ SetReorder(false);
+      EmitPcRelativeAddressPlaceholderHigh(info, TMP, base_reg);
+      __ Lw(temp_reg, TMP, /* placeholder */ 0x5678);
+      __ SetReorder(reordering);
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register reg = temp.AsRegister<Register>();
-      Register method_reg;
-      if (current_method.IsRegister()) {
-        method_reg = current_method.AsRegister<Register>();
-      } else {
-        // TODO: use the appropriate DCHECK() here if possible.
-        // DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ Lw(reg, SP, kCurrentMethodStackOffset);
-      }
-
-      // temp = temp->dex_cache_resolved_methods_;
-      __ LoadFromOffset(kLoadWord,
-                        reg,
-                        method_reg,
-                        ArtMethod::DexCacheResolvedMethodsOffset(kMipsPointerSize).Int32Value());
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ LoadFromOffset(kLoadWord,
-                        reg,
-                        reg,
-                        CodeGenerator::GetCachePointerOffset(index_in_cache));
-      break;
+    }
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
 
@@ -7187,6 +7183,8 @@
       __ NopIfNoReordering();
       break;
   }
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+
   DCHECK(!IsLeafMethod());
 }
 
@@ -7204,10 +7202,10 @@
                                        locations->HasTemps()
                                            ? locations->GetTemp(0)
                                            : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void CodeGeneratorMIPS::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+void CodeGeneratorMIPS::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
   // Use the calling convention instead of the location of the receiver, as
   // intrinsics may have put the receiver in a different register. In the intrinsics
   // slow path, the arguments have been moved to the right place, so here we are
@@ -7239,6 +7237,7 @@
   // T9();
   __ Jalr(T9);
   __ NopIfNoReordering();
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }
 
 void InstructionCodeGeneratorMIPS::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -7248,12 +7247,11 @@
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderMIPS::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
@@ -7307,7 +7305,7 @@
 // move.
 void InstructionCodeGeneratorMIPS::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -7326,7 +7324,7 @@
       base_or_current_method_reg = isR6 ? ZERO : locations->InAt(0).AsRegister<Register>();
       break;
     case HLoadClass::LoadKind::kReferrersClass:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       base_or_current_method_reg = locations->InAt(0).AsRegister<Register>();
       break;
     default:
@@ -7404,7 +7402,7 @@
       __ SetReorder(reordering);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -7464,13 +7462,13 @@
       }
       FALLTHROUGH_INTENDED;
     // We need an extra register for PC-relative dex cache accesses.
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       locations->SetInAt(0, Location::RequiresRegister());
       break;
     default:
       break;
   }
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   } else {
@@ -7586,7 +7584,7 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
@@ -8708,29 +8706,11 @@
   __ Nal();
   // Grab the return address off RA.
   __ Move(reg, RA);
-  // TODO: Can we share this code with that of VisitMipsDexCacheArraysBase()?
 
   // Remember this offset (the obtained PC value) for later use with constant area.
   __ BindPcRelBaseLabel();
 }
 
-void LocationsBuilderMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(base);
-  locations->SetOut(Location::RequiresRegister());
-}
-
-void InstructionCodeGeneratorMIPS::VisitMipsDexCacheArraysBase(HMipsDexCacheArraysBase* base) {
-  Register reg = base->GetLocations()->Out().AsRegister<Register>();
-  CodeGeneratorMIPS::PcRelativePatchInfo* info =
-      codegen_->NewPcRelativeDexCacheArrayPatch(base->GetDexFile(), base->GetElementOffset());
-  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
-  bool reordering = __ SetReorder(false);
-  // TODO: Reuse MipsComputeBaseMethodAddress on R2 instead of passing ZERO to force emitting NAL.
-  codegen_->EmitPcRelativeAddressPlaceholderHigh(info, reg, ZERO);
-  __ Addiu(reg, reg, /* placeholder */ 0x5678);
-  __ SetReorder(reordering);
-}
-
 void LocationsBuilderMIPS::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
   // The trampoline uses the same calling convention as dex calling conventions,
   // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 449cb4c..e72e838d 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -23,8 +23,8 @@
 #include "nodes.h"
 #include "parallel_move_resolver.h"
 #include "string_reference.h"
+#include "type_reference.h"
 #include "utils/mips/assembler_mips.h"
-#include "utils/type_reference.h"
 
 namespace art {
 namespace mips {
@@ -552,8 +552,10 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
@@ -582,12 +584,12 @@
     MipsLabel pc_rel_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
+  PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
-  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                       uint32_t element_offset);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   Literal* DeduplicateBootImageAddressLiteral(uint32_t address);
 
   void EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info, Register out, Register base);
@@ -642,14 +644,17 @@
 
   // Deduplication map for 32-bit literals, used for non-patchable boot image addresses.
   Uint32ToLiteralMap uint32_literals_;
-  // PC-relative patch info for each HMipsDexCacheArraysBase.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<JitPatchInfo> jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5cdff5a..e4f1cbd 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -951,16 +951,17 @@
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
-      assembler_(graph->GetArena()),
+      assembler_(graph->GetArena(), &isa_features),
       isa_features_(isa_features),
       uint32_literals_(std::less<uint32_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       uint64_literals_(std::less<uint64_t>(),
                        graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      pc_relative_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -1439,31 +1440,44 @@
 void CodeGeneratorMIPS64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
-      pc_relative_string_patches_.size() +
+      pc_relative_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      pc_relative_string_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
-  if (!GetCompilerOptions().IsBootImage()) {
-    DCHECK(pc_relative_type_patches_.empty());
-    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+  if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(pc_relative_method_patches_,
                                                                   linker_patches);
-  } else {
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(pc_relative_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(pc_relative_string_patches_,
                                                                   linker_patches);
+  } else {
+    DCHECK(pc_relative_method_patches_.empty());
+    DCHECK(pc_relative_type_patches_.empty());
+    EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(pc_relative_string_patches_,
+                                                                  linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
 }
 
-CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
-    const DexFile& dex_file, dex::StringIndex string_index) {
-  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeMethodPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &pc_relative_method_patches_);
+}
+
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewMethodBssEntryPatch(
+    MethodReference target_method) {
+  return NewPcRelativePatch(*target_method.dex_file,
+                            target_method.dex_method_index,
+                            &method_bss_entry_patches_);
 }
 
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeTypePatch(
@@ -1476,9 +1490,9 @@
   return NewPcRelativePatch(dex_file, type_index.index_, &type_bss_entry_patches_);
 }
 
-CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeDexCacheArrayPatch(
-    const DexFile& dex_file, uint32_t element_offset) {
-  return NewPcRelativePatch(dex_file, element_offset, &pc_relative_dex_cache_patches_);
+CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativeStringPatch(
+    const DexFile& dex_file, dex::StringIndex string_index) {
+  return NewPcRelativePatch(dex_file, string_index.index_, &pc_relative_string_patches_);
 }
 
 CodeGeneratorMIPS64::PcRelativePatchInfo* CodeGeneratorMIPS64::NewPcRelativePatch(
@@ -4861,11 +4875,11 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   if (fallback_load) {
-    desired_string_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+    desired_string_load_kind = HLoadString::LoadKind::kRuntimeCall;
   }
   return desired_string_load_kind;
 }
@@ -4887,11 +4901,11 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   if (fallback_load) {
-    desired_class_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+    desired_class_load_kind = HLoadClass::LoadKind::kRuntimeCall;
   }
   return desired_class_load_kind;
 }
@@ -4903,7 +4917,8 @@
   return desired_dispatch_info;
 }
 
-void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
+void CodeGeneratorMIPS64::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   // All registers are assumed to be correctly set up per the calling convention.
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   HInvokeStaticOrDirect::MethodLoadKind method_load_kind = invoke->GetMethodLoadKind();
@@ -4923,46 +4938,29 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
+          NewPcRelativeMethodPatch(invoke->GetTargetMethod());
+      EmitPcRelativeAddressPlaceholderHigh(info, AT);
+      __ Daddiu(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ LoadLiteral(temp.AsRegister<GpuRegister>(),
                      kLoadDoubleword,
                      DeduplicateUint64Literal(invoke->GetMethodAddress()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
-      uint32_t offset = invoke->GetDexCacheArrayOffset();
-      CodeGeneratorMIPS64::PcRelativePatchInfo* info =
-          NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset);
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
+      PcRelativePatchInfo* info = NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex()));
       EmitPcRelativeAddressPlaceholderHigh(info, AT);
       __ Ld(temp.AsRegister<GpuRegister>(), AT, /* placeholder */ 0x5678);
       break;
     }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      GpuRegister reg = temp.AsRegister<GpuRegister>();
-      GpuRegister method_reg;
-      if (current_method.IsRegister()) {
-        method_reg = current_method.AsRegister<GpuRegister>();
-      } else {
-        // TODO: use the appropriate DCHECK() here if possible.
-        // DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ Ld(reg, SP, kCurrentMethodStackOffset);
-      }
-
-      // temp = temp->dex_cache_resolved_methods_;
-      __ LoadFromOffset(kLoadDoubleword,
-                        reg,
-                        method_reg,
-                        ArtMethod::DexCacheResolvedMethodsOffset(kMips64PointerSize).Int32Value());
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ LoadFromOffset(kLoadDoubleword,
-                        reg,
-                        reg,
-                        CodeGenerator::GetCachePointerOffset(index_in_cache));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
 
@@ -4982,6 +4980,8 @@
       __ Nop();
       break;
   }
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
+
   DCHECK(!IsLeafMethod());
 }
 
@@ -4999,10 +4999,10 @@
                                        locations->HasTemps()
                                            ? locations->GetTemp(0)
                                            : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
-void CodeGeneratorMIPS64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+void CodeGeneratorMIPS64::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_location, SlowPathCode* slow_path) {
   // Use the calling convention instead of the location of the receiver, as
   // intrinsics may have put the receiver in a different register. In the intrinsics
   // slow path, the arguments have been moved to the right place, so here we are
@@ -5034,6 +5034,7 @@
   // T9();
   __ Jalr(T9);
   __ Nop();
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -5043,12 +5044,11 @@
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderMIPS64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     Location loc = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(cls, loc, loc);
@@ -5085,7 +5085,7 @@
 // move.
 void InstructionCodeGeneratorMIPS64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -5096,7 +5096,7 @@
   GpuRegister out = out_loc.AsRegister<GpuRegister>();
   GpuRegister current_method_reg = ZERO;
   if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
-      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+      load_kind == HLoadClass::LoadKind::kRuntimeCall) {
       current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
   }
 
@@ -5150,7 +5150,7 @@
                                                           cls->GetClass()));
       GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
       break;
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -5199,7 +5199,7 @@
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   } else {
@@ -5273,7 +5273,7 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK(load_kind == HLoadString::LoadKind::kRuntimeCall);
   InvokeRuntimeCallingConvention calling_convention;
   DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadConst32(calling_convention.GetRegisterAt(0), load->GetStringIndex().index_);
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 1f34ced..6260c73 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -21,8 +21,8 @@
 #include "driver/compiler_options.h"
 #include "nodes.h"
 #include "parallel_move_resolver.h"
+#include "type_reference.h"
 #include "utils/mips64/assembler_mips64.h"
-#include "utils/type_reference.h"
 
 namespace art {
 namespace mips64 {
@@ -314,6 +314,9 @@
                                  uint32_t num_entries,
                                  HBasicBlock* switch_block,
                                  HBasicBlock* default_block);
+  int32_t VecAddress(LocationSummary* locations,
+                     size_t size,
+                     /* out */ GpuRegister* adjusted_base);
 
   Mips64Assembler* const assembler_;
   CodeGeneratorMIPS64* const codegen_;
@@ -518,8 +521,10 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
   void MoveFromReturnRegister(Location trg ATTRIBUTE_UNUSED,
                               Primitive::Type type ATTRIBUTE_UNUSED) OVERRIDE {
@@ -545,12 +550,12 @@
     Mips64Label pc_rel_label;
   };
 
-  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
-                                                dex::StringIndex string_index);
+  PcRelativePatchInfo* NewPcRelativeMethodPatch(MethodReference target_method);
+  PcRelativePatchInfo* NewMethodBssEntryPatch(MethodReference target_method);
   PcRelativePatchInfo* NewPcRelativeTypePatch(const DexFile& dex_file, dex::TypeIndex type_index);
   PcRelativePatchInfo* NewTypeBssEntryPatch(const DexFile& dex_file, dex::TypeIndex type_index);
-  PcRelativePatchInfo* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                       uint32_t element_offset);
+  PcRelativePatchInfo* NewPcRelativeStringPatch(const DexFile& dex_file,
+                                                dex::StringIndex string_index);
   PcRelativePatchInfo* NewPcRelativeCallPatch(const DexFile& dex_file,
                                               uint32_t method_index);
   Literal* DeduplicateBootImageAddressLiteral(uint64_t address);
@@ -603,14 +608,17 @@
   // Deduplication map for 64-bit literals, used for non-patchable method address or method code
   // address.
   Uint64ToLiteralMap uint64_literals_;
-  // PC-relative patch info.
-  ArenaDeque<PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
-  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PcRelativePatchInfo> pc_relative_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
+  // PC-relative String patch info; type depends on configuration (app .bss or boot image PIC).
+  ArenaDeque<PcRelativePatchInfo> pc_relative_string_patches_;
+
   // Patches for string root accesses in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
index a41adca..f422b9f 100644
--- a/compiler/optimizing/code_generator_vector_arm64.cc
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -22,6 +22,8 @@
 namespace art {
 namespace arm64 {
 
+using helpers::ARM64EncodableConstantOrRegister;
+using helpers::Arm64CanEncodeConstantAsImmediate;
 using helpers::DRegisterFrom;
 using helpers::VRegisterFrom;
 using helpers::HeapOperand;
@@ -34,6 +36,7 @@
 
 void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  HInstruction* input = instruction->InputAt(0);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -41,13 +44,19 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
       locations->SetOut(Location::RequiresFpuRegister());
       break;
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      locations->SetInAt(0, Location::RequiresFpuRegister());
-      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      if (input->IsConstant() &&
+          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
+        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(0, Location::RequiresFpuRegister());
+        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      }
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -57,33 +66,58 @@
 
 void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
   LocationSummary* locations = instruction->GetLocations();
+  Location src_loc = locations->InAt(0);
   VRegister dst = VRegisterFrom(locations->Out());
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
       DCHECK_EQ(16u, instruction->GetVectorLength());
-      __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
       DCHECK_EQ(8u, instruction->GetVectorLength());
-      __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimInt:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
+      }
       break;
     case Primitive::kPrimLong:
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Dup(dst.V2D(), XRegisterFrom(locations->InAt(0)));
+      if (src_loc.IsConstant()) {
+        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
+      } else {
+        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
+      }
       break;
     case Primitive::kPrimFloat:
       DCHECK_EQ(4u, instruction->GetVectorLength());
-      __ Dup(dst.V4S(), VRegisterFrom(locations->InAt(0)).V4S(), 0);
+      if (src_loc.IsConstant()) {
+        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
+      } else {
+        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
+      }
       break;
     case Primitive::kPrimDouble:
       DCHECK_EQ(2u, instruction->GetVectorLength());
-      __ Dup(dst.V2D(), VRegisterFrom(locations->InAt(0)).V2D(), 0);
+      if (src_loc.IsConstant()) {
+        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
+      } else {
+        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
+      }
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
index 50b95c1..0395db1 100644
--- a/compiler/optimizing/code_generator_vector_mips64.cc
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -15,6 +15,7 @@
  */
 
 #include "code_generator_mips64.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace mips64 {
@@ -22,12 +23,72 @@
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<Mips64Assembler*>(GetAssembler())->  // NOLINT
 
+VectorRegister VectorRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegister());  // Vector operands use FPU-register locations.
+  return static_cast<VectorRegister>(location.AsFpuRegister<FpuRegister>());
+}
+
 void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());  // Integral scalar: core register.
+      locations->SetOut(Location::RequiresFpuRegister());  // Result: vector (FPU) register.
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());  // FP scalar: FPU register.
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, locations->InAt(0).AsRegister<GpuRegister>());  // GPR -> 16 byte lanes
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, locations->InAt(0).AsRegister<GpuRegister>());  // GPR -> 8 halfword lanes
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, locations->InAt(0).AsRegister<GpuRegister>());  // GPR -> 4 word lanes
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, locations->InAt(0).AsRegister<GpuRegister>());  // GPR -> 2 doubleword lanes
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ ReplicateFPToVectorRegister(dst,
+                                     locations->InAt(0).AsFpuRegister<FpuRegister>(),
+                                     /* is_double */ false);  // FPR -> 4 float lanes
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ ReplicateFPToVectorRegister(dst,
+                                     locations->InAt(0).AsFpuRegister<FpuRegister>(),
+                                     /* is_double */ true);  // FPR -> 2 double lanes
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
@@ -51,13 +112,23 @@
   LocationSummary* locations = new (arena) LocationSummary(instruction);
   switch (instruction->GetPackedType()) {
     case Primitive::kPrimBoolean:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        instruction->IsVecNot() ? Location::kOutputOverlap
+                                                : Location::kNoOutputOverlap);
+      break;
     case Primitive::kPrimByte:
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        (instruction->IsVecNeg() || instruction->IsVecAbs())
+                            ? Location::kOutputOverlap
+                            : Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -70,7 +141,18 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  Primitive::Type from = instruction->GetInputType();
+  Primitive::Type to = instruction->GetResultType();
+  if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+    DCHECK_EQ(4u, instruction->GetVectorLength());
+    __ Ffint_sW(dst, src);  // int -> float, 4 lanes; the only conversion handled here
+  } else {
+    LOG(FATAL) << "Unsupported SIMD type";  // every other from/to pair is rejected
+    UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
@@ -78,7 +160,45 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, ZERO);       // all zeroes (dst is written before src is read)
+      __ SubvB(dst, dst, src);   // dst = 0 - src
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, ZERO);       // all zeroes
+      __ SubvH(dst, dst, src);   // dst = 0 - src
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);       // all zeroes
+      __ SubvW(dst, dst, src);   // dst = 0 - src
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, ZERO);       // all zeroes
+      __ SubvD(dst, dst, src);   // dst = 0 - src
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);       // all zero bits, i.e. +0.0f in every lane
+      __ FsubW(dst, dst, src);   // +0.0f - src; NOTE(review): +0.0f input stays +0.0f -- confirm
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, ZERO);       // all zero bits, i.e. +0.0 in every lane
+      __ FsubD(dst, dst, src);   // +0.0 - src; NOTE(review): +0.0 input stays +0.0 -- confirm
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
@@ -86,7 +206,47 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ FillB(dst, ZERO);       // all zeroes (dst is written before src is read)
+      __ Add_aB(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ FillH(dst, ZERO);       // all zeroes
+      __ Add_aH(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FillW(dst, ZERO);       // all zeroes
+      __ Add_aW(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FillD(dst, ZERO);       // all zeroes
+      __ Add_aD(dst, dst, src);  // dst = abs(0) + abs(src)
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ LdiW(dst, -1);          // all ones
+      __ SrliW(dst, dst, 1);     // logical shift: 0x7fffffff in every lane
+      __ AndV(dst, dst, src);    // dst = src with the sign bit cleared
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ LdiD(dst, -1);          // all ones
+      __ SrliD(dst, dst, 1);     // logical shift: 0x7fffffffffffffff in every lane
+      __ AndV(dst, dst, src);    // dst = src with the sign bit cleared
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
@@ -94,7 +254,30 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister src = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:  // special case boolean-not (assumes 0/1 byte values)
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ LdiB(dst, 1);         // 0x01 in every byte lane (dst is written before src is read)
+      __ XorV(dst, dst, src);  // dst = src ^ 1: flips bit 0 of every byte
+      break;
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ NorV(dst, src, src);  // ~(src | src) == ~src; bitwise, so lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector binary operations.
@@ -106,9 +289,12 @@
     case Primitive::kPrimChar:
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -121,7 +307,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ AddvB(dst, lhs, rhs);  // integer add, 16 byte lanes
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ AddvH(dst, lhs, rhs);  // integer add, 8 halfword lanes
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ AddvW(dst, lhs, rhs);  // integer add, 4 word lanes
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ AddvD(dst, lhs, rhs);  // integer add, 2 doubleword lanes
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FaddW(dst, lhs, rhs);  // floating-point add, 4 float lanes
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FaddD(dst, lhs, rhs);  // floating-point add, 2 double lanes
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
@@ -129,7 +348,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()
+            ? __ Aver_uB(dst, lhs, rhs)  // unsigned, rounded
+            : __ Ave_uB(dst, lhs, rhs);  // unsigned, not rounded
+      } else {
+        instruction->IsRounded()
+            ? __ Aver_sB(dst, lhs, rhs)  // signed, rounded
+            : __ Ave_sB(dst, lhs, rhs);  // signed, not rounded
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        instruction->IsRounded()
+            ? __ Aver_uH(dst, lhs, rhs)  // unsigned, rounded
+            : __ Ave_uH(dst, lhs, rhs);  // unsigned, not rounded
+      } else {
+        instruction->IsRounded()
+            ? __ Aver_sH(dst, lhs, rhs)  // signed, rounded
+            : __ Ave_sH(dst, lhs, rhs);  // signed, not rounded
+      }
+      break;
+    default:  // only byte and char/short packed types are handled above
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
@@ -137,7 +389,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SubvB(dst, lhs, rhs);  // integer subtract, 16 byte lanes
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SubvH(dst, lhs, rhs);  // integer subtract, 8 halfword lanes
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SubvW(dst, lhs, rhs);  // integer subtract, 4 word lanes
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SubvD(dst, lhs, rhs);  // integer subtract, 2 doubleword lanes
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FsubW(dst, lhs, rhs);  // floating-point subtract, 4 float lanes
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FsubD(dst, lhs, rhs);  // floating-point subtract, 2 double lanes
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
@@ -145,7 +430,40 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ MulvB(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ MulvH(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ MulvW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ MulvD(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FmulW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FmulD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
@@ -153,7 +471,23 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ FdivW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ FdivD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMin(HVecMin* instruction) {
@@ -161,7 +495,60 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMin(HVecMin* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uB(dst, lhs, rhs);
+      } else {
+        __ Min_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uH(dst, lhs, rhs);
+      } else {
+        __ Min_sH(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uW(dst, lhs, rhs);
+      } else {
+        __ Min_sW(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Min_uD(dst, lhs, rhs);
+      } else {
+        __ Min_sD(dst, lhs, rhs);
+      }
+      break;
+    // When one of the arguments is NaN, fmin.df returns the other argument, but Java expects NaN.
+    // TODO: Fix min(x, NaN) cases for float and double.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FminW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FminD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMax(HVecMax* instruction) {
@@ -169,7 +556,60 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecMax(HVecMax* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uB(dst, lhs, rhs);
+      } else {
+        __ Max_sB(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uH(dst, lhs, rhs);
+      } else {
+        __ Max_sH(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uW(dst, lhs, rhs);
+      } else {
+        __ Max_sW(dst, lhs, rhs);
+      }
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      if (instruction->IsUnsigned()) {
+        __ Max_uD(dst, lhs, rhs);
+      } else {
+        __ Max_sD(dst, lhs, rhs);
+      }
+      break;
+    // When one of the arguments is NaN, fmax.df returns the other argument, but Java expects NaN.
+    // TODO: Fix max(x, NaN) cases for float and double.
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FmaxW(dst, lhs, rhs);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      DCHECK(!instruction->IsUnsigned());
+      __ FmaxD(dst, lhs, rhs);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
@@ -177,7 +617,27 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ AndV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
@@ -193,7 +653,27 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ OrV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
@@ -201,7 +681,27 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister rhs = VectorRegisterFrom(locations->InAt(1));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ XorV(dst, lhs, rhs);  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 // Helper to set up locations for vector shift operations.
@@ -213,7 +713,9 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimInt:
     case Primitive::kPrimLong:
-      DCHECK(locations);
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
       break;
     default:
       LOG(FATAL) << "Unsupported SIMD type";
@@ -226,7 +728,32 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SlliB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SlliH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SlliW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SlliD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
@@ -234,7 +761,32 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SraiB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SraiH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SraiW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SraiD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
@@ -242,7 +794,32 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  VectorRegister lhs = VectorRegisterFrom(locations->InAt(0));
+  VectorRegister dst = VectorRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ SrliB(dst, lhs, value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ SrliH(dst, lhs, value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ SrliW(dst, lhs, value);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ SrliD(dst, lhs, value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instr) {
@@ -253,20 +830,143 @@
   LOG(FATAL) << "No SIMD for " << instr->GetId();
 }
 
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      if (is_load) {
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to prepare register and offset for vector memory operations. Returns the offset and sets
+// the output parameter adjusted_base to the original base or to a reserved temporary register (AT).
+int32_t InstructionCodeGeneratorMIPS64::VecAddress(LocationSummary* locations,
+                                                   size_t size,
+                                                   /* out */ GpuRegister* adjusted_base) {
+  GpuRegister base = locations->InAt(0).AsRegister<GpuRegister>();
+  Location index = locations->InAt(1);
+  int scale = TIMES_1;
+  switch (size) {
+    case 2: scale = TIMES_2; break;
+    case 4: scale = TIMES_4; break;
+    case 8: scale = TIMES_8; break;
+    default: break;
+  }
+  int32_t offset = mirror::Array::DataOffset(size).Int32Value();
+
+  if (index.IsConstant()) {
+    offset += index.GetConstant()->AsIntConstant()->GetValue() << scale;
+    __ AdjustBaseOffsetAndElementSizeShift(base, offset, scale);
+    *adjusted_base = base;
+  } else {
+    GpuRegister index_reg = index.AsRegister<GpuRegister>();
+    if (scale != TIMES_1) {
+      __ Dlsa(AT, index_reg, base, scale);
+    } else {
+      __ Daddu(AT, base, index_reg);
+    }
+    *adjusted_base = AT;
+  }
+  return offset;
+}
+
 void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ true);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VectorRegister reg = VectorRegisterFrom(locations->Out());
+  GpuRegister base;
+  int32_t offset = VecAddress(locations, size, &base);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ LdB(reg, base, offset);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      // Loading 8 bytes (needed if dealing with compressed strings in StringCharAt) from an
+      // unaligned memory address may cause a trap to the kernel if the CPU doesn't directly support
+      // unaligned loads and stores.
+      // TODO: Implement support for StringCharAt.
+      DCHECK(!instruction->IsStringCharAt());
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ LdH(reg, base, offset);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ LdW(reg, base, offset);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ LdD(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /* is_load */ false);
 }
 
 void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
-  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+  LocationSummary* locations = instruction->GetLocations();
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  VectorRegister reg = VectorRegisterFrom(locations->InAt(2));
+  GpuRegister base;
+  int32_t offset = VecAddress(locations, size, &base);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ StB(reg, base, offset);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ StH(reg, base, offset);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ StW(reg, base, offset);
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ StD(reg, base, offset);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
 }
 
 #undef __
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 4a279d8..83a261d 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -26,6 +26,7 @@
 #include "intrinsics_x86.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "lock_word.h"
 #include "thread.h"
 #include "utils/assembler.h"
 #include "utils/stack_checks.h"
@@ -1031,10 +1032,11 @@
       move_resolver_(graph->GetArena(), this),
       assembler_(graph->GetArena()),
       isa_features_(isa_features),
-      pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       constant_area_start_(-1),
@@ -2167,7 +2169,7 @@
 
   IntrinsicLocationsBuilderX86 intrinsic(codegen_);
   if (intrinsic.TryDispatch(invoke)) {
-    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeMethodLoadKind()) {
       invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
     }
     return;
@@ -2176,7 +2178,7 @@
   HandleInvoke(invoke);
 
   // For PC-relative dex cache the invoke has an extra input, the PC-relative address base.
-  if (invoke->HasPcRelativeDexCache()) {
+  if (invoke->HasPcRelativeMethodLoadKind()) {
     invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::RequiresRegister());
   }
 }
@@ -2202,7 +2204,6 @@
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderX86::VisitInvokeVirtual(HInvokeVirtual* invoke) {
@@ -2226,7 +2227,6 @@
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderX86::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -4519,18 +4519,17 @@
   // save one load. However, since this is just an intrinsic slow path we prefer this
   // simple and more robust approach rather that trying to determine if that's the case.
   SlowPathCode* slow_path = GetCurrentSlowPath();
-  if (slow_path != nullptr) {
-    if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
-      int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
-      __ movl(temp, Address(ESP, stack_offset));
-      return temp;
-    }
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(location.AsRegister<Register>())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(location.AsRegister<Register>());
+    __ movl(temp, Address(ESP, stack_offset));
+    return temp;
   }
   return location.AsRegister<Register>();
 }
 
-Location CodeGeneratorX86::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                                  Location temp) {
+void CodeGeneratorX86::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -4543,48 +4542,32 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative: {
+      DCHECK(GetCompilerOptions().IsBootImage());
+      Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
+                                                                temp.AsRegister<Register>());
+      __ leal(temp.AsRegister<Register>(), Address(base_reg, CodeGeneratorX86::kDummy32BitOffset));
+      RecordBootMethodPatch(invoke);
+      break;
+    }
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       __ movl(temp.AsRegister<Register>(), Immediate(invoke->GetMethodAddress()));
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
       Register base_reg = GetInvokeStaticOrDirectExtraParameter(invoke,
                                                                 temp.AsRegister<Register>());
       __ movl(temp.AsRegister<Register>(), Address(base_reg, kDummy32BitOffset));
       // Bind a new fixup label at the end of the "movl" insn.
-      uint32_t offset = invoke->GetDexCacheArrayOffset();
-      __ Bind(NewPcRelativeDexCacheArrayPatch(
+      __ Bind(NewMethodBssEntryPatch(
           invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress(),
-          invoke->GetDexFileForPcRelativeDexCache(),
-          offset));
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())));
       break;
     }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register method_reg;
-      Register reg = temp.AsRegister<Register>();
-      if (current_method.IsRegister()) {
-        method_reg = current_method.AsRegister<Register>();
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg;
-        __ movl(reg, Address(ESP, kCurrentMethodStackOffset));
-      }
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      __ movl(reg, Address(method_reg,
-                           ArtMethod::DexCacheResolvedMethodsOffset(kX86PointerSize).Int32Value()));
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ movl(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
-  return callee_method;
-}
-
-void CodeGeneratorX86::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) {
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -4597,11 +4580,13 @@
                           kX86PointerSize).Int32Value()));
       break;
   }
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 
   DCHECK(!IsLeafMethod());
 }
 
-void CodeGeneratorX86::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
+void CodeGeneratorX86::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   Register temp = temp_in.AsRegister<Register>();
   uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86PointerSize).Uint32Value();
@@ -4629,15 +4614,27 @@
   // call temp->GetEntryPoint();
   __ call(Address(
       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }
 
-void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
-  DCHECK(GetCompilerOptions().IsBootImage());
-  HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
-  string_patches_.emplace_back(address,
-                               load_string->GetDexFile(),
-                               load_string->GetStringIndex().index_);
-  __ Bind(&string_patches_.back().label);
+void CodeGeneratorX86::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  HX86ComputeBaseMethodAddress* address =
+      invoke->InputAt(invoke->GetSpecialInputIndex())->AsX86ComputeBaseMethodAddress();
+  boot_image_method_patches_.emplace_back(address,
+                                          *invoke->GetTargetMethod().dex_file,
+                                          invoke->GetTargetMethod().dex_method_index);
+  __ Bind(&boot_image_method_patches_.back().label);
+}
+
+Label* CodeGeneratorX86::NewMethodBssEntryPatch(
+    HX86ComputeBaseMethodAddress* method_address,
+    MethodReference target_method) {
+  // Add the patch entry and bind its label at the end of the instruction.
+  method_bss_entry_patches_.emplace_back(method_address,
+                                         *target_method.dex_file,
+                                         target_method.dex_method_index);
+  return &method_bss_entry_patches_.back().label;
 }
 
 void CodeGeneratorX86::RecordBootTypePatch(HLoadClass* load_class) {
@@ -4656,6 +4653,15 @@
   return &type_bss_entry_patches_.back().label;
 }
 
+void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
+  DCHECK(GetCompilerOptions().IsBootImage());
+  HX86ComputeBaseMethodAddress* address = load_string->InputAt(0)->AsX86ComputeBaseMethodAddress();
+  string_patches_.emplace_back(address,
+                               load_string->GetDexFile(),
+                               load_string->GetStringIndex().index_);
+  __ Bind(&string_patches_.back().label);
+}
+
 Label* CodeGeneratorX86::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
   HX86ComputeBaseMethodAddress* address =
@@ -4665,15 +4671,6 @@
   return &string_patches_.back().label;
 }
 
-Label* CodeGeneratorX86::NewPcRelativeDexCacheArrayPatch(
-    HX86ComputeBaseMethodAddress* method_address,
-    const DexFile& dex_file,
-    uint32_t element_offset) {
-  // Add the patch entry and bind its label at the end of the instruction.
-  pc_relative_dex_cache_patches_.emplace_back(method_address, dex_file, element_offset);
-  return &pc_relative_dex_cache_patches_.back().label;
-}
-
 // The label points to the end of the "movl" or another instruction but the literal offset
 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
@@ -4692,21 +4689,25 @@
 void CodeGeneratorX86::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
-      string_patches_.size() +
+      boot_image_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       boot_image_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      string_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
+                                                                  linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   } else {
+    DCHECK(boot_image_method_patches_.empty());
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
@@ -6042,7 +6043,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -6050,7 +6051,7 @@
 
 void LocationsBuilderX86::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     InvokeRuntimeCallingConvention calling_convention;
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -6104,7 +6105,7 @@
 // move.
 void InstructionCodeGeneratorX86::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -6164,7 +6165,7 @@
       GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
       break;
     }
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
     case HLoadClass::LoadKind::kInvalid:
       LOG(FATAL) << "UNREACHABLE";
       UNREACHABLE();
@@ -6227,7 +6228,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -6241,7 +6242,7 @@
       load_kind == HLoadString::LoadKind::kBssEntry) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(EAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index f08d642..f48753b 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -408,18 +408,19 @@
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
   // Generate a call to a static or direct method.
-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
   // Generate a call to a virtual method.
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
-  void RecordBootStringPatch(HLoadString* load_string);
+  void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
+  Label* NewMethodBssEntryPatch(HX86ComputeBaseMethodAddress* method_address,
+                                MethodReference target_method);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
+  void RecordBootStringPatch(HLoadString* load_string);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
-  Label* NewPcRelativeDexCacheArrayPatch(HX86ComputeBaseMethodAddress* method_address,
-                                         const DexFile& dex_file,
-                                         uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file,
                                dex::StringIndex dex_index,
                                Handle<mirror::String> handle);
@@ -631,18 +632,19 @@
   X86Assembler assembler_;
   const X86InstructionSetFeatures& isa_features_;
 
-  // PC-relative DexCache access info.
-  ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // String patch locations; type depends on configuration (app .bss or boot image).
-  ArenaDeque<X86PcRelativePatchInfo> string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<X86PcRelativePatchInfo> boot_image_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<X86PcRelativePatchInfo> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<X86PcRelativePatchInfo> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
   ArenaDeque<X86PcRelativePatchInfo> type_bss_entry_patches_;
+  // String patch locations; type depends on configuration (app .bss or boot image).
+  ArenaDeque<X86PcRelativePatchInfo> string_patches_;
 
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
-
   // Patches for class root accesses in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_class_patches_;
 
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index ac0f37b..7331a9e 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -23,6 +23,7 @@
 #include "gc/accounting/card_table.h"
 #include "intrinsics.h"
 #include "intrinsics_x86_64.h"
+#include "lock_word.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object_reference.h"
@@ -976,9 +977,10 @@
   return desired_dispatch_info;
 }
 
-Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                                     Location temp) {
+void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
   // All registers are assumed to be correctly set up.
+
   Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
   switch (invoke->GetMethodLoadKind()) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
@@ -991,47 +993,28 @@
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+      DCHECK(GetCompilerOptions().IsBootImage());
+      __ leal(temp.AsRegister<CpuRegister>(),
+              Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
+      RecordBootMethodPatch(invoke);
+      break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
       Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
       break;
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
       __ movq(temp.AsRegister<CpuRegister>(),
               Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
       // Bind a new fixup label at the end of the "movl" insn.
-      uint32_t offset = invoke->GetDexCacheArrayOffset();
-      __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset));
+      __ Bind(NewMethodBssEntryPatch(
+          MethodReference(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex())));
       break;
     }
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
-      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
-      Register method_reg;
-      CpuRegister reg = temp.AsRegister<CpuRegister>();
-      if (current_method.IsRegister()) {
-        method_reg = current_method.AsRegister<Register>();
-      } else {
-        DCHECK(invoke->GetLocations()->Intrinsified());
-        DCHECK(!current_method.IsValid());
-        method_reg = reg.AsRegister();
-        __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
-      }
-      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
-      __ movq(reg,
-              Address(CpuRegister(method_reg),
-                      ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
-      // temp = temp[index_in_cache];
-      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
-      uint32_t index_in_cache = invoke->GetDexMethodIndex();
-      __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
-      break;
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
+      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
+      return;  // No code pointer retrieval; the runtime performs the call directly.
     }
   }
-  return callee_method;
-}
-
-void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                     Location temp) {
-  // All registers are assumed to be correctly set up.
-  Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
 
   switch (invoke->GetCodePtrLocation()) {
     case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
@@ -1044,11 +1027,13 @@
                           kX86_64PointerSize).SizeValue()));
       break;
   }
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 
   DCHECK(!IsLeafMethod());
 }
 
-void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
+void CodeGeneratorX86_64::GenerateVirtualCall(
+    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
   CpuRegister temp = temp_in.AsRegister<CpuRegister>();
   size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
       invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
@@ -1077,12 +1062,19 @@
   // call temp->GetEntryPoint();
   __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
       kX86_64PointerSize).SizeValue()));
+  RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
 }
 
-void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
-  DCHECK(GetCompilerOptions().IsBootImage());
-  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
-  __ Bind(&string_patches_.back().label);
+void CodeGeneratorX86_64::RecordBootMethodPatch(HInvokeStaticOrDirect* invoke) {
+  boot_image_method_patches_.emplace_back(*invoke->GetTargetMethod().dex_file,
+                                          invoke->GetTargetMethod().dex_method_index);
+  __ Bind(&boot_image_method_patches_.back().label);
+}
+
+Label* CodeGeneratorX86_64::NewMethodBssEntryPatch(MethodReference target_method) {
+  // Add a patch entry and return the label.
+  method_bss_entry_patches_.emplace_back(*target_method.dex_file, target_method.dex_method_index);
+  return &method_bss_entry_patches_.back().label;
 }
 
 void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
@@ -1096,19 +1088,18 @@
   return &type_bss_entry_patches_.back().label;
 }
 
+void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
+  DCHECK(GetCompilerOptions().IsBootImage());
+  string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
+  __ Bind(&string_patches_.back().label);
+}
+
 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
   DCHECK(!GetCompilerOptions().IsBootImage());
   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
   return &string_patches_.back().label;
 }
 
-Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
-                                                            uint32_t element_offset) {
-  // Add a patch entry and return the label.
-  pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
-  return &pc_relative_dex_cache_patches_.back().label;
-}
-
 // The label points to the end of the "movl" or another instruction but the literal offset
 // for method patch needs to point to the embedded constant which occupies the last 4 bytes.
 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
@@ -1127,21 +1118,25 @@
 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
   DCHECK(linker_patches->empty());
   size_t size =
-      pc_relative_dex_cache_patches_.size() +
-      string_patches_.size() +
+      boot_image_method_patches_.size() +
+      method_bss_entry_patches_.size() +
       boot_image_type_patches_.size() +
-      type_bss_entry_patches_.size();
+      type_bss_entry_patches_.size() +
+      string_patches_.size();
   linker_patches->reserve(size);
-  EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
-                                                               linker_patches);
   if (GetCompilerOptions().IsBootImage()) {
+    EmitPcRelativeLinkerPatches<LinkerPatch::RelativeMethodPatch>(boot_image_method_patches_,
+                                                                  linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
                                                                 linker_patches);
     EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
   } else {
+    DCHECK(boot_image_method_patches_.empty());
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
   }
+  EmitPcRelativeLinkerPatches<LinkerPatch::MethodBssEntryPatch>(method_bss_entry_patches_,
+                                                                linker_patches);
   EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
                                                               linker_patches);
   DCHECK_EQ(size, linker_patches->size());
@@ -1230,13 +1225,14 @@
         assembler_(graph->GetArena()),
         isa_features_(isa_features),
         constant_area_start_(0),
-        pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        boot_image_method_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        method_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
+        jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+        fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
@@ -2375,7 +2371,6 @@
   LocationSummary* locations = invoke->GetLocations();
   codegen_->GenerateStaticOrDirectCall(
       invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
@@ -2399,7 +2394,6 @@
 
   codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
   DCHECK(!codegen_->IsLeafMethod());
-  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
 }
 
 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -5465,7 +5459,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadClass::LoadKind::kBootImageAddress:
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
+    case HLoadClass::LoadKind::kRuntimeCall:
       break;
   }
   return desired_class_load_kind;
@@ -5473,7 +5467,7 @@
 
 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     // Custom calling convention: RAX serves as both input and output.
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
@@ -5524,7 +5518,7 @@
 // move.
 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
   HLoadClass::LoadKind load_kind = cls->GetLoadKind();
-  if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
+  if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
     codegen_->GenerateLoadClassRuntimeCall(cls);
     return;
   }
@@ -5635,7 +5629,7 @@
       DCHECK(Runtime::Current()->UseJitCompilation());
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheViaMethod:
+    case HLoadString::LoadKind::kRuntimeCall:
       break;
   }
   return desired_string_load_kind;
@@ -5644,7 +5638,7 @@
 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
   LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
-  if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
+  if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
     locations->SetOut(Location::RegisterLocation(RAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index d8005cc..33c6429 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -404,15 +404,17 @@
       const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
       HInvokeStaticOrDirect* invoke) OVERRIDE;
 
-  Location GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp);
-  void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
-  void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
+  void GenerateStaticOrDirectCall(
+      HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
+  void GenerateVirtualCall(
+      HInvokeVirtual* invoke, Location temp, SlowPathCode* slow_path = nullptr) OVERRIDE;
 
-  void RecordBootStringPatch(HLoadString* load_string);
+  void RecordBootMethodPatch(HInvokeStaticOrDirect* invoke);
+  Label* NewMethodBssEntryPatch(MethodReference target_method);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
+  void RecordBootStringPatch(HLoadString* load_string);
   Label* NewStringBssEntryPatch(HLoadString* load_string);
-  Label* NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file, uint32_t element_offset);
   Label* NewJitRootStringPatch(const DexFile& dex_file,
                                dex::StringIndex dex_index,
                                Handle<mirror::String> handle);
@@ -601,24 +603,25 @@
   // Used for fixups to the constant area.
   int constant_area_start_;
 
-  // PC-relative DexCache access info.
-  ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
-  // String patch locations; type depends on configuration (app .bss or boot image).
-  ArenaDeque<PatchInfo<Label>> string_patches_;
+  // PC-relative method patch info for kBootImageLinkTimePcRelative.
+  ArenaDeque<PatchInfo<Label>> boot_image_method_patches_;
+  // PC-relative method patch info for kBssEntry.
+  ArenaDeque<PatchInfo<Label>> method_bss_entry_patches_;
   // PC-relative type patch info for kBootImageLinkTimePcRelative.
   ArenaDeque<PatchInfo<Label>> boot_image_type_patches_;
   // Type patch locations for kBssEntry.
   ArenaDeque<PatchInfo<Label>> type_bss_entry_patches_;
-
-  // Fixups for jump tables need to be handled specially.
-  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+  // String patch locations; type depends on configuration (app .bss or boot image).
+  ArenaDeque<PatchInfo<Label>> string_patches_;
 
   // Patches for string literals in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_string_patches_;
-
   // Patches for class literals in JIT compiled code.
   ArenaDeque<PatchInfo<Label>> jit_class_patches_;
 
+  // Fixups for jump tables need to be handled specially.
+  ArenaVector<JumpTableRIPFixup*> fixups_to_jump_tables_;
+
   DISALLOW_COPY_AND_ASSIGN(CodeGeneratorX86_64);
 };
 
diff --git a/compiler/optimizing/common_arm64.h b/compiler/optimizing/common_arm64.h
index 721f74e..e73fd7d 100644
--- a/compiler/optimizing/common_arm64.h
+++ b/compiler/optimizing/common_arm64.h
@@ -234,9 +234,20 @@
   }
 }
 
-inline bool CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
-  DCHECK(constant->IsIntConstant() || constant->IsLongConstant() || constant->IsNullConstant())
-      << constant->DebugName();
+inline bool Arm64CanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
+  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
+
+  // TODO: Improve this when IsSIMDConstantEncodable method is implemented in VIXL.
+  if (instr->IsVecReplicateScalar()) {
+    if (constant->IsLongConstant()) {
+      return false;
+    } else if (constant->IsFloatConstant()) {
+      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
+    } else if (constant->IsDoubleConstant()) {
+      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
+    }
+    return IsUint<8>(value);
+  }
 
   // For single uses we let VIXL handle the constant generation since it will
   // use registers that are not managed by the register allocator (wip0, wip1).
@@ -249,8 +260,6 @@
     return true;
   }
 
-  int64_t value = CodeGenerator::GetInt64ValueOf(constant);
-
   if (instr->IsAnd() || instr->IsOr() || instr->IsXor()) {
     // Uses logical operations.
     return vixl::aarch64::Assembler::IsImmLogical(value, vixl::aarch64::kXRegSize);
@@ -276,7 +285,7 @@
 inline Location ARM64EncodableConstantOrRegister(HInstruction* constant,
                                                         HInstruction* instr) {
   if (constant->IsConstant()
-      && CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
+      && Arm64CanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
     return Location::ConstantLocation(constant->AsConstant());
   }
 
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.cc b/compiler/optimizing/dex_cache_array_fixups_arm.cc
deleted file mode 100644
index 0c832a5..0000000
--- a/compiler/optimizing/dex_cache_array_fixups_arm.cc
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "dex_cache_array_fixups_arm.h"
-
-#include "base/arena_containers.h"
-#ifdef ART_USE_OLD_ARM_BACKEND
-#include "code_generator_arm.h"
-#include "intrinsics_arm.h"
-#else
-#include "code_generator_arm_vixl.h"
-#include "intrinsics_arm_vixl.h"
-#endif
-#include "utils/dex_cache_arrays_layout-inl.h"
-
-namespace art {
-namespace arm {
-#ifdef ART_USE_OLD_ARM_BACKEND
-typedef CodeGeneratorARM CodeGeneratorARMType;
-typedef IntrinsicLocationsBuilderARM IntrinsicLocationsBuilderARMType;
-#else
-typedef CodeGeneratorARMVIXL CodeGeneratorARMType;
-typedef IntrinsicLocationsBuilderARMVIXL IntrinsicLocationsBuilderARMType;
-#endif
-
-/**
- * Finds instructions that need the dex cache arrays base as an input.
- */
-class DexCacheArrayFixupsVisitor : public HGraphVisitor {
- public:
-  DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
-      : HGraphVisitor(graph),
-        codegen_(down_cast<CodeGeneratorARMType*>(codegen)),
-        dex_cache_array_bases_(std::less<const DexFile*>(),
-                               // Attribute memory use to code generator.
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
-
-  void MoveBasesIfNeeded() {
-    for (const auto& entry : dex_cache_array_bases_) {
-      // Bring the base closer to the first use (previously, it was in the
-      // entry block) and relieve some pressure on the register allocator
-      // while avoiding recalculation of the base in a loop.
-      HArmDexCacheArraysBase* base = entry.second;
-      base->MoveBeforeFirstUserAndOutOfLoops();
-    }
-  }
-
- private:
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
-    // If this is an invoke with PC-relative access to the dex cache methods array,
-    // we need to add the dex cache arrays base as the special input.
-    if (invoke->HasPcRelativeDexCache() &&
-        !IsCallFreeIntrinsic<IntrinsicLocationsBuilderARMType>(invoke, codegen_)) {
-      HArmDexCacheArraysBase* base =
-          GetOrCreateDexCacheArrayBase(invoke, invoke->GetDexFileForPcRelativeDexCache());
-      // Update the element offset in base.
-      DexCacheArraysLayout layout(kArmPointerSize, &invoke->GetDexFileForPcRelativeDexCache());
-      base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex()));
-      // Add the special argument base to the method.
-      DCHECK(!invoke->HasCurrentMethodInput());
-      invoke->AddSpecialInput(base);
-    }
-  }
-
-  HArmDexCacheArraysBase* GetOrCreateDexCacheArrayBase(HInstruction* cursor,
-                                                       const DexFile& dex_file) {
-    if (GetGraph()->HasIrreducibleLoops()) {
-      HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
-      cursor->GetBlock()->InsertInstructionBefore(base, cursor);
-      return base;
-    } else {
-      // Ensure we only initialize the pointer once for each dex file.
-      auto lb = dex_cache_array_bases_.lower_bound(&dex_file);
-      if (lb != dex_cache_array_bases_.end() &&
-          !dex_cache_array_bases_.key_comp()(&dex_file, lb->first)) {
-        return lb->second;
-      }
-
-      // Insert the base at the start of the entry block, move it to a better
-      // position later in MoveBaseIfNeeded().
-      HArmDexCacheArraysBase* base = new (GetGraph()->GetArena()) HArmDexCacheArraysBase(dex_file);
-      HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
-      entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
-      dex_cache_array_bases_.PutBefore(lb, &dex_file, base);
-      return base;
-    }
-  }
-
-  CodeGeneratorARMType* codegen_;
-
-  using DexCacheArraysBaseMap =
-      ArenaSafeMap<const DexFile*, HArmDexCacheArraysBase*, std::less<const DexFile*>>;
-  DexCacheArraysBaseMap dex_cache_array_bases_;
-};
-
-void DexCacheArrayFixups::Run() {
-  DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
-  visitor.VisitInsertionOrder();
-  visitor.MoveBasesIfNeeded();
-}
-
-}  // namespace arm
-}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_arm.h b/compiler/optimizing/dex_cache_array_fixups_arm.h
deleted file mode 100644
index 9d67a31..0000000
--- a/compiler/optimizing/dex_cache_array_fixups_arm.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
-#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
-
-#include "nodes.h"
-#include "optimization.h"
-
-namespace art {
-
-class CodeGenerator;
-
-namespace arm {
-
-class DexCacheArrayFixups : public HOptimization {
- public:
-  DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
-      : HOptimization(graph, kDexCacheArrayFixupsArmPassName, stats),
-        codegen_(codegen) {}
-
-  static constexpr const char* kDexCacheArrayFixupsArmPassName = "dex_cache_array_fixups_arm";
-
-  void Run() OVERRIDE;
-
- private:
-  CodeGenerator* codegen_;
-};
-
-}  // namespace arm
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_ARM_H_
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.cc b/compiler/optimizing/dex_cache_array_fixups_mips.cc
deleted file mode 100644
index 7734f91..0000000
--- a/compiler/optimizing/dex_cache_array_fixups_mips.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "code_generator_mips.h"
-#include "dex_cache_array_fixups_mips.h"
-
-#include "base/arena_containers.h"
-#include "intrinsics_mips.h"
-#include "utils/dex_cache_arrays_layout-inl.h"
-
-namespace art {
-namespace mips {
-
-/**
- * Finds instructions that need the dex cache arrays base as an input.
- */
-class DexCacheArrayFixupsVisitor : public HGraphVisitor {
- public:
-  explicit DexCacheArrayFixupsVisitor(HGraph* graph, CodeGenerator* codegen)
-      : HGraphVisitor(graph),
-        codegen_(down_cast<CodeGeneratorMIPS*>(codegen)),
-        dex_cache_array_bases_(std::less<const DexFile*>(),
-                               // Attribute memory use to code generator.
-                               graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {}
-
-  void MoveBasesIfNeeded() {
-    for (const auto& entry : dex_cache_array_bases_) {
-      // Bring the base closer to the first use (previously, it was in the
-      // entry block) and relieve some pressure on the register allocator
-      // while avoiding recalculation of the base in a loop.
-      HMipsDexCacheArraysBase* base = entry.second;
-      base->MoveBeforeFirstUserAndOutOfLoops();
-    }
-    // Computing the dex cache base for PC-relative accesses will clobber RA with
-    // the NAL instruction on R2. Take a note of this before generating the method
-    // entry.
-    if (!dex_cache_array_bases_.empty()) {
-      codegen_->ClobberRA();
-    }
-  }
-
- private:
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
-    // If this is an invoke with PC-relative access to the dex cache methods array,
-    // we need to add the dex cache arrays base as the special input.
-    if (invoke->HasPcRelativeDexCache() &&
-        !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) {
-      // Initialize base for target method dex file if needed.
-      HMipsDexCacheArraysBase* base =
-          GetOrCreateDexCacheArrayBase(invoke->GetDexFileForPcRelativeDexCache());
-      // Update the element offset in base.
-      DexCacheArraysLayout layout(kMipsPointerSize, &invoke->GetDexFileForPcRelativeDexCache());
-      base->UpdateElementOffset(layout.MethodOffset(invoke->GetDexMethodIndex()));
-      // Add the special argument base to the method.
-      DCHECK(!invoke->HasCurrentMethodInput());
-      invoke->AddSpecialInput(base);
-    }
-  }
-
-  HMipsDexCacheArraysBase* GetOrCreateDexCacheArrayBase(const DexFile& dex_file) {
-    return dex_cache_array_bases_.GetOrCreate(
-        &dex_file,
-        [this, &dex_file]() {
-          HMipsDexCacheArraysBase* base =
-              new (GetGraph()->GetArena()) HMipsDexCacheArraysBase(dex_file);
-          HBasicBlock* entry_block = GetGraph()->GetEntryBlock();
-          // Insert the base at the start of the entry block, move it to a better
-          // position later in MoveBaseIfNeeded().
-          entry_block->InsertInstructionBefore(base, entry_block->GetFirstInstruction());
-          return base;
-        });
-  }
-
-  CodeGeneratorMIPS* codegen_;
-
-  using DexCacheArraysBaseMap =
-      ArenaSafeMap<const DexFile*, HMipsDexCacheArraysBase*, std::less<const DexFile*>>;
-  DexCacheArraysBaseMap dex_cache_array_bases_;
-};
-
-void DexCacheArrayFixups::Run() {
-  CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen_);
-  if (mips_codegen->GetInstructionSetFeatures().IsR6()) {
-    // Do nothing for R6 because it has PC-relative addressing.
-    return;
-  }
-  if (graph_->HasIrreducibleLoops()) {
-    // Do not run this optimization, as irreducible loops do not work with an instruction
-    // that can be live-in at the irreducible loop header.
-    return;
-  }
-  DexCacheArrayFixupsVisitor visitor(graph_, codegen_);
-  visitor.VisitInsertionOrder();
-  visitor.MoveBasesIfNeeded();
-}
-
-}  // namespace mips
-}  // namespace art
diff --git a/compiler/optimizing/dex_cache_array_fixups_mips.h b/compiler/optimizing/dex_cache_array_fixups_mips.h
deleted file mode 100644
index 861a199..0000000
--- a/compiler/optimizing/dex_cache_array_fixups_mips.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
-#define ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
-
-#include "nodes.h"
-#include "optimization.h"
-
-namespace art {
-
-class CodeGenerator;
-
-namespace mips {
-
-class DexCacheArrayFixups : public HOptimization {
- public:
-  DexCacheArrayFixups(HGraph* graph, CodeGenerator* codegen, OptimizingCompilerStats* stats)
-      : HOptimization(graph, kDexCacheArrayFixupsMipsPassName, stats),
-        codegen_(codegen) {}
-
-  static constexpr const char* kDexCacheArrayFixupsMipsPassName = "dex_cache_array_fixups_mips";
-
-  void Run() OVERRIDE;
-
- private:
-  CodeGenerator* codegen_;
-};
-
-}  // namespace mips
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_DEX_CACHE_ARRAY_FIXUPS_MIPS_H_
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 02816cf..7dcf244 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -34,6 +34,7 @@
 #include "register_allocator_linear_scan.h"
 #include "ssa_liveness_analysis.h"
 #include "utils/assembler.h"
+#include "utils/intrusive_forward_list.h"
 
 namespace art {
 
@@ -66,6 +67,13 @@
       current->Dump(NewEntryStream());
     }
   }
+  // Builds a StringList with one entry per element of `list`; each element must provide `Dump`.
+  template <typename Container>
+  explicit StringList(const Container& list, Format format = kArrayBrackets) : StringList(format) {
+    for (const typename Container::value_type& element : list) {
+      element.Dump(NewEntryStream());
+    }
+  }
 
   std::ostream& NewEntryStream() {
     if (is_empty_) {
@@ -584,8 +592,8 @@
         LiveInterval* interval = instruction->GetLiveInterval();
         StartAttributeStream("ranges")
             << StringList(interval->GetFirstRange(), StringList::kSetBrackets);
-        StartAttributeStream("uses") << StringList(interval->GetFirstUse());
-        StartAttributeStream("env_uses") << StringList(interval->GetFirstEnvironmentUse());
+        StartAttributeStream("uses") << StringList(interval->GetUses());
+        StartAttributeStream("env_uses") << StringList(interval->GetEnvironmentUses());
         StartAttributeStream("is_fixed") << interval->IsFixed();
         StartAttributeStream("is_split") << interval->IsSplit();
         StartAttributeStream("is_low") << interval->IsLowInterval();
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index 88473f02..84b20f6 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -695,8 +695,8 @@
                                  /*fetch*/ nullptr,
                                  type_);
         default:
-          CHECK(false) << op;
-          break;
+          LOG(FATAL) << op;
+          UNREACHABLE();
       }
     }
   }
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 7c833cf..c0ec58f 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -1132,11 +1132,27 @@
                                                   /*out*/bool* needs_taken_test) const {
   DCHECK(info != nullptr);
   DCHECK_EQ(info->induction_class, HInductionVarAnalysis::kPeriodic);
-  // Count period.
+  // Count period and detect all-invariants.
   int64_t period = 1;
-  for (HInductionVarAnalysis::InductionInfo* p = info;
-       p->induction_class == HInductionVarAnalysis::kPeriodic;
-       p = p->op_b, ++period) {}
+  bool all_invariants = true;
+  HInductionVarAnalysis::InductionInfo* p = info;
+  for (; p->induction_class == HInductionVarAnalysis::kPeriodic; p = p->op_b, ++period) {
+    DCHECK_EQ(p->op_a->induction_class, HInductionVarAnalysis::kInvariant);
+    if (p->op_a->operation != HInductionVarAnalysis::kFetch) {
+      all_invariants = false;
+    }
+  }
+  DCHECK_EQ(p->induction_class, HInductionVarAnalysis::kInvariant);
+  if (p->operation != HInductionVarAnalysis::kFetch) {
+    all_invariants = false;
+  }
+  // Don't rely on FP arithmetic to be precise, unless the full period
+  // consists of pre-computed expressions only.
+  if (info->type == Primitive::kPrimFloat || info->type == Primitive::kPrimDouble) {
+    if (!all_invariants) {
+      return false;
+    }
+  }
   // Handle any periodic(x, periodic(.., y)) for known maximum index value m.
   int64_t m = 0;
   if (IsConstant(trip->op_a, kExact, &m) && m >= 1) {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 8674e72..142c957 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -56,7 +56,7 @@
 
 // Limit the number of dex registers that we accumulate while inlining
 // to avoid creating large amount of nested environments.
-static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
+static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 32;
 
 // Limit recursive call inlining, which do not benefit from too
 // much inlining compared to code locality.
@@ -470,6 +470,33 @@
   return inline_cache;
 }
 
+bool HInliner::UseOnlyPolymorphicInliningWithNoDeopt() {
+  // If we are compiling AOT or OSR, pretend the call using inline caches is polymorphic and
+  // do not generate a deopt.
+  //
+  // For AOT:
+  //    Generating a deopt does not ensure that we will actually capture the new types;
+  //    and the danger is that we could be stuck in a loop with "forever" deoptimizations.
+  //    Take for example the following scenario:
+  //      - we capture the inline cache in one run
+  //      - the next run, we deoptimize because we miss a type check, but the method
+  //        never becomes hot again
+  //    In this case, the inline cache will not be updated in the profile and the AOT code
+  //    will keep deoptimizing.
+  //    Another scenario is if we use profile compilation for a process which is not allowed
+  //    to JIT (e.g. system server). If we deoptimize we will run interpreted code for the
+  //    rest of the lifetime.
+  // TODO(calin):
+  //    This is a compromise because we will most likely never update the inline cache in the
+  //    profile (unless there's another reason to deopt). So we might be stuck with a sub-optimal
+  //    inline cache. We could be smarter when capturing inline caches to mitigate this
+  //    (e.g. by having different thresholds for new and old methods).
+  //
+  // For OSR:
+  //    We may come from the interpreter and it may have seen different receiver types.
+  return Runtime::Current()->IsAotCompiler() || outermost_graph_->IsCompilingOsr();
+}
+
 bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
                                         HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method)
@@ -503,9 +530,7 @@
 
     case kInlineCacheMonomorphic: {
       MaybeRecordStat(kMonomorphicCall);
-      if (outermost_graph_->IsCompilingOsr()) {
-        // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
-        // interpreter and it may have seen different receiver types.
+      if (UseOnlyPolymorphicInliningWithNoDeopt()) {
         return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
         return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
@@ -578,12 +603,11 @@
     return kInlineCacheNoData;
   }
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile;
-  bool found = pci->GetMethod(caller_dex_file.GetLocation(),
-                              caller_dex_file.GetLocationChecksum(),
-                              caller_compilation_unit_.GetDexMethodIndex(),
-                              &offline_profile);
-  if (!found) {
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_profile =
+      pci->GetMethod(caller_dex_file.GetLocation(),
+                     caller_dex_file.GetLocationChecksum(),
+                     caller_compilation_unit_.GetDexMethodIndex());
+  if (offline_profile == nullptr) {
     return kInlineCacheNoData;  // no profile information for this invocation.
   }
 
@@ -593,7 +617,7 @@
     return kInlineCacheNoData;
   } else {
     return ExtractClassesFromOfflineProfile(invoke_instruction,
-                                            offline_profile,
+                                            *(offline_profile.get()),
                                             *inline_cache);
   }
 }
@@ -603,8 +627,8 @@
     const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
     /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const auto it = offline_profile.inline_caches.find(invoke_instruction->GetDexPc());
-  if (it == offline_profile.inline_caches.end()) {
+  const auto it = offline_profile.inline_caches->find(invoke_instruction->GetDexPc());
+  if (it == offline_profile.inline_caches->end()) {
     return kInlineCacheUninitialized;
   }
 
@@ -648,6 +672,12 @@
     ObjPtr<mirror::DexCache> dex_cache =
         dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
     DCHECK(dex_cache != nullptr);
+
+    if (!dex_cache->GetDexFile()->IsTypeIndexValid(class_ref.type_index)) {
+      VLOG(compiler) << "Profile data corrupt: type index " << class_ref.type_index
+            << " is invalid in location " << dex_cache->GetDexFile()->GetLocation();
+      return kInlineCacheNoData;  // Treat a corrupt profile entry as having no inline cache data.
+    }
     ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
           class_ref.type_index,
           dex_cache,
@@ -926,14 +956,11 @@
 
       // If we have inlined all targets before, and this receiver is the last seen,
       // we deoptimize instead of keeping the original invoke instruction.
-      bool deoptimize = all_targets_inlined &&
+      bool deoptimize = !UseOnlyPolymorphicInliningWithNoDeopt() &&
+          all_targets_inlined &&
           (i != InlineCache::kIndividualCacheSize - 1) &&
           (classes->Get(i + 1) == nullptr);
 
-      if (outermost_graph_->IsCompilingOsr()) {
-        // We do not support HDeoptimize in OSR methods.
-        deoptimize = false;
-      }
       HInstruction* compare = AddTypeGuard(receiver,
                                            cursor,
                                            bb_cursor,
@@ -1856,7 +1883,7 @@
   HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
-  InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
+  InstructionSimplifier simplify(callee_graph, codegen_, compiler_driver_, inline_stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
   HOptimization* optimizations[] = {
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 9e4685c..67476b6 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -180,6 +180,9 @@
                                             Handle<mirror::ObjectArray<mirror::Class>> classes)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Returns whether or not we should use only polymorphic inlining with no deoptimizations.
+  bool UseOnlyPolymorphicInliningWithNoDeopt();
+
   // Try CHA-based devirtualization to change virtual method calls into
   // direct calls.
   // Returns the actual method that resolved_method can be devirtualized to.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index 40fafb0..a73b124 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -888,7 +888,7 @@
     }
 
     HInvokeStaticOrDirect::DispatchInfo dispatch_info = {
-        HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+        HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall,
         HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
         0u
     };
@@ -1000,8 +1000,8 @@
 
 void HInstructionBuilder::BuildConstructorFenceForAllocation(HInstruction* allocation) {
   DCHECK(allocation != nullptr &&
-             allocation->IsNewInstance() ||
-             allocation->IsNewArray());  // corresponding to "new" keyword in JLS.
+             (allocation->IsNewInstance() ||
+              allocation->IsNewArray()));  // corresponding to "new" keyword in JLS.
 
   if (allocation->IsNewInstance()) {
     // STRING SPECIAL HANDLING:
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 2cedde9..d147166 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -30,9 +30,11 @@
  public:
   InstructionSimplifierVisitor(HGraph* graph,
                                CodeGenerator* codegen,
+                               CompilerDriver* compiler_driver,
                                OptimizingCompilerStats* stats)
       : HGraphDelegateVisitor(graph),
         codegen_(codegen),
+        compiler_driver_(compiler_driver),
         stats_(stats) {}
 
   void Run();
@@ -119,6 +121,7 @@
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   CodeGenerator* codegen_;
+  CompilerDriver* compiler_driver_;
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -130,7 +133,7 @@
 };
 
 void InstructionSimplifier::Run() {
-  InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
+  InstructionSimplifierVisitor visitor(graph_, codegen_, compiler_driver_, stats_);
   visitor.Run();
 }
 
@@ -1896,7 +1899,7 @@
       // the invoke, as we would need to look it up in the current dex file, and it
       // is unlikely that it exists. The most usual situation for such typed
       // arraycopy methods is a direct pointer to the boot image.
-      HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+      HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_, compiler_driver_);
     }
   }
 }
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index f7329a4..5e20455 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -24,6 +24,7 @@
 namespace art {
 
 class CodeGenerator;
+class CompilerDriver;
 
 /**
  * Implements optimizations specific to each instruction.
@@ -37,12 +38,14 @@
  */
 class InstructionSimplifier : public HOptimization {
  public:
-  explicit InstructionSimplifier(HGraph* graph,
-                                 CodeGenerator* codegen,
-                                 OptimizingCompilerStats* stats = nullptr,
-                                 const char* name = kInstructionSimplifierPassName)
+  InstructionSimplifier(HGraph* graph,
+                        CodeGenerator* codegen,
+                        CompilerDriver* compiler_driver,
+                        OptimizingCompilerStats* stats = nullptr,
+                        const char* name = kInstructionSimplifierPassName)
       : HOptimization(graph, name, stats),
-        codegen_(codegen) {}
+        codegen_(codegen),
+        compiler_driver_(compiler_driver) {}
 
   static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
 
@@ -50,6 +53,7 @@
 
  private:
   CodeGenerator* codegen_;
+  CompilerDriver* compiler_driver_;
 
   DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
 };
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 6236bd8..b664d41 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -25,7 +25,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "nodes.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -146,7 +146,7 @@
           Intrinsics intrinsic = static_cast<Intrinsics>(art_method->GetIntrinsic());
           if (!CheckInvokeType(intrinsic, invoke)) {
             LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
-                << intrinsic << " for "
+                << static_cast<uint32_t>(intrinsic) << " for "
                 << art_method->PrettyMethod()
                 << invoke->DebugName();
           } else {
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 1df884e..ae5f8d1 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -28,7 +28,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/arm/assembler_arm.h"
 
 namespace art {
@@ -2598,11 +2598,7 @@
   // We don't care about the sign bit, so shift left.
   __ Lsl(out, out, 1);
   __ eor(out, out, ShifterOperand(infinity));
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARM::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -2625,63 +2621,7 @@
   __ eor(out, out, ShifterOperand(infinity_high2));
   // We don't care about the sign bit, so shift left.
   __ orr(out, IP, ShifterOperand(out, LSL, 1));
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
-}
-
-void IntrinsicLocationsBuilderARM::VisitReferenceGetReferent(HInvoke* invoke) {
-  if (kEmitCompilerReadBarrier) {
-    // Do not intrinsify this call with the read barrier configuration.
-    return;
-  }
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM::VisitReferenceGetReferent(HInvoke* invoke) {
-  DCHECK(!kEmitCompilerReadBarrier);
-  ArmAssembler* const assembler = GetAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathARM(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load ArtMethod first.
-  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
-  DCHECK(invoke_direct != nullptr);
-  Register temp = codegen_->GenerateCalleeMethodStaticOrDirectCall(
-      invoke_direct, locations->GetTemp(0)).AsRegister<Register>();
-
-  // Now get declaring class.
-  __ ldr(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
-
-  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
-  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
-  DCHECK_NE(slow_path_flag_offset, 0u);
-  DCHECK_NE(disable_flag_offset, 0u);
-  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
-  // Check static flags that prevent using intrinsic.
-  __ ldr(IP, Address(temp, disable_flag_offset));
-  __ ldr(temp, Address(temp, slow_path_flag_offset));
-  __ orr(IP, IP, ShifterOperand(temp));
-  __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
-
-  // Fast path.
-  __ ldr(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ MaybeUnpoisonHeapReference(out);
-  __ Bind(slow_path->GetExitLabel());
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
@@ -2766,12 +2706,15 @@
   int32_t offset = Thread::InterruptedOffset<kArmPointerSize>().Int32Value();
   __ LoadFromOffset(kLoadWord, out, TR, offset);
   Label done;
-  __ CompareAndBranchIfZero(out, &done);
+  Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+  __ CompareAndBranchIfZero(out, final_label);
   __ dmb(ISH);
   __ LoadImmediate(IP, 0);
   __ StoreToOffset(kStoreWord, IP, TR, offset);
   __ dmb(ISH);
-  __ Bind(&done);
+  if (done.IsLinked()) {
+    __ Bind(&done);
+  }
 }
 
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
@@ -2787,6 +2730,7 @@
 UNIMPLEMENTED_INTRINSIC(ARM, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARM, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARM, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index b511c5a..37d7981 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -28,7 +28,7 @@
 #include "mirror/reference.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/arm64/assembler_arm64.h"
 
 using namespace vixl::aarch64;  // NOLINT(build/namespaces)
@@ -124,12 +124,12 @@
       // are no pools emitted.
       vixl::EmissionCheckScope guard(codegen->GetVIXLAssembler(), kInvokeCodeMarginSizeInBytes);
       if (invoke_->IsInvokeStaticOrDirect()) {
-        codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
-                                            LocationFrom(kArtMethodRegister));
+        codegen->GenerateStaticOrDirectCall(
+            invoke_->AsInvokeStaticOrDirect(), LocationFrom(kArtMethodRegister), this);
       } else {
-        codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister));
+        codegen->GenerateVirtualCall(
+            invoke_->AsInvokeVirtual(), LocationFrom(kArtMethodRegister), this);
       }
-      codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
     }
 
     // Copy the result back to the expected output.
@@ -2897,69 +2897,6 @@
   GenIsInfinite(invoke->GetLocations(), /* is64bit */ true, GetVIXLAssembler());
 }
 
-void IntrinsicLocationsBuilderARM64::VisitReferenceGetReferent(HInvoke* invoke) {
-  if (kEmitCompilerReadBarrier) {
-    // Do not intrinsify this call with the read barrier configuration.
-    return;
-  }
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARM64::VisitReferenceGetReferent(HInvoke* invoke) {
-  DCHECK(!kEmitCompilerReadBarrier);
-  MacroAssembler* masm = GetVIXLAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  Register obj = InputRegisterAt(invoke, 0);
-  Register out = OutputRegister(invoke);
-
-  SlowPathCodeARM64* slow_path = new (GetAllocator()) IntrinsicSlowPathARM64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load ArtMethod first.
-  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
-  DCHECK(invoke_direct != nullptr);
-  Register temp0 = XRegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
-                                 invoke_direct, locations->GetTemp(0)));
-
-  // Now get declaring class.
-  __ Ldr(temp0.W(), MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
-
-  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
-  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
-  DCHECK_NE(slow_path_flag_offset, 0u);
-  DCHECK_NE(disable_flag_offset, 0u);
-  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
-  // Check static flags that prevent using intrinsic.
-  if (slow_path_flag_offset == disable_flag_offset + 1) {
-    // Load two adjacent flags in one 64-bit load.
-    __ Ldr(temp0, MemOperand(temp0, disable_flag_offset));
-  } else {
-    UseScratchRegisterScope temps(masm);
-    Register temp1 = temps.AcquireW();
-    __ Ldr(temp1.W(), MemOperand(temp0, disable_flag_offset));
-    __ Ldr(temp0.W(), MemOperand(temp0, slow_path_flag_offset));
-    __ Orr(temp0, temp1, temp0);
-  }
-  __ Cbnz(temp0, slow_path->GetEntryLabel());
-
-  {
-    // Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
-    vixl::EmissionCheckScope guard(codegen_->GetVIXLAssembler(), kMaxMacroInstructionSizeInBytes);
-    // Fast path.
-    __ Ldr(out, HeapOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
-    codegen_->MaybeRecordImplicitNullCheck(invoke);
-  }
-  codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
   InvokeRuntimeCallingConvention calling_convention;
   IntrinsicVisitor::ComputeIntegerValueOfLocations(
@@ -3055,6 +2992,7 @@
   __ Bind(&done);
 }
 
+UNIMPLEMENTED_INTRINSIC(ARM64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 3c53517..ff59ce9 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -24,7 +24,8 @@
 
 class MacroAssembler;
 
-}}  // namespace vixl::aarch64
+}  // namespace aarch64
+}  // namespace vixl
 
 namespace art {
 
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index 2d9781a..3c9b613 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -26,7 +26,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 #include "aarch32/constants-aarch32.h"
 
@@ -97,11 +97,10 @@
     Location method_loc = MoveArguments(codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
     } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
     }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
@@ -2971,11 +2970,7 @@
   // We don't care about the sign bit, so shift left.
   __ Lsl(out, out, 1);
   __ Eor(out, out, infinity);
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ Clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
@@ -3001,65 +2996,7 @@
   __ Eor(out, out, infinity_high2);
   // We don't care about the sign bit, so shift left.
   __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
-  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
-  __ Clz(out, out);
-  // Any number less than 32 logically shifted right by 5 bits results in 0;
-  // the same operation on 32 yields 1.
-  __ Lsr(out, out, 5);
-}
-
-void IntrinsicLocationsBuilderARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
-  if (kEmitCompilerReadBarrier) {
-    // Do not intrinsify this call with the read barrier configuration.
-    return;
-  }
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorARMVIXL::VisitReferenceGetReferent(HInvoke* invoke) {
-  DCHECK(!kEmitCompilerReadBarrier);
-  ArmVIXLAssembler* assembler = GetAssembler();
-  LocationSummary* locations = invoke->GetLocations();
-
-  vixl32::Register obj = InputRegisterAt(invoke, 0);
-  vixl32::Register out = OutputRegister(invoke);
-
-  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load ArtMethod first.
-  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
-  DCHECK(invoke_direct != nullptr);
-  vixl32::Register temp0 = RegisterFrom(codegen_->GenerateCalleeMethodStaticOrDirectCall(
-      invoke_direct, locations->GetTemp(0)));
-
-  // Now get declaring class.
-  __ Ldr(temp0, MemOperand(temp0, ArtMethod::DeclaringClassOffset().Int32Value()));
-
-  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
-  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
-  DCHECK_NE(slow_path_flag_offset, 0u);
-  DCHECK_NE(disable_flag_offset, 0u);
-  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
-  // Check static flags that prevent using intrinsic.
-  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
-  vixl32::Register temp1 = temps.Acquire();
-  __ Ldr(temp1, MemOperand(temp0, disable_flag_offset));
-  __ Ldr(temp0, MemOperand(temp0, slow_path_flag_offset));
-  __ Orr(temp0, temp1, temp0);
-  __ CompareAndBranchIfNonZero(temp0, slow_path->GetEntryLabel());
-
-  // Fast path.
-  __ Ldr(out, MemOperand(obj, mirror::Reference::ReferentOffset().Int32Value()));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  assembler->MaybeUnpoisonHeapReference(out);
-  __ Bind(slow_path->GetExitLabel());
+  codegen_->GenerateConditionWithZero(kCondEQ, out, out);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitMathCeil(HInvoke* invoke) {
@@ -3135,7 +3072,7 @@
     __ Add(out, in, -info.low);
     __ Cmp(out, info.high - info.low + 1);
     vixl32::Label allocate, done;
-    __ B(hs, &allocate);
+    __ B(hs, &allocate, /* is_far_target */ false);
     // If the value is within the bounds, load the j.l.Integer directly from the array.
     uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
     uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
@@ -3172,17 +3109,21 @@
   UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
   vixl32::Register temp = temps.Acquire();
   vixl32::Label done;
-  __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+  vixl32::Label* const final_label = codegen_->GetFinalLabel(invoke, &done);
+  __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
   __ Dmb(vixl32::ISH);
   __ Mov(temp, 0);
   assembler->StoreToOffset(kStoreWord, temp, tr, offset);
   __ Dmb(vixl32::ISH);
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 4731da1..4cea6df 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -23,6 +23,7 @@
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utils/mips/assembler_mips.h"
 #include "utils/mips/constants_mips.h"
@@ -111,12 +112,12 @@
     MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
-                                          Location::RegisterLocation(A0));
+      codegen->GenerateStaticOrDirectCall(
+          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this);
     } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
+      codegen->GenerateVirtualCall(
+          invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this);
     }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 00afbcd..d785567 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -23,6 +23,7 @@
 #include "intrinsics.h"
 #include "mirror/array-inl.h"
 #include "mirror/string.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utils/mips64/assembler_mips64.h"
 #include "utils/mips64/constants_mips64.h"
@@ -100,12 +101,12 @@
     MoveArguments(invoke_, codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(),
-                                          Location::RegisterLocation(A0));
+      codegen->GenerateStaticOrDirectCall(
+          invoke_->AsInvokeStaticOrDirect(), Location::RegisterLocation(A0), this);
     } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0));
+      codegen->GenerateVirtualCall(
+          invoke_->AsInvokeVirtual(), Location::RegisterLocation(A0), this);
     }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
diff --git a/compiler/optimizing/intrinsics_utils.h b/compiler/optimizing/intrinsics_utils.h
index c1f9ae6..8c69d9b 100644
--- a/compiler/optimizing/intrinsics_utils.h
+++ b/compiler/optimizing/intrinsics_utils.h
@@ -56,11 +56,10 @@
     Location method_loc = MoveArguments(codegen);
 
     if (invoke_->IsInvokeStaticOrDirect()) {
-      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc, this);
     } else {
-      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc, this);
     }
-    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
 
     // Copy the result back to the expected output.
     Location out = invoke_->GetLocations()->Out();
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index 57adcc3..6b4851d 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -31,7 +31,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/x86/assembler_x86.h"
 #include "utils/x86/constants_x86.h"
 
@@ -796,7 +796,6 @@
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
                                       Location::RegisterLocation(EAX));
-  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
 
   // Copy the result back to the expected output.
   Location out = invoke->GetLocations()->Out();
@@ -2819,65 +2818,6 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
-  if (kEmitCompilerReadBarrier) {
-    // Do not intrinsify this call with the read barrier configuration.
-    return;
-  }
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
-  DCHECK(!kEmitCompilerReadBarrier);
-  LocationSummary* locations = invoke->GetLocations();
-  X86Assembler* assembler = GetAssembler();
-
-  Register obj = locations->InAt(0).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load ArtMethod first.
-  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
-  DCHECK(invoke_direct != nullptr);
-  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
-      invoke_direct, locations->GetTemp(0));
-  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
-  Register temp = temp_loc.AsRegister<Register>();
-
-  // Now get declaring class.
-  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
-
-  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
-  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
-  DCHECK_NE(slow_path_flag_offset, 0u);
-  DCHECK_NE(disable_flag_offset, 0u);
-  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
-  // Check static flags preventing us for using intrinsic.
-  if (slow_path_flag_offset == disable_flag_offset + 1) {
-    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
-  } else {
-    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
-    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
-  }
-
-  // Fast path.
-  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ MaybeUnpoisonHeapReference(out);
-  __ Bind(slow_path->GetExitLabel());
-}
-
 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
   return instruction->InputAt(input0) == instruction->InputAt(input1);
 }
@@ -3429,6 +3369,7 @@
 
 
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(X86, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 773383e..ef98b7b 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -31,7 +31,7 @@
 #include "mirror/reference.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils/x86_64/assembler_x86_64.h"
 #include "utils/x86_64/constants_x86_64.h"
 
@@ -567,7 +567,6 @@
   DCHECK(invoke->IsInvokeStaticOrDirect());
   codegen->GenerateStaticOrDirectCall(
       invoke->AsInvokeStaticOrDirect(), Location::RegisterLocation(RDI));
-  codegen->RecordPcInfo(invoke, invoke->GetDexPc());
 
   // Copy the result back to the expected output.
   Location out = invoke->GetLocations()->Out();
@@ -2959,65 +2958,6 @@
   GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
 }
 
-void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
-  if (kEmitCompilerReadBarrier) {
-    // Do not intrinsify this call with the read barrier configuration.
-    return;
-  }
-  LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnSlowPath,
-                                                            kIntrinsified);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::SameAsFirstInput());
-  locations->AddTemp(Location::RequiresRegister());
-}
-
-void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
-  DCHECK(!kEmitCompilerReadBarrier);
-  LocationSummary* locations = invoke->GetLocations();
-  X86_64Assembler* assembler = GetAssembler();
-
-  CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
-
-  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
-  codegen_->AddSlowPath(slow_path);
-
-  // Load ArtMethod first.
-  HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
-  DCHECK(invoke_direct != nullptr);
-  Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
-      invoke_direct, locations->GetTemp(0));
-  DCHECK(temp_loc.Equals(locations->GetTemp(0)));
-  CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-
-  // Now get declaring class.
-  __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
-
-  uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
-  uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
-  DCHECK_NE(slow_path_flag_offset, 0u);
-  DCHECK_NE(disable_flag_offset, 0u);
-  DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
-
-  // Check static flags preventing us for using intrinsic.
-  if (slow_path_flag_offset == disable_flag_offset + 1) {
-    __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
-  } else {
-    __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
-    __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
-    __ j(kNotEqual, slow_path->GetEntryLabel());
-  }
-
-  // Fast path.
-  __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
-  codegen_->MaybeRecordImplicitNullCheck(invoke);
-  __ MaybeUnpoisonHeapReference(out);
-  __ Bind(slow_path->GetExitLabel());
-}
-
 void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
   InvokeRuntimeCallingConvention calling_convention;
   IntrinsicVisitor::ComputeIntegerValueOfLocations(
@@ -3106,6 +3046,7 @@
   __ Bind(&done);
 }
 
+UNIMPLEMENTED_INTRINSIC(X86_64, ReferenceGetReferent)
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/load_store_analysis.cc b/compiler/optimizing/load_store_analysis.cc
new file mode 100644
index 0000000..f2ee345
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis.cc
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_analysis.h"
+
+namespace art {
+
+// A cap for the number of heap locations to prevent pathological time/space consumption.
+// The number of heap locations for most of the methods stays below this threshold.
+constexpr size_t kMaxNumberOfHeapLocations = 32;
+
+void LoadStoreAnalysis::Run() {
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    heap_location_collector_.VisitBasicBlock(block);
+  }
+
+  if (heap_location_collector_.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
+    // Bail out if there are too many heap locations to deal with.
+    heap_location_collector_.CleanUp();
+    return;
+  }
+  if (!heap_location_collector_.HasHeapStores()) {
+    // Without heap stores, this pass would act mostly as GVN on heap accesses.
+    heap_location_collector_.CleanUp();
+    return;
+  }
+  if (heap_location_collector_.HasVolatile() || heap_location_collector_.HasMonitorOps()) {
+    // Don't do load/store elimination if the method has volatile field accesses or
+    // monitor operations, for now.
+    // TODO: do it right.
+    heap_location_collector_.CleanUp();
+    return;
+  }
+
+  heap_location_collector_.BuildAliasingMatrix();
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/load_store_analysis.h b/compiler/optimizing/load_store_analysis.h
new file mode 100644
index 0000000..4e940f3
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis.h
@@ -0,0 +1,518 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+#define ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
+
+#include "escape.h"
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+// A ReferenceInfo contains additional info about a reference such as
+// whether it's a singleton, returned, etc.
+class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
+ public:
+  ReferenceInfo(HInstruction* reference, size_t pos)
+      : reference_(reference),
+        position_(pos),
+        is_singleton_(true),
+        is_singleton_and_not_returned_(true),
+        is_singleton_and_not_deopt_visible_(true),
+        has_index_aliasing_(false) {
+    CalculateEscape(reference_,
+                    nullptr,
+                    &is_singleton_,
+                    &is_singleton_and_not_returned_,
+                    &is_singleton_and_not_deopt_visible_);
+  }
+
+  HInstruction* GetReference() const {
+    return reference_;
+  }
+
+  size_t GetPosition() const {
+    return position_;
+  }
+
+  // Returns true if reference_ is the only name that can refer to its value during
+  // the lifetime of the method. So it's guaranteed to not have any alias in
+  // the method (including its callees).
+  bool IsSingleton() const {
+    return is_singleton_;
+  }
+
+  // Returns true if reference_ is a singleton that is neither returned to the caller nor
+  // used as an environment local of an HDeoptimize instruction.
+  // The allocation and stores into reference_ may be eliminated for such cases.
+  bool IsSingletonAndRemovable() const {
+    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
+  }
+
+  // Returns true if reference_ is a singleton that is either returned to the caller or
+  // used as an environment local of an HDeoptimize instruction.
+  bool IsSingletonAndNonRemovable() const {
+    return is_singleton_ &&
+           (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
+  }
+
+  bool HasIndexAliasing() {
+    return has_index_aliasing_;
+  }
+
+  void SetHasIndexAliasing(bool has_index_aliasing) {
+    // Only allow setting to true.
+    DCHECK(has_index_aliasing);
+    has_index_aliasing_ = has_index_aliasing;
+  }
+
+ private:
+  HInstruction* const reference_;
+  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
+
+  // Can only be referred to by a single name in the method.
+  bool is_singleton_;
+  // Is singleton and not returned to caller.
+  bool is_singleton_and_not_returned_;
+  // Is singleton and not used as an environment local of HDeoptimize.
+  bool is_singleton_and_not_deopt_visible_;
+  // Some heap locations with reference_ have array index aliasing,
+  // e.g. arr[i] and arr[j] may be the same location.
+  bool has_index_aliasing_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
+};
+
+// A heap location is a reference-offset/index pair that a value can be loaded from
+// or stored to.
+class HeapLocation : public ArenaObject<kArenaAllocMisc> {
+ public:
+  static constexpr size_t kInvalidFieldOffset = -1;
+
+  // TODO: more fine-grained array types.
+  static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
+
+  HeapLocation(ReferenceInfo* ref_info,
+               size_t offset,
+               HInstruction* index,
+               int16_t declaring_class_def_index)
+      : ref_info_(ref_info),
+        offset_(offset),
+        index_(index),
+        declaring_class_def_index_(declaring_class_def_index),
+        value_killed_by_loop_side_effects_(true) {
+    DCHECK(ref_info != nullptr);
+    DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
+           (offset != kInvalidFieldOffset && index == nullptr));
+    if (ref_info->IsSingleton() && !IsArrayElement()) {
+      // Assume this location's value cannot be killed by loop side effects
+      // until proven otherwise.
+      value_killed_by_loop_side_effects_ = false;
+    }
+  }
+
+  ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
+  size_t GetOffset() const { return offset_; }
+  HInstruction* GetIndex() const { return index_; }
+
+  // Returns the dex class-def index of the declaring class.
+  // It's kDeclaringClassDefIndexForArrays for an array element.
+  int16_t GetDeclaringClassDefIndex() const {
+    return declaring_class_def_index_;
+  }
+
+  bool IsArrayElement() const {
+    return index_ != nullptr;
+  }
+
+  bool IsValueKilledByLoopSideEffects() const {
+    return value_killed_by_loop_side_effects_;
+  }
+
+  void SetValueKilledByLoopSideEffects(bool val) {
+    value_killed_by_loop_side_effects_ = val;
+  }
+
+ private:
+  ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
+  const size_t offset_;                // offset of static/instance field.
+  HInstruction* const index_;          // index of an array element.
+  const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
+  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
+                                             // side effects because this location is stored
+                                             // into inside a loop. This gives
+                                             // better info on whether a singleton's location
+                                             // value may be killed by loop side effects.
+
+  DISALLOW_COPY_AND_ASSIGN(HeapLocation);
+};
+
+// A HeapLocationCollector collects all relevant heap locations and keeps
+// an aliasing matrix for all locations.
+class HeapLocationCollector : public HGraphVisitor {
+ public:
+  static constexpr size_t kHeapLocationNotFound = -1;
+  // Start with a single uint32_t word. That's enough bits for pair-wise
+  // aliasing matrix of 8 heap locations.
+  static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
+
+  explicit HeapLocationCollector(HGraph* graph)
+      : HGraphVisitor(graph),
+        ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        aliasing_matrix_(graph->GetArena(),
+                         kInitialAliasingMatrixBitVectorSize,
+                         true,
+                         kArenaAllocLSE),
+        has_heap_stores_(false),
+        has_volatile_(false),
+        has_monitor_operations_(false) {}
+
+  void CleanUp() {
+    heap_locations_.clear();
+    ref_info_array_.clear();
+  }
+
+  size_t GetNumberOfHeapLocations() const {
+    return heap_locations_.size();
+  }
+
+  HeapLocation* GetHeapLocation(size_t index) const {
+    return heap_locations_[index];
+  }
+
+  HInstruction* HuntForOriginalReference(HInstruction* ref) const {
+    DCHECK(ref != nullptr);
+    while (ref->IsNullCheck() || ref->IsBoundType()) {
+      ref = ref->InputAt(0);
+    }
+    return ref;
+  }
+
+  ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
+    for (size_t i = 0; i < ref_info_array_.size(); i++) {
+      ReferenceInfo* ref_info = ref_info_array_[i];
+      if (ref_info->GetReference() == ref) {
+        DCHECK_EQ(i, ref_info->GetPosition());
+        return ref_info;
+      }
+    }
+    return nullptr;
+  }
+
+  bool HasHeapStores() const {
+    return has_heap_stores_;
+  }
+
+  bool HasVolatile() const {
+    return has_volatile_;
+  }
+
+  bool HasMonitorOps() const {
+    return has_monitor_operations_;
+  }
+
+  // Find and return the heap location index in heap_locations_.
+  size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
+                               size_t offset,
+                               HInstruction* index,
+                               int16_t declaring_class_def_index) const {
+    for (size_t i = 0; i < heap_locations_.size(); i++) {
+      HeapLocation* loc = heap_locations_[i];
+      if (loc->GetReferenceInfo() == ref_info &&
+          loc->GetOffset() == offset &&
+          loc->GetIndex() == index &&
+          loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
+        return i;
+      }
+    }
+    return kHeapLocationNotFound;
+  }
+
+  // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
+  bool MayAlias(size_t index1, size_t index2) const {
+    if (index1 < index2) {
+      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
+    } else if (index1 > index2) {
+      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
+    } else {
+      DCHECK(false) << "index1 and index2 are expected to be different";
+      return true;
+    }
+  }
+
+  void BuildAliasingMatrix() {
+    const size_t number_of_locations = heap_locations_.size();
+    if (number_of_locations == 0) {
+      return;
+    }
+    size_t pos = 0;
+    // Compute aliasing info between every pair of different heap locations.
+    // Save the result in a matrix represented as a BitVector.
+    for (size_t i = 0; i < number_of_locations - 1; i++) {
+      for (size_t j = i + 1; j < number_of_locations; j++) {
+        if (ComputeMayAlias(i, j)) {
+          aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
+        }
+        pos++;
+      }
+    }
+  }
+
+ private:
+  // An allocation cannot alias with a name which already exists at the point
+  // of the allocation, such as a parameter or a load happening before the allocation.
+  bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+    if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
+      // Any reference that can alias with the allocation must appear after it in the block/in
+      // the block's successors. In reverse post order, those instructions will be visited after
+      // the allocation.
+      return ref_info2->GetPosition() >= ref_info1->GetPosition();
+    }
+    return true;
+  }
+
+  bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
+    if (ref_info1 == ref_info2) {
+      return true;
+    } else if (ref_info1->IsSingleton()) {
+      return false;
+    } else if (ref_info2->IsSingleton()) {
+      return false;
+    } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
+        !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
+      return false;
+    }
+    return true;
+  }
+
+  // `index1` and `index2` are indices in the array of collected heap locations.
+  // Returns the position in the bit vector that tracks whether the two heap
+  // locations may alias.
+  size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
+    DCHECK(index2 > index1);
+    const size_t number_of_locations = heap_locations_.size();
+    // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
+    return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
+  }
+
+  // An additional position is passed in to make sure the calculated position is correct.
+  size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
+    size_t calculated_position = AliasingMatrixPosition(index1, index2);
+    DCHECK_EQ(calculated_position, position);
+    return calculated_position;
+  }
+
+  // Computes whether two heap locations may alias each other.
+  bool ComputeMayAlias(size_t index1, size_t index2) const {
+    HeapLocation* loc1 = heap_locations_[index1];
+    HeapLocation* loc2 = heap_locations_[index2];
+    if (loc1->GetOffset() != loc2->GetOffset()) {
+      // Either two different instance fields, or one is an instance
+      // field and the other is an array element.
+      return false;
+    }
+    if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
+      // Different types.
+      return false;
+    }
+    if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
+      return false;
+    }
+    if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
+      HInstruction* array_index1 = loc1->GetIndex();
+      HInstruction* array_index2 = loc2->GetIndex();
+      DCHECK(array_index1 != nullptr);
+      DCHECK(array_index2 != nullptr);
+      if (array_index1->IsIntConstant() &&
+          array_index2->IsIntConstant() &&
+          array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
+        // Different constant indices do not alias.
+        return false;
+      }
+      ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+      ref_info->SetHasIndexAliasing(true);
+    }
+    return true;
+  }
+
+  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
+    if (ReferenceInfo* existing = FindReferenceInfoOf(instruction)) {
+      return existing;
+    }
+    ReferenceInfo* created =
+        new (GetGraph()->GetArena()) ReferenceInfo(instruction, ref_info_array_.size());
+    ref_info_array_.push_back(created);  // Lands at the position passed above.
+    return created;
+  }
+
+  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
+    if (instruction->GetType() == Primitive::kPrimNot) {
+      // A reference-typed instruction is expected to be registered here only once.
+      DCHECK(FindReferenceInfoOf(instruction) == nullptr);
+      GetOrCreateReferenceInfo(instruction);
+    }
+  }
+
+  HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
+                                        size_t offset,
+                                        HInstruction* index,
+                                        int16_t declaring_class_def_index) {
+    // Resolve `ref` to its original reference first, then key the lookup on that one.
+    ReferenceInfo* ref_info = GetOrCreateReferenceInfo(HuntForOriginalReference(ref));
+    const size_t found_idx =
+        FindHeapLocationIndex(ref_info, offset, index, declaring_class_def_index);
+    if (found_idx != kHeapLocationNotFound) {
+      return heap_locations_[found_idx];
+    }
+    HeapLocation* new_loc = new (GetGraph()->GetArena())
+        HeapLocation(ref_info, offset, index, declaring_class_def_index);
+    heap_locations_.push_back(new_loc);
+    return new_loc;
+  }
+
+  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
+    // Sticky flag: once any volatile field access is seen it stays set.
+    has_volatile_ = field_info.IsVolatile() || has_volatile_;
+    return GetOrCreateHeapLocation(ref,
+                                   field_info.GetFieldOffset().SizeValue(),
+                                   nullptr,  // Field accesses have no array index.
+                                   field_info.GetDeclaringClassDefIndex());
+  }
+
+  void VisitArrayAccess(HInstruction* array, HInstruction* index) {  // Records array[index].
+    GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
+        index, HeapLocation::kDeclaringClassDefIndexForArrays);
+  }
+
+  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
+    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    has_heap_stores_ = true;
+    ReferenceInfo* ref_info = location->GetReferenceInfo();
+    if (!ref_info->IsSingleton()) {
+      // For non-singletons, value_killed_by_loop_side_effects_ is inited to true.
+      DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
+      return;
+    }
+    HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+    if (loop_info == nullptr) {
+      // The store is not inside a loop, so there is nothing to record.
+      return;
+    }
+    // A singleton's location value may be killed by loop side effects if it's
+    // defined before that loop, and it's stored into inside that loop.
+    HInstruction* ref = ref_info->GetReference();
+    DCHECK(ref->IsNewInstance());
+    if (loop_info->IsDefinedOutOfTheLoop(ref)) {
+      // ref's location value may be killed by this loop's side effects.
+      location->SetValueKilledByLoopSideEffects(true);
+    }
+    // Otherwise ref is defined inside this loop, so neither ref nor its location exists
+    // yet at the loop header and this loop's side effects cannot kill the value there.
+  }
+
+  void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
+    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
+    has_heap_stores_ = true;  // Sticky: a store to the heap has been seen.
+  }
+
+  // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
+  // since we cannot accurately track the fields.
+
+  void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
+    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));  // (array, index)
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitArraySet(HArraySet* instruction) OVERRIDE {
+    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));  // (array, index)
+    has_heap_stores_ = true;  // Sticky: a store to the heap has been seen.
+  }
+
+  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
+    // References already recorded in ref_info_array_ predate new_instance and cannot alias it.
+    CreateReferenceInfoForReferenceType(new_instance);
+  }
+
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);  // No-op for non-reference parameters.
+  }
+
+  void VisitSelect(HSelect* instruction) OVERRIDE {
+    CreateReferenceInfoForReferenceType(instruction);  // No-op unless the result is a reference.
+  }
+
+  void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
+    has_monitor_operations_ = true;  // Sticky: a monitor operation has been seen.
+  }
+
+  ArenaVector<ReferenceInfo*> ref_info_array_;   // All references used for heap accesses.
+  ArenaVector<HeapLocation*> heap_locations_;    // All heap locations.
+  ArenaBitVector aliasing_matrix_;    // aliasing info between each pair of locations.
+  bool has_heap_stores_;    // If there are no heap stores, LSE acts as GVN with better
+                            // alias analysis and won't be as effective.
+  bool has_volatile_;       // If there are volatile field accesses.
+  bool has_monitor_operations_;    // If there are monitor operations.
+
+  DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
+};
+
+class LoadStoreAnalysis : public HOptimization {
+ public:
+  explicit LoadStoreAnalysis(HGraph* graph)
+    : HOptimization(graph, kLoadStoreAnalysisPassName),
+      heap_location_collector_(graph) {}
+  // Read-only access to the heap location collector owned by this pass.
+  const HeapLocationCollector& GetHeapLocationCollector() const {
+    return heap_location_collector_;
+  }
+  // Pass entry point; the definition is not in this header.
+  void Run() OVERRIDE;
+  // Pass name handed to the HOptimization base class by the constructor.
+  static constexpr const char* kLoadStoreAnalysisPassName = "load_store_analysis";
+
+ private:
+  HeapLocationCollector heap_location_collector_;  // Built on the graph given to the constructor.
+
+  DISALLOW_COPY_AND_ASSIGN(LoadStoreAnalysis);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_LOAD_STORE_ANALYSIS_H_
diff --git a/compiler/optimizing/load_store_analysis_test.cc b/compiler/optimizing/load_store_analysis_test.cc
new file mode 100644
index 0000000..2418777
--- /dev/null
+++ b/compiler/optimizing/load_store_analysis_test.cc
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "load_store_analysis.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+// Fixture owning an arena-backed HGraph for the tests below.
+class LoadStoreAnalysisTest : public CommonCompilerTest {
+ public:
+  // Members are initialized in declaration order: pool_, then allocator_, then graph_.
+  LoadStoreAnalysisTest() : pool_(), allocator_(&pool_), graph_(CreateGraph(&allocator_)) {}
+
+  ArenaPool pool_;            // Pool that allocator_ is constructed on.
+  ArenaAllocator allocator_;  // Allocator used for the graph and the test instructions.
+  HGraph* graph_;             // Graph under test, created from allocator_.
+};
+
+TEST_F(LoadStoreAnalysisTest, ArrayHeapLocations) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+
+  // entry:
+  // array         ParameterValue
+  // index         ParameterValue
+  // c1            IntConstant
+  // c2            IntConstant
+  // c3            IntConstant
+  // array_get1    ArrayGet [array, c1]
+  // array_get2    ArrayGet [array, c2]
+  // array_set1    ArraySet [array, c1, c3]
+  // array_set2    ArraySet [array, index, c3]
+  HInstruction* array = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+  HInstruction* index = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+  HInstruction* c1 = graph_->GetIntConstant(1);
+  HInstruction* c2 = graph_->GetIntConstant(2);
+  HInstruction* c3 = graph_->GetIntConstant(3);
+  HInstruction* array_get1 = new (&allocator_) HArrayGet(array, c1, Primitive::kPrimInt, 0);
+  HInstruction* array_get2 = new (&allocator_) HArrayGet(array, c2, Primitive::kPrimInt, 0);
+  HInstruction* array_set1 = new (&allocator_) HArraySet(array, c1, c3, Primitive::kPrimInt, 0);
+  HInstruction* array_set2 = new (&allocator_) HArraySet(array, index, c3, Primitive::kPrimInt, 0);
+  entry->AddInstruction(array);
+  entry->AddInstruction(index);
+  entry->AddInstruction(array_get1);
+  entry->AddInstruction(array_get2);
+  entry->AddInstruction(array_set1);
+  entry->AddInstruction(array_set2);
+
+  // Test HeapLocationCollector initialization.
+  // Should be no heap locations, no operations on the heap.
+  HeapLocationCollector heap_location_collector(graph_);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+  ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+  // Test that after visiting the graph_, it must see following heap locations
+  // array[c1], array[c2], array[index]; and it should see heap stores.
+  heap_location_collector.VisitBasicBlock(entry);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 3U);
+  ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+  // Test queries on HeapLocationCollector's ref info and index records.
+  ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(array);
+  // Must find the reference info for array before using it in the lookups below.
+  ASSERT_TRUE(ref != nullptr);
+  size_t field_off = HeapLocation::kInvalidFieldOffset;
+  int16_t class_def = HeapLocation::kDeclaringClassDefIndexForArrays;
+  size_t loc1 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c1, class_def);
+  size_t loc2 = heap_location_collector.FindHeapLocationIndex(ref, field_off, c2, class_def);
+  size_t loc3 = heap_location_collector.FindHeapLocationIndex(ref, field_off, index, class_def);
+  // Must find these heap locations;
+  // and array[1], array[2], array[index] should be different heap locations.
+  ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc3 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc1 != loc2);
+  ASSERT_TRUE(loc2 != loc3);
+  ASSERT_TRUE(loc1 != loc3);
+
+  // Test alias relationships after building aliasing matrix.
+  // array[1] and array[2] clearly should not alias;
+  // array[index] should alias with the others, because index is an unknown value.
+  heap_location_collector.BuildAliasingMatrix();
+  ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+  ASSERT_TRUE(heap_location_collector.MayAlias(loc1, loc3));
+  ASSERT_TRUE(heap_location_collector.MayAlias(loc2, loc3));
+}
+
+TEST_F(LoadStoreAnalysisTest, FieldHeapLocations) {
+  HBasicBlock* entry = new (&allocator_) HBasicBlock(graph_);
+  graph_->AddBlock(entry);
+  graph_->SetEntryBlock(entry);
+
+  // entry:
+  // object              ParameterValue
+  // c1                  IntConstant
+  // set_field10         InstanceFieldSet [object, c1, 10]
+  // get_field10         InstanceFieldGet [object, 10]
+  // get_field20         InstanceFieldGet [object, 20]
+
+  HInstruction* c1 = graph_->GetIntConstant(1);
+  HInstruction* object = new (&allocator_) HParameterValue(graph_->GetDexFile(),
+                                                           dex::TypeIndex(0),
+                                                           0,
+                                                           Primitive::kPrimNot);
+  HInstanceFieldSet* set_field10 = new (&allocator_) HInstanceFieldSet(object,
+                                                                       c1,
+                                                                       nullptr,
+                                                                       Primitive::kPrimInt,
+                                                                       MemberOffset(10),
+                                                                       false,
+                                                                       kUnknownFieldIndex,
+                                                                       kUnknownClassDefIndex,
+                                                                       graph_->GetDexFile(),
+                                                                       0);
+  HInstanceFieldGet* get_field10 = new (&allocator_) HInstanceFieldGet(object,
+                                                                       nullptr,
+                                                                       Primitive::kPrimInt,
+                                                                       MemberOffset(10),
+                                                                       false,
+                                                                       kUnknownFieldIndex,
+                                                                       kUnknownClassDefIndex,
+                                                                       graph_->GetDexFile(),
+                                                                       0);
+  HInstanceFieldGet* get_field20 = new (&allocator_) HInstanceFieldGet(object,
+                                                                       nullptr,
+                                                                       Primitive::kPrimInt,
+                                                                       MemberOffset(20),
+                                                                       false,
+                                                                       kUnknownFieldIndex,
+                                                                       kUnknownClassDefIndex,
+                                                                       graph_->GetDexFile(),
+                                                                       0);
+  entry->AddInstruction(object);
+  entry->AddInstruction(set_field10);
+  entry->AddInstruction(get_field10);
+  entry->AddInstruction(get_field20);
+
+  // Test HeapLocationCollector initialization.
+  // Should be no heap locations, no operations on the heap.
+  HeapLocationCollector heap_location_collector(graph_);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 0U);
+  ASSERT_FALSE(heap_location_collector.HasHeapStores());
+
+  // Test that after visiting the graph, it must see following heap locations
+  // object.field10, object.field20 and it should see heap stores.
+  heap_location_collector.VisitBasicBlock(entry);
+  ASSERT_EQ(heap_location_collector.GetNumberOfHeapLocations(), 2U);
+  ASSERT_TRUE(heap_location_collector.HasHeapStores());
+
+  // Test queries on HeapLocationCollector's ref info and index records.
+  ReferenceInfo* ref = heap_location_collector.FindReferenceInfoOf(object);
+  // Must find the reference info for object before using it in the lookups below.
+  ASSERT_TRUE(ref != nullptr);
+  size_t loc1 = heap_location_collector.FindHeapLocationIndex(
+      ref, 10, nullptr, kUnknownClassDefIndex);
+  size_t loc2 = heap_location_collector.FindHeapLocationIndex(
+      ref, 20, nullptr, kUnknownClassDefIndex);
+  // Must find these heap locations; different fields of the same object are distinct.
+  ASSERT_TRUE(loc1 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc2 != HeapLocationCollector::kHeapLocationNotFound);
+  ASSERT_TRUE(loc1 != loc2);
+  // MayAlias() only reads the matrix, so build it first; different fields must not alias.
+  heap_location_collector.BuildAliasingMatrix();
+  ASSERT_FALSE(heap_location_collector.MayAlias(loc1, loc2));
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 76c9d23..211528b 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include "load_store_analysis.h"
 #include "load_store_elimination.h"
 
 #include "escape.h"
@@ -23,477 +24,6 @@
 
 namespace art {
 
-class ReferenceInfo;
-
-// A cap for the number of heap locations to prevent pathological time/space consumption.
-// The number of heap locations for most of the methods stays below this threshold.
-constexpr size_t kMaxNumberOfHeapLocations = 32;
-
-// A ReferenceInfo contains additional info about a reference such as
-// whether it's a singleton, returned, etc.
-class ReferenceInfo : public ArenaObject<kArenaAllocMisc> {
- public:
-  ReferenceInfo(HInstruction* reference, size_t pos)
-      : reference_(reference),
-        position_(pos),
-        is_singleton_(true),
-        is_singleton_and_not_returned_(true),
-        is_singleton_and_not_deopt_visible_(true),
-        has_index_aliasing_(false) {
-    CalculateEscape(reference_,
-                    nullptr,
-                    &is_singleton_,
-                    &is_singleton_and_not_returned_,
-                    &is_singleton_and_not_deopt_visible_);
-  }
-
-  HInstruction* GetReference() const {
-    return reference_;
-  }
-
-  size_t GetPosition() const {
-    return position_;
-  }
-
-  // Returns true if reference_ is the only name that can refer to its value during
-  // the lifetime of the method. So it's guaranteed to not have any alias in
-  // the method (including its callees).
-  bool IsSingleton() const {
-    return is_singleton_;
-  }
-
-  // Returns true if reference_ is a singleton and not returned to the caller or
-  // used as an environment local of an HDeoptimize instruction.
-  // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndRemovable() const {
-    return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
-  }
-
-  // Returns true if reference_ is a singleton and returned to the caller or
-  // used as an environment local of an HDeoptimize instruction.
-  bool IsSingletonAndNonRemovable() const {
-    return is_singleton_ &&
-           (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
-  }
-
-  bool HasIndexAliasing() {
-    return has_index_aliasing_;
-  }
-
-  void SetHasIndexAliasing(bool has_index_aliasing) {
-    // Only allow setting to true.
-    DCHECK(has_index_aliasing);
-    has_index_aliasing_ = has_index_aliasing;
-  }
-
- private:
-  HInstruction* const reference_;
-  const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
-
-  // Can only be referred to by a single name in the method.
-  bool is_singleton_;
-  // Is singleton and not returned to caller.
-  bool is_singleton_and_not_returned_;
-  // Is singleton and not used as an environment local of HDeoptimize.
-  bool is_singleton_and_not_deopt_visible_;
-  // Some heap locations with reference_ have array index aliasing,
-  // e.g. arr[i] and arr[j] may be the same location.
-  bool has_index_aliasing_;
-
-  DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
-};
-
-// A heap location is a reference-offset/index pair that a value can be loaded from
-// or stored to.
-class HeapLocation : public ArenaObject<kArenaAllocMisc> {
- public:
-  static constexpr size_t kInvalidFieldOffset = -1;
-
-  // TODO: more fine-grained array types.
-  static constexpr int16_t kDeclaringClassDefIndexForArrays = -1;
-
-  HeapLocation(ReferenceInfo* ref_info,
-               size_t offset,
-               HInstruction* index,
-               int16_t declaring_class_def_index)
-      : ref_info_(ref_info),
-        offset_(offset),
-        index_(index),
-        declaring_class_def_index_(declaring_class_def_index),
-        value_killed_by_loop_side_effects_(true) {
-    DCHECK(ref_info != nullptr);
-    DCHECK((offset == kInvalidFieldOffset && index != nullptr) ||
-           (offset != kInvalidFieldOffset && index == nullptr));
-    if (ref_info->IsSingleton() && !IsArrayElement()) {
-      // Assume this location's value cannot be killed by loop side effects
-      // until proven otherwise.
-      value_killed_by_loop_side_effects_ = false;
-    }
-  }
-
-  ReferenceInfo* GetReferenceInfo() const { return ref_info_; }
-  size_t GetOffset() const { return offset_; }
-  HInstruction* GetIndex() const { return index_; }
-
-  // Returns the definition of declaring class' dex index.
-  // It's kDeclaringClassDefIndexForArrays for an array element.
-  int16_t GetDeclaringClassDefIndex() const {
-    return declaring_class_def_index_;
-  }
-
-  bool IsArrayElement() const {
-    return index_ != nullptr;
-  }
-
-  bool IsValueKilledByLoopSideEffects() const {
-    return value_killed_by_loop_side_effects_;
-  }
-
-  void SetValueKilledByLoopSideEffects(bool val) {
-    value_killed_by_loop_side_effects_ = val;
-  }
-
- private:
-  ReferenceInfo* const ref_info_;      // reference for instance/static field or array access.
-  const size_t offset_;                // offset of static/instance field.
-  HInstruction* const index_;          // index of an array element.
-  const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
-  bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
-                                             // side effects because this location is stored
-                                             // into inside a loop. This gives
-                                             // better info on whether a singleton's location
-                                             // value may be killed by loop side effects.
-
-  DISALLOW_COPY_AND_ASSIGN(HeapLocation);
-};
-
-static HInstruction* HuntForOriginalReference(HInstruction* ref) {
-  DCHECK(ref != nullptr);
-  while (ref->IsNullCheck() || ref->IsBoundType()) {
-    ref = ref->InputAt(0);
-  }
-  return ref;
-}
-
-// A HeapLocationCollector collects all relevant heap locations and keeps
-// an aliasing matrix for all locations.
-class HeapLocationCollector : public HGraphVisitor {
- public:
-  static constexpr size_t kHeapLocationNotFound = -1;
-  // Start with a single uint32_t word. That's enough bits for pair-wise
-  // aliasing matrix of 8 heap locations.
-  static constexpr uint32_t kInitialAliasingMatrixBitVectorSize = 32;
-
-  explicit HeapLocationCollector(HGraph* graph)
-      : HGraphVisitor(graph),
-        ref_info_array_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        heap_locations_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        aliasing_matrix_(graph->GetArena(),
-                         kInitialAliasingMatrixBitVectorSize,
-                         true,
-                         kArenaAllocLSE),
-        has_heap_stores_(false),
-        has_volatile_(false),
-        has_monitor_operations_(false) {}
-
-  size_t GetNumberOfHeapLocations() const {
-    return heap_locations_.size();
-  }
-
-  HeapLocation* GetHeapLocation(size_t index) const {
-    return heap_locations_[index];
-  }
-
-  ReferenceInfo* FindReferenceInfoOf(HInstruction* ref) const {
-    for (size_t i = 0; i < ref_info_array_.size(); i++) {
-      ReferenceInfo* ref_info = ref_info_array_[i];
-      if (ref_info->GetReference() == ref) {
-        DCHECK_EQ(i, ref_info->GetPosition());
-        return ref_info;
-      }
-    }
-    return nullptr;
-  }
-
-  bool HasHeapStores() const {
-    return has_heap_stores_;
-  }
-
-  bool HasVolatile() const {
-    return has_volatile_;
-  }
-
-  bool HasMonitorOps() const {
-    return has_monitor_operations_;
-  }
-
-  // Find and return the heap location index in heap_locations_.
-  size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
-                               size_t offset,
-                               HInstruction* index,
-                               int16_t declaring_class_def_index) const {
-    for (size_t i = 0; i < heap_locations_.size(); i++) {
-      HeapLocation* loc = heap_locations_[i];
-      if (loc->GetReferenceInfo() == ref_info &&
-          loc->GetOffset() == offset &&
-          loc->GetIndex() == index &&
-          loc->GetDeclaringClassDefIndex() == declaring_class_def_index) {
-        return i;
-      }
-    }
-    return kHeapLocationNotFound;
-  }
-
-  // Returns true if heap_locations_[index1] and heap_locations_[index2] may alias.
-  bool MayAlias(size_t index1, size_t index2) const {
-    if (index1 < index2) {
-      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index1, index2));
-    } else if (index1 > index2) {
-      return aliasing_matrix_.IsBitSet(AliasingMatrixPosition(index2, index1));
-    } else {
-      DCHECK(false) << "index1 and index2 are expected to be different";
-      return true;
-    }
-  }
-
-  void BuildAliasingMatrix() {
-    const size_t number_of_locations = heap_locations_.size();
-    if (number_of_locations == 0) {
-      return;
-    }
-    size_t pos = 0;
-    // Compute aliasing info between every pair of different heap locations.
-    // Save the result in a matrix represented as a BitVector.
-    for (size_t i = 0; i < number_of_locations - 1; i++) {
-      for (size_t j = i + 1; j < number_of_locations; j++) {
-        if (ComputeMayAlias(i, j)) {
-          aliasing_matrix_.SetBit(CheckedAliasingMatrixPosition(i, j, pos));
-        }
-        pos++;
-      }
-    }
-  }
-
- private:
-  // An allocation cannot alias with a name which already exists at the point
-  // of the allocation, such as a parameter or a load happening before the allocation.
-  bool MayAliasWithPreexistenceChecking(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
-    if (ref_info1->GetReference()->IsNewInstance() || ref_info1->GetReference()->IsNewArray()) {
-      // Any reference that can alias with the allocation must appear after it in the block/in
-      // the block's successors. In reverse post order, those instructions will be visited after
-      // the allocation.
-      return ref_info2->GetPosition() >= ref_info1->GetPosition();
-    }
-    return true;
-  }
-
-  bool CanReferencesAlias(ReferenceInfo* ref_info1, ReferenceInfo* ref_info2) const {
-    if (ref_info1 == ref_info2) {
-      return true;
-    } else if (ref_info1->IsSingleton()) {
-      return false;
-    } else if (ref_info2->IsSingleton()) {
-      return false;
-    } else if (!MayAliasWithPreexistenceChecking(ref_info1, ref_info2) ||
-        !MayAliasWithPreexistenceChecking(ref_info2, ref_info1)) {
-      return false;
-    }
-    return true;
-  }
-
-  // `index1` and `index2` are indices in the array of collected heap locations.
-  // Returns the position in the bit vector that tracks whether the two heap
-  // locations may alias.
-  size_t AliasingMatrixPosition(size_t index1, size_t index2) const {
-    DCHECK(index2 > index1);
-    const size_t number_of_locations = heap_locations_.size();
-    // It's (num_of_locations - 1) + ... + (num_of_locations - index1) + (index2 - index1 - 1).
-    return (number_of_locations * index1 - (1 + index1) * index1 / 2 + (index2 - index1 - 1));
-  }
-
-  // An additional position is passed in to make sure the calculated position is correct.
-  size_t CheckedAliasingMatrixPosition(size_t index1, size_t index2, size_t position) {
-    size_t calculated_position = AliasingMatrixPosition(index1, index2);
-    DCHECK_EQ(calculated_position, position);
-    return calculated_position;
-  }
-
-  // Compute if two locations may alias to each other.
-  bool ComputeMayAlias(size_t index1, size_t index2) const {
-    HeapLocation* loc1 = heap_locations_[index1];
-    HeapLocation* loc2 = heap_locations_[index2];
-    if (loc1->GetOffset() != loc2->GetOffset()) {
-      // Either two different instance fields, or one is an instance
-      // field and the other is an array element.
-      return false;
-    }
-    if (loc1->GetDeclaringClassDefIndex() != loc2->GetDeclaringClassDefIndex()) {
-      // Different types.
-      return false;
-    }
-    if (!CanReferencesAlias(loc1->GetReferenceInfo(), loc2->GetReferenceInfo())) {
-      return false;
-    }
-    if (loc1->IsArrayElement() && loc2->IsArrayElement()) {
-      HInstruction* array_index1 = loc1->GetIndex();
-      HInstruction* array_index2 = loc2->GetIndex();
-      DCHECK(array_index1 != nullptr);
-      DCHECK(array_index2 != nullptr);
-      if (array_index1->IsIntConstant() &&
-          array_index2->IsIntConstant() &&
-          array_index1->AsIntConstant()->GetValue() != array_index2->AsIntConstant()->GetValue()) {
-        // Different constant indices do not alias.
-        return false;
-      }
-      ReferenceInfo* ref_info = loc1->GetReferenceInfo();
-      ref_info->SetHasIndexAliasing(true);
-    }
-    return true;
-  }
-
-  ReferenceInfo* GetOrCreateReferenceInfo(HInstruction* instruction) {
-    ReferenceInfo* ref_info = FindReferenceInfoOf(instruction);
-    if (ref_info == nullptr) {
-      size_t pos = ref_info_array_.size();
-      ref_info = new (GetGraph()->GetArena()) ReferenceInfo(instruction, pos);
-      ref_info_array_.push_back(ref_info);
-    }
-    return ref_info;
-  }
-
-  void CreateReferenceInfoForReferenceType(HInstruction* instruction) {
-    if (instruction->GetType() != Primitive::kPrimNot) {
-      return;
-    }
-    DCHECK(FindReferenceInfoOf(instruction) == nullptr);
-    GetOrCreateReferenceInfo(instruction);
-  }
-
-  HeapLocation* GetOrCreateHeapLocation(HInstruction* ref,
-                                        size_t offset,
-                                        HInstruction* index,
-                                        int16_t declaring_class_def_index) {
-    HInstruction* original_ref = HuntForOriginalReference(ref);
-    ReferenceInfo* ref_info = GetOrCreateReferenceInfo(original_ref);
-    size_t heap_location_idx = FindHeapLocationIndex(
-        ref_info, offset, index, declaring_class_def_index);
-    if (heap_location_idx == kHeapLocationNotFound) {
-      HeapLocation* heap_loc = new (GetGraph()->GetArena())
-          HeapLocation(ref_info, offset, index, declaring_class_def_index);
-      heap_locations_.push_back(heap_loc);
-      return heap_loc;
-    }
-    return heap_locations_[heap_location_idx];
-  }
-
-  HeapLocation* VisitFieldAccess(HInstruction* ref, const FieldInfo& field_info) {
-    if (field_info.IsVolatile()) {
-      has_volatile_ = true;
-    }
-    const uint16_t declaring_class_def_index = field_info.GetDeclaringClassDefIndex();
-    const size_t offset = field_info.GetFieldOffset().SizeValue();
-    return GetOrCreateHeapLocation(ref, offset, nullptr, declaring_class_def_index);
-  }
-
-  void VisitArrayAccess(HInstruction* array, HInstruction* index) {
-    GetOrCreateHeapLocation(array, HeapLocation::kInvalidFieldOffset,
-        index, HeapLocation::kDeclaringClassDefIndexForArrays);
-  }
-
-  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
-    HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    has_heap_stores_ = true;
-    if (location->GetReferenceInfo()->IsSingleton()) {
-      // A singleton's location value may be killed by loop side effects if it's
-      // defined before that loop, and it's stored into inside that loop.
-      HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
-      if (loop_info != nullptr) {
-        HInstruction* ref = location->GetReferenceInfo()->GetReference();
-        DCHECK(ref->IsNewInstance());
-        if (loop_info->IsDefinedOutOfTheLoop(ref)) {
-          // ref's location value may be killed by this loop's side effects.
-          location->SetValueKilledByLoopSideEffects(true);
-        } else {
-          // ref is defined inside this loop so this loop's side effects cannot
-          // kill its location value at the loop header since ref/its location doesn't
-          // exist yet at the loop header.
-        }
-      }
-    } else {
-      // For non-singletons, value_killed_by_loop_side_effects_ is inited to
-      // true.
-      DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
-    }
-  }
-
-  void VisitStaticFieldGet(HStaticFieldGet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitStaticFieldSet(HStaticFieldSet* instruction) OVERRIDE {
-    VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
-    has_heap_stores_ = true;
-  }
-
-  // We intentionally don't collect HUnresolvedInstanceField/HUnresolvedStaticField accesses
-  // since we cannot accurately track the fields.
-
-  void VisitArrayGet(HArrayGet* instruction) OVERRIDE {
-    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitArraySet(HArraySet* instruction) OVERRIDE {
-    VisitArrayAccess(instruction->InputAt(0), instruction->InputAt(1));
-    has_heap_stores_ = true;
-  }
-
-  void VisitNewInstance(HNewInstance* new_instance) OVERRIDE {
-    // Any references appearing in the ref_info_array_ so far cannot alias with new_instance.
-    CreateReferenceInfoForReferenceType(new_instance);
-  }
-
-  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitInvokeVirtual(HInvokeVirtual* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitInvokeInterface(HInvokeInterface* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitParameterValue(HParameterValue* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitSelect(HSelect* instruction) OVERRIDE {
-    CreateReferenceInfoForReferenceType(instruction);
-  }
-
-  void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
-    has_monitor_operations_ = true;
-  }
-
-  ArenaVector<ReferenceInfo*> ref_info_array_;   // All references used for heap accesses.
-  ArenaVector<HeapLocation*> heap_locations_;    // All heap locations.
-  ArenaBitVector aliasing_matrix_;    // aliasing info between each pair of locations.
-  bool has_heap_stores_;    // If there is no heap stores, LSE acts as GVN with better
-                            // alias analysis and won't be as effective.
-  bool has_volatile_;       // If there are volatile field accesses.
-  bool has_monitor_operations_;    // If there are monitor operations.
-
-  DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
-};
-
 // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
 // A heap location can be set to kUnknownHeapValue when:
 // - initially set a value.
@@ -516,7 +46,7 @@
         side_effects_(side_effects),
         heap_values_for_(graph->GetBlocks().size(),
                          ArenaVector<HInstruction*>(heap_locations_collector.
-                                                        GetNumberOfHeapLocations(),
+                                                    GetNumberOfHeapLocations(),
                                                     kUnknownHeapValue,
                                                     graph->GetArena()->Adapter(kArenaAllocLSE)),
                          graph->GetArena()->Adapter(kArenaAllocLSE)),
@@ -760,7 +290,7 @@
                         size_t offset,
                         HInstruction* index,
                         int16_t declaring_class_def_index) {
-    HInstruction* original_ref = HuntForOriginalReference(ref);
+    HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
     size_t idx = heap_location_collector_.FindHeapLocationIndex(
         ref_info, offset, index, declaring_class_def_index);
@@ -827,7 +357,7 @@
                         HInstruction* index,
                         int16_t declaring_class_def_index,
                         HInstruction* value) {
-    HInstruction* original_ref = HuntForOriginalReference(ref);
+    HInstruction* original_ref = heap_location_collector_.HuntForOriginalReference(ref);
     ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(original_ref);
     size_t idx = heap_location_collector_.FindHeapLocationIndex(
         ref_info, offset, index, declaring_class_def_index);
@@ -1127,25 +657,12 @@
     // Skip this optimization.
     return;
   }
-  HeapLocationCollector heap_location_collector(graph_);
-  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
-    heap_location_collector.VisitBasicBlock(block);
-  }
-  if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
-    // Bail out if there are too many heap locations to deal with.
+  const HeapLocationCollector& heap_location_collector = lsa_.GetHeapLocationCollector();
+  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
+    // No HeapLocation information from LSA, skip this optimization.
     return;
   }
-  if (!heap_location_collector.HasHeapStores()) {
-    // Without heap stores, this pass would act mostly as GVN on heap accesses.
-    return;
-  }
-  if (heap_location_collector.HasVolatile() || heap_location_collector.HasMonitorOps()) {
-    // Don't do load/store elimination if the method has volatile field accesses or
-    // monitor operations, for now.
-    // TODO: do it right.
-    return;
-  }
-  heap_location_collector.BuildAliasingMatrix();
+
   LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
   for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     lse_visitor.VisitBasicBlock(block);
diff --git a/compiler/optimizing/load_store_elimination.h b/compiler/optimizing/load_store_elimination.h
index 1d9e5c8..efe71c7 100644
--- a/compiler/optimizing/load_store_elimination.h
+++ b/compiler/optimizing/load_store_elimination.h
@@ -22,12 +22,16 @@
 namespace art {
 
 class SideEffectsAnalysis;
+class LoadStoreAnalysis;
 
 class LoadStoreElimination : public HOptimization {
  public:
-  LoadStoreElimination(HGraph* graph, const SideEffectsAnalysis& side_effects)
+  LoadStoreElimination(HGraph* graph,
+                       const SideEffectsAnalysis& side_effects,
+                       const LoadStoreAnalysis& lsa)
       : HOptimization(graph, kLoadStoreEliminationPassName),
-        side_effects_(side_effects) {}
+        side_effects_(side_effects),
+        lsa_(lsa) {}
 
   void Run() OVERRIDE;
 
@@ -35,6 +39,7 @@
 
  private:
   const SideEffectsAnalysis& side_effects_;
+  const LoadStoreAnalysis& lsa_;
 
   DISALLOW_COPY_AND_ASSIGN(LoadStoreElimination);
 };
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 963df5a..9c8a632 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -173,6 +173,39 @@
   return false;
 }
 
+// Detect situations with same-extension narrower operands.
+// Returns true on success and sets is_unsigned accordingly.
+static bool IsNarrowerOperands(HInstruction* a,
+                               HInstruction* b,
+                               Primitive::Type type,
+                               /*out*/ HInstruction** r,
+                               /*out*/ HInstruction** s,
+                               /*out*/ bool* is_unsigned) {
+  if (IsSignExtensionAndGet(a, type, r) && IsSignExtensionAndGet(b, type, s)) {
+    *is_unsigned = false;
+    return true;
+  } else if (IsZeroExtensionAndGet(a, type, r) && IsZeroExtensionAndGet(b, type, s)) {
+    *is_unsigned = true;
+    return true;
+  }
+  return false;
+}
+
+// As above, single operand.
+static bool IsNarrowerOperand(HInstruction* a,
+                              Primitive::Type type,
+                              /*out*/ HInstruction** r,
+                              /*out*/ bool* is_unsigned) {
+  if (IsSignExtensionAndGet(a, type, r)) {
+    *is_unsigned = false;
+    return true;
+  } else if (IsZeroExtensionAndGet(a, type, r)) {
+    *is_unsigned = true;
+    return true;
+  }
+  return false;
+}
+
 // Detect up to two instructions a and b, and an acccumulated constant c.
 static bool IsAddConstHelper(HInstruction* instruction,
                              /*out*/ HInstruction** a,
@@ -466,6 +499,7 @@
       body = it.Current();
     }
   }
+  CHECK(body != nullptr);
   // Ensure there is only a single exit point.
   if (header->GetSuccessors().size() != 2) {
     return;
@@ -756,7 +790,7 @@
   return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
 }
 
-// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+// TODO: saturation arithmetic.
 bool HLoopOptimization::VectorizeUse(LoopNode* node,
                                      HInstruction* instruction,
                                      bool generate_code,
@@ -778,6 +812,11 @@
     }
     return true;
   } else if (instruction->IsArrayGet()) {
+    // Deal with vector restrictions.
+    if (instruction->AsArrayGet()->IsStringCharAt() &&
+        HasVectorRestrictions(restrictions, kNoStringCharAt)) {
+      return false;
+    }
     // Accept a right-hand-side array base[index] for
     // (1) exact matching vector type,
     // (2) loop-invariant base,
@@ -867,25 +906,38 @@
       return true;
     }
     // Deal with vector restrictions.
+    HInstruction* opa = instruction->InputAt(0);
+    HInstruction* opb = instruction->InputAt(1);
+    HInstruction* r = opa;
+    bool is_unsigned = false;
     if ((HasVectorRestrictions(restrictions, kNoShift)) ||
         (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
       return false;  // unsupported instruction
-    } else if ((instruction->IsShr() || instruction->IsUShr()) &&
-               HasVectorRestrictions(restrictions, kNoHiBits)) {
-      return false;  // hibits may impact lobits; TODO: we can do better!
+    } else if (HasVectorRestrictions(restrictions, kNoHiBits)) {
+      // Shifts right need extra care to account for higher order bits.
+      // TODO: less likely shr/unsigned and ushr/signed can be done by flipping signedness.
+      if (instruction->IsShr() &&
+          (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+        return false;  // reject, unless shifted operand is sign-extension narrower
+      } else if (instruction->IsUShr() &&
+                 (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || !is_unsigned)) {
+        return false;  // reject, unless shifted operand is zero-extension narrower
+      }
     }
     // Accept shift operator for vectorizable/invariant operands.
     // TODO: accept symbolic, albeit loop invariant shift factors.
-    HInstruction* opa = instruction->InputAt(0);
-    HInstruction* opb = instruction->InputAt(1);
+    DCHECK(r != nullptr);
+    if (generate_code && vector_mode_ != kVector) {  // de-idiom
+      r = opa;
+    }
     int64_t distance = 0;
-    if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+    if (VectorizeUse(node, r, generate_code, type, restrictions) &&
         IsInt64AndGet(opb, /*out*/ &distance)) {
       // Restrict shift distance to packed data type width.
       int64_t max_distance = Primitive::ComponentSize(type) * 8;
       if (0 <= distance && distance < max_distance) {
         if (generate_code) {
-          GenerateVecOp(instruction, vector_map_->Get(opa), opb, type);
+          GenerateVecOp(instruction, vector_map_->Get(r), opb, type);
         }
         return true;
       }
@@ -899,16 +951,23 @@
       case Intrinsics::kMathAbsFloat:
       case Intrinsics::kMathAbsDouble: {
         // Deal with vector restrictions.
-        if (HasVectorRestrictions(restrictions, kNoAbs) ||
-            HasVectorRestrictions(restrictions, kNoHiBits)) {
-          // TODO: we can do better for some hibits cases.
+        HInstruction* opa = instruction->InputAt(0);
+        HInstruction* r = opa;
+        bool is_unsigned = false;
+        if (HasVectorRestrictions(restrictions, kNoAbs)) {
           return false;
+        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+                   (!IsNarrowerOperand(opa, type, &r, &is_unsigned) || is_unsigned)) {
+          return false;  // reject, unless operand is sign-extension narrower
         }
         // Accept ABS(x) for vectorizable operand.
-        HInstruction* opa = instruction->InputAt(0);
-        if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+        DCHECK(r != nullptr);
+        if (generate_code && vector_mode_ != kVector) {  // de-idiom
+          r = opa;
+        }
+        if (VectorizeUse(node, r, generate_code, type, restrictions)) {
           if (generate_code) {
-            GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+            GenerateVecOp(instruction, vector_map_->Get(r), nullptr, type);
           }
           return true;
         }
@@ -923,18 +982,28 @@
       case Intrinsics::kMathMaxFloatFloat:
       case Intrinsics::kMathMaxDoubleDouble: {
         // Deal with vector restrictions.
-        if (HasVectorRestrictions(restrictions, kNoMinMax) ||
-            HasVectorRestrictions(restrictions, kNoHiBits)) {
-          // TODO: we can do better for some hibits cases.
-          return false;
-        }
-        // Accept MIN/MAX(x, y) for vectorizable operands.
         HInstruction* opa = instruction->InputAt(0);
         HInstruction* opb = instruction->InputAt(1);
-        if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
-            VectorizeUse(node, opb, generate_code, type, restrictions)) {
+        HInstruction* r = opa;
+        HInstruction* s = opb;
+        bool is_unsigned = false;
+        if (HasVectorRestrictions(restrictions, kNoMinMax)) {
+          return false;
+        } else if (HasVectorRestrictions(restrictions, kNoHiBits) &&
+                   !IsNarrowerOperands(opa, opb, type, &r, &s, &is_unsigned)) {
+          return false;  // reject, unless all operands are same-extension narrower
+        }
+        // Accept MIN/MAX(x, y) for vectorizable operands.
+        DCHECK(r != nullptr && s != nullptr);
+        if (generate_code && vector_mode_ != kVector) {  // de-idiom
+          r = opa;
+          s = opb;
+        }
+        if (VectorizeUse(node, r, generate_code, type, restrictions) &&
+            VectorizeUse(node, s, generate_code, type, restrictions)) {
           if (generate_code) {
-            GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+            GenerateVecOp(
+                instruction, vector_map_->Get(r), vector_map_->Get(s), type, is_unsigned);
           }
           return true;
         }
@@ -959,11 +1028,11 @@
       switch (type) {
         case Primitive::kPrimBoolean:
         case Primitive::kPrimByte:
-          *restrictions |= kNoDiv | kNoAbs;
+          *restrictions |= kNoDiv;
           return TrySetVectorLength(16);
         case Primitive::kPrimChar:
         case Primitive::kPrimShort:
-          *restrictions |= kNoDiv | kNoAbs;
+          *restrictions |= kNoDiv;
           return TrySetVectorLength(8);
         case Primitive::kPrimInt:
           *restrictions |= kNoDiv;
@@ -1009,9 +1078,36 @@
       }
       return false;
     case kMips:
-    case kMips64:
       // TODO: implement MIPS SIMD.
       return false;
+    case kMips64:
+      if (features->AsMips64InstructionSetFeatures()->HasMsa()) {
+        switch (type) {
+          case Primitive::kPrimBoolean:
+          case Primitive::kPrimByte:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(16);
+          case Primitive::kPrimChar:
+          case Primitive::kPrimShort:
+            *restrictions |= kNoDiv | kNoStringCharAt;
+            return TrySetVectorLength(8);
+          case Primitive::kPrimInt:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(4);
+          case Primitive::kPrimLong:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(2);
+          case Primitive::kPrimFloat:
+            *restrictions |= kNoMinMax;  // min/max(x, NaN)
+            return TrySetVectorLength(4);
+          case Primitive::kPrimDouble:
+            *restrictions |= kNoMinMax;  // min/max(x, NaN)
+            return TrySetVectorLength(2);
+          default:
+            break;
+        }  // switch type
+      }
+      return false;
     default:
       return false;
   }  // switch instruction set
@@ -1098,13 +1194,14 @@
 void HLoopOptimization::GenerateVecOp(HInstruction* org,
                                       HInstruction* opa,
                                       HInstruction* opb,
-                                      Primitive::Type type) {
+                                      Primitive::Type type,
+                                      bool is_unsigned) {
   if (vector_mode_ == kSequential) {
-    // Scalar code follows implicit integral promotion.
-    if (type == Primitive::kPrimBoolean ||
-        type == Primitive::kPrimByte ||
-        type == Primitive::kPrimChar ||
-        type == Primitive::kPrimShort) {
+    // Non-converting scalar code follows implicit integral promotion.
+    if (!org->IsTypeConversion() && (type == Primitive::kPrimBoolean ||
+                                     type == Primitive::kPrimByte ||
+                                     type == Primitive::kPrimChar ||
+                                     type == Primitive::kPrimShort)) {
       type = Primitive::kPrimInt;
     }
   }
@@ -1185,7 +1282,6 @@
           case Intrinsics::kMathMinLongLong:
           case Intrinsics::kMathMinFloatFloat:
           case Intrinsics::kMathMinDoubleDouble: {
-            bool is_unsigned = false;  // TODO: detect unsigned versions
             vector = new (global_allocator_)
                 HVecMin(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1194,7 +1290,6 @@
           case Intrinsics::kMathMaxLongLong:
           case Intrinsics::kMathMaxFloatFloat:
           case Intrinsics::kMathMaxDoubleDouble: {
-            bool is_unsigned = false;  // TODO: detect unsigned versions
             vector = new (global_allocator_)
                 HVecMax(global_allocator_, opa, opb, type, vector_length_, is_unsigned);
             break;
@@ -1208,9 +1303,10 @@
         // corresponding new scalar instructions in the loop. The instruction will get an
         // environment while being inserted from the instruction map in original program order.
         DCHECK(vector_mode_ == kSequential);
+        size_t num_args = invoke->GetNumberOfArguments();
         HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
             global_allocator_,
-            invoke->GetNumberOfArguments(),
+            num_args,
             invoke->GetType(),
             invoke->GetDexPc(),
             invoke->GetDexMethodIndex(),
@@ -1220,8 +1316,14 @@
             invoke->GetTargetMethod(),
             invoke->GetClinitCheckRequirement());
         HInputsRef inputs = invoke->GetInputs();
-        for (size_t index = 0; index < inputs.size(); ++index) {
-          new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index]));
+        size_t num_inputs = inputs.size();
+        DCHECK_LE(num_args, num_inputs);
+        DCHECK_EQ(num_inputs, new_invoke->GetInputs().size());  // both invokes agree
+        for (size_t index = 0; index < num_inputs; ++index) {
+          HInstruction* new_input = index < num_args
+              ? vector_map_->Get(inputs[index])
+              : inputs[index];  // beyond arguments: just pass through
+          new_invoke->SetArgumentAt(index, new_input);
         }
         new_invoke->SetIntrinsic(invoke->GetIntrinsic(),
                                  kNeedsEnvironmentOrCache,
@@ -1258,7 +1360,7 @@
                                                  Primitive::Type type,
                                                  uint64_t restrictions) {
   // Test for top level arithmetic shift right x >> 1 or logical shift right x >>> 1
-  // (note whether the sign bit in higher precision is shifted in has no effect
+  // (note whether the sign bit in wider precision is shifted in has no effect
   // on the narrow precision computed by the idiom).
   int64_t distance = 0;
   if ((instruction->IsShr() ||
@@ -1269,6 +1371,7 @@
     HInstruction* b = nullptr;
     int64_t       c = 0;
     if (IsAddConst(instruction->InputAt(0), /*out*/ &a, /*out*/ &b, /*out*/ &c)) {
+      DCHECK(a != nullptr && b != nullptr);
       // Accept c == 1 (rounded) or c == 0 (not rounded).
       bool is_rounded = false;
       if (c == 1) {
@@ -1280,11 +1383,7 @@
       HInstruction* r = nullptr;
       HInstruction* s = nullptr;
       bool is_unsigned = false;
-      if (IsZeroExtensionAndGet(a, type, &r) && IsZeroExtensionAndGet(b, type, &s)) {
-        is_unsigned = true;
-      } else if (IsSignExtensionAndGet(a, type, &r) && IsSignExtensionAndGet(b, type, &s)) {
-        is_unsigned = false;
-      } else {
+      if (!IsNarrowerOperands(a, b, type, &r, &s, &is_unsigned)) {
         return false;
       }
       // Deal with vector restrictions.
@@ -1295,6 +1394,10 @@
       // Accept recognized halving add for vectorizable operands. Vectorized code uses the
       // shorthand idiomatic operation. Sequential code uses the original scalar expressions.
       DCHECK(r != nullptr && s != nullptr);
+      if (generate_code && vector_mode_ != kVector) {  // de-idiom
+        r = instruction->InputAt(0);
+        s = instruction->InputAt(1);
+      }
       if (VectorizeUse(node, r, generate_code, type, restrictions) &&
           VectorizeUse(node, s, generate_code, type, restrictions)) {
         if (generate_code) {
@@ -1308,12 +1411,7 @@
                 is_unsigned,
                 is_rounded));
           } else {
-            VectorizeUse(node, instruction->InputAt(0), generate_code, type, restrictions);
-            VectorizeUse(node, instruction->InputAt(1), generate_code, type, restrictions);
-            GenerateVecOp(instruction,
-                          vector_map_->Get(instruction->InputAt(0)),
-                          vector_map_->Get(instruction->InputAt(1)),
-                          type);
+            GenerateVecOp(instruction, vector_map_->Get(r), vector_map_->Get(s), type);
           }
         }
         return true;
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6d5978d..75a42f3 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -72,6 +72,7 @@
     kNoUnroundedHAdd = 64,   // no unrounded halving add
     kNoAbs           = 128,  // no absolute value
     kNoMinMax        = 256,  // no min/max
+    kNoStringCharAt  = 512,  // no StringCharAt
   };
 
   /*
@@ -137,7 +138,11 @@
                       HInstruction* opa,
                       HInstruction* opb,
                       Primitive::Type type);
-  void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
+  void GenerateVecOp(HInstruction* org,
+                     HInstruction* opa,
+                     HInstruction* opb,
+                     Primitive::Type type,
+                     bool is_unsigned = false);
 
   // Vectorization idioms.
   bool VectorizeHalvingAddIdiom(LoopNode* node,
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 833f32b..d0047c5 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -2623,7 +2623,7 @@
 }
 
 bool HInvokeStaticOrDirect::NeedsDexCacheOfDeclaringClass() const {
-  if (GetMethodLoadKind() != MethodLoadKind::kDexCacheViaMethod) {
+  if (GetMethodLoadKind() != MethodLoadKind::kRuntimeCall) {
     return false;
   }
   if (!IsIntrinsic()) {
@@ -2636,15 +2636,17 @@
 std::ostream& operator<<(std::ostream& os, HInvokeStaticOrDirect::MethodLoadKind rhs) {
   switch (rhs) {
     case HInvokeStaticOrDirect::MethodLoadKind::kStringInit:
-      return os << "string_init";
+      return os << "StringInit";
     case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
-      return os << "recursive";
+      return os << "Recursive";
+    case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
+      return os << "BootImageLinkTimePcRelative";
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
-      return os << "direct";
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative:
-      return os << "dex_cache_pc_relative";
-    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod:
-      return os << "dex_cache_via_method";
+      return os << "DirectAddress";
+    case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry:
+      return os << "BssEntry";
+    case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall:
+      return os << "RuntimeCall";
     default:
       LOG(FATAL) << "Unknown MethodLoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
@@ -2688,7 +2690,7 @@
 void HLoadClass::SetLoadKind(LoadKind load_kind) {
   SetPackedField<LoadKindField>(load_kind);
 
-  if (load_kind != LoadKind::kDexCacheViaMethod &&
+  if (load_kind != LoadKind::kRuntimeCall &&
       load_kind != LoadKind::kReferrersClass) {
     RemoveAsUserOfInput(0u);
     SetRawInputAt(0u, nullptr);
@@ -2712,8 +2714,8 @@
       return os << "BssEntry";
     case HLoadClass::LoadKind::kJitTableAddress:
       return os << "JitTableAddress";
-    case HLoadClass::LoadKind::kDexCacheViaMethod:
-      return os << "DexCacheViaMethod";
+    case HLoadClass::LoadKind::kRuntimeCall:
+      return os << "RuntimeCall";
     default:
       LOG(FATAL) << "Unknown HLoadClass::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
@@ -2741,10 +2743,10 @@
 
 void HLoadString::SetLoadKind(LoadKind load_kind) {
   // Once sharpened, the load kind should not be changed again.
-  DCHECK_EQ(GetLoadKind(), LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(GetLoadKind(), LoadKind::kRuntimeCall);
   SetPackedField<LoadKindField>(load_kind);
 
-  if (load_kind != LoadKind::kDexCacheViaMethod) {
+  if (load_kind != LoadKind::kRuntimeCall) {
     RemoveAsUserOfInput(0u);
     SetRawInputAt(0u, nullptr);
   }
@@ -2764,8 +2766,8 @@
       return os << "BssEntry";
     case HLoadString::LoadKind::kJitTableAddress:
       return os << "JitTableAddress";
-    case HLoadString::LoadKind::kDexCacheViaMethod:
-      return os << "DexCacheViaMethod";
+    case HLoadString::LoadKind::kRuntimeCall:
+      return os << "RuntimeCall";
     default:
       LOG(FATAL) << "Unknown HLoadString::LoadKind: " << static_cast<int>(rhs);
       UNREACHABLE();
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 72774da..ffa16dd 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1410,12 +1410,7 @@
   M(IntermediateAddressIndex, Instruction)
 #endif
 
-#ifndef ART_ENABLE_CODEGEN_arm
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)
-#else
-#define FOR_EACH_CONCRETE_INSTRUCTION_ARM(M)                            \
-  M(ArmDexCacheArraysBase, Instruction)
-#endif
 
 #define FOR_EACH_CONCRETE_INSTRUCTION_ARM64(M)
 
@@ -1424,7 +1419,6 @@
 #else
 #define FOR_EACH_CONCRETE_INSTRUCTION_MIPS(M)                           \
   M(MipsComputeBaseMethodAddress, Instruction)                          \
-  M(MipsDexCacheArraysBase, Instruction)                                \
   M(MipsPackedSwitch, Instruction)
 #endif
 
@@ -1485,7 +1479,8 @@
   H##type* As##type() { return this; }
 
 template <typename T>
-class HUseListNode : public ArenaObject<kArenaAllocUseListNode> {
+class HUseListNode : public ArenaObject<kArenaAllocUseListNode>,
+                     public IntrusiveForwardListNode<HUseListNode<T>> {
  public:
   // Get the instruction which has this use as one of the inputs.
   T GetUser() const { return user_; }
@@ -1494,10 +1489,6 @@
   // Set the position of the input record that this use corresponds to.
   void SetIndex(size_t index) { index_ = index; }
 
-  // Hook for the IntrusiveForwardList<>.
-  // TODO: Hide this better.
-  IntrusiveForwardListHook hook;
-
  private:
   HUseListNode(T user, size_t index)
       : user_(user), index_(index) {}
@@ -1790,7 +1781,7 @@
                              uint32_t dex_pc,
                              HInstruction* holder)
      : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)),
-       locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)),
+       locations_(arena->Adapter(kArenaAllocEnvironmentLocations)),
        parent_(nullptr),
        method_(method),
        dex_pc_(dex_pc),
@@ -1804,6 +1795,11 @@
                      to_copy.GetDexPc(),
                      holder) {}
 
+  void AllocateLocations() {
+    DCHECK(locations_.empty());
+    locations_.resize(vregs_.size());
+  }
+
   void SetAndCopyParentChain(ArenaAllocator* allocator, HEnvironment* parent) {
     if (parent_ != nullptr) {
       parent_->SetAndCopyParentChain(allocator, parent);
@@ -4153,21 +4149,21 @@
     // Use the method's own ArtMethod* loaded by the register allocator.
     kRecursive,
 
+    // Use PC-relative boot image ArtMethod* address that will be known at link time.
+    // Used for boot image methods referenced by boot image code.
+    kBootImageLinkTimePcRelative,
+
     // Use ArtMethod* at a known address, embed the direct address in the code.
     // Used for app->boot calls with non-relocatable image and for JIT-compiled calls.
     kDirectAddress,
 
-    // Load from resolved methods array in the dex cache using a PC-relative load.
-    // Used when we need to use the dex cache, for example for invoke-static that
-    // may cause class initialization (the entry may point to a resolution method),
-    // and we know that we can access the dex cache arrays using a PC-relative load.
-    kDexCachePcRelative,
+    // Load from an entry in the .bss section using a PC-relative load.
+    // Used for methods outside boot image when .bss is accessible with a PC-relative load.
+    kBssEntry,
 
-    // Use ArtMethod* from the resolved methods of the compiled method's own ArtMethod*.
-    // Used for JIT when we need to use the dex cache. This is also the last-resort-kind
-    // used when other kinds are unavailable (say, dex cache arrays are not PC-relative)
-    // or unimplemented or impractical (i.e. slow) on a particular architecture.
-    kDexCacheViaMethod,
+    // Make a runtime call to resolve and call the method. This is the last-resort-kind
+    // used when other kinds are unimplemented on a particular architecture.
+    kRuntimeCall,
   };
 
   // Determines the location of the code pointer.
@@ -4188,7 +4184,6 @@
     //   - thread entrypoint offset for kStringInit method if this is a string init invoke.
     //     Note that there are multiple string init methods, each having its own offset.
     //   - the method address for kDirectAddress
-    //   - the dex cache arrays offset for kDexCachePcRel.
     uint64_t method_load_data;
   };
 
@@ -4289,8 +4284,9 @@
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE;
   bool IsStringInit() const { return GetMethodLoadKind() == MethodLoadKind::kStringInit; }
   bool HasMethodAddress() const { return GetMethodLoadKind() == MethodLoadKind::kDirectAddress; }
-  bool HasPcRelativeDexCache() const {
-    return GetMethodLoadKind() == MethodLoadKind::kDexCachePcRelative;
+  bool HasPcRelativeMethodLoadKind() const {
+    return GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative ||
+           GetMethodLoadKind() == MethodLoadKind::kBssEntry;
   }
   bool HasCurrentMethodInput() const {
     // This function can be called only after the invoke has been fully initialized by the builder.
@@ -4314,11 +4310,6 @@
     return dispatch_info_.method_load_data;
   }
 
-  uint32_t GetDexCacheArrayOffset() const {
-    DCHECK(HasPcRelativeDexCache());
-    return dispatch_info_.method_load_data;
-  }
-
   const DexFile& GetDexFileForPcRelativeDexCache() const;
 
   ClinitCheckRequirement GetClinitCheckRequirement() const {
@@ -4363,7 +4354,7 @@
 
   // Does this method load kind need the current method as an input?
   static bool NeedsCurrentMethodInput(MethodLoadKind kind) {
-    return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kDexCacheViaMethod;
+    return kind == MethodLoadKind::kRecursive || kind == MethodLoadKind::kRuntimeCall;
   }
 
   DECLARE_INSTRUCTION(InvokeStaticOrDirect);
@@ -5679,12 +5670,11 @@
     // Load from the root table associated with the JIT compiled method.
     kJitTableAddress,
 
-    // Load from resolved types array accessed through the class loaded from
-    // the compiled method's own ArtMethod*. This is the default access type when
-    // all other types are unavailable.
-    kDexCacheViaMethod,
+    // Load using a simple runtime call. This is the fall-back load kind when
+    // the codegen is unable to use another appropriate kind.
+    kRuntimeCall,
 
-    kLast = kDexCacheViaMethod
+    kLast = kRuntimeCall
   };
 
   HLoadClass(HCurrentMethod* current_method,
@@ -5705,7 +5695,7 @@
     DCHECK(!is_referrers_class || !needs_access_check);
 
     SetPackedField<LoadKindField>(
-        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kDexCacheViaMethod);
+        is_referrers_class ? LoadKind::kReferrersClass : LoadKind::kRuntimeCall);
     SetPackedFlag<kFlagNeedsAccessCheck>(needs_access_check);
     SetPackedFlag<kFlagIsInBootImage>(false);
     SetPackedFlag<kFlagGenerateClInitCheck>(false);
@@ -5739,7 +5729,7 @@
   bool CanCallRuntime() const {
     return NeedsAccessCheck() ||
            MustGenerateClinitCheck() ||
-           GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+           GetLoadKind() == LoadKind::kRuntimeCall ||
            GetLoadKind() == LoadKind::kBssEntry;
   }
 
@@ -5749,7 +5739,7 @@
            // If the class is in the boot image, the lookup in the runtime call cannot throw.
            // This keeps CanThrow() consistent between non-PIC (using kBootImageAddress) and
            // PIC and subsequently avoids a DCE behavior dependency on the PIC option.
-           ((GetLoadKind() == LoadKind::kDexCacheViaMethod ||
+           ((GetLoadKind() == LoadKind::kRuntimeCall ||
              GetLoadKind() == LoadKind::kBssEntry) &&
             !IsInBootImage());
   }
@@ -5768,7 +5758,7 @@
   const DexFile& GetDexFile() const { return dex_file_; }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
-    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+    return GetLoadKind() == LoadKind::kRuntimeCall;
   }
 
   static SideEffects SideEffectsForArchRuntimeCalls() {
@@ -5819,12 +5809,12 @@
     return load_kind == LoadKind::kReferrersClass ||
         load_kind == LoadKind::kBootImageLinkTimePcRelative ||
         load_kind == LoadKind::kBssEntry ||
-        load_kind == LoadKind::kDexCacheViaMethod;
+        load_kind == LoadKind::kRuntimeCall;
   }
 
   void SetLoadKindInternal(LoadKind load_kind);
 
-  // The special input is the HCurrentMethod for kDexCacheViaMethod or kReferrersClass.
+  // The special input is the HCurrentMethod for kRuntimeCall or kReferrersClass.
   // For other load kinds it's empty or possibly some architecture-specific instruction
   // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
@@ -5833,7 +5823,7 @@
   // - The compiling method's dex file if the class is defined there too.
   // - The compiling method's dex file if the class is referenced there.
   // - The dex file where the class is defined. When the load kind can only be
-  //   kBssEntry or kDexCacheViaMethod, we cannot emit code for this `HLoadClass`.
+  //   kBssEntry or kRuntimeCall, we cannot emit code for this `HLoadClass`.
   const dex::TypeIndex type_index_;
   const DexFile& dex_file_;
 
@@ -5876,12 +5866,11 @@
     // Load from the root table associated with the JIT compiled method.
     kJitTableAddress,
 
-    // Load from resolved strings array accessed through the class loaded from
-    // the compiled method's own ArtMethod*. This is the default access type when
-    // all other types are unavailable.
-    kDexCacheViaMethod,
+    // Load using a simple runtime call. This is the fall-back load kind when
+    // the codegen is unable to use another appropriate kind.
+    kRuntimeCall,
 
-    kLast = kDexCacheViaMethod,
+    kLast = kRuntimeCall,
   };
 
   HLoadString(HCurrentMethod* current_method,
@@ -5892,7 +5881,7 @@
         special_input_(HUserRecord<HInstruction*>(current_method)),
         string_index_(string_index),
         dex_file_(dex_file) {
-    SetPackedField<LoadKindField>(LoadKind::kDexCacheViaMethod);
+    SetPackedField<LoadKindField>(LoadKind::kRuntimeCall);
   }
 
   void SetLoadKind(LoadKind load_kind);
@@ -5936,7 +5925,7 @@
   }
 
   bool NeedsDexCacheOfDeclaringClass() const OVERRIDE {
-    return GetLoadKind() == LoadKind::kDexCacheViaMethod;
+    return GetLoadKind() == LoadKind::kRuntimeCall;
   }
 
   bool CanBeNull() const OVERRIDE { return false; }
@@ -5970,7 +5959,7 @@
 
   void SetLoadKindInternal(LoadKind load_kind);
 
-  // The special input is the HCurrentMethod for kDexCacheViaMethod.
+  // The special input is the HCurrentMethod for kRuntimeCall.
   // For other load kinds it's empty or possibly some architecture-specific instruction
   // for PC-relative loads, i.e. kBssEntry or kBootImageLinkTimePcRelative.
   HUserRecord<HInstruction*> special_input_;
@@ -6870,9 +6859,6 @@
 #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
 #include "nodes_shared.h"
 #endif
-#ifdef ART_ENABLE_CODEGEN_arm
-#include "nodes_arm.h"
-#endif
 #ifdef ART_ENABLE_CODEGEN_mips
 #include "nodes_mips.h"
 #endif
diff --git a/compiler/optimizing/nodes_arm.h b/compiler/optimizing/nodes_arm.h
deleted file mode 100644
index d9f9740e..0000000
--- a/compiler/optimizing/nodes_arm.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2015 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef ART_COMPILER_OPTIMIZING_NODES_ARM_H_
-#define ART_COMPILER_OPTIMIZING_NODES_ARM_H_
-
-namespace art {
-
-class HArmDexCacheArraysBase FINAL : public HExpression<0> {
- public:
-  explicit HArmDexCacheArraysBase(const DexFile& dex_file)
-      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
-        dex_file_(&dex_file),
-        element_offset_(static_cast<size_t>(-1)) { }
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-
-  void UpdateElementOffset(size_t element_offset) {
-    // Use the lowest offset from the requested elements so that all offsets from
-    // this base are non-negative because our assemblers emit negative-offset loads
-    // as a sequence of two or more instructions. (However, positive offsets beyond
-    // 4KiB also require two or more instructions, so this simple heuristic could
-    // be improved for cases where there is a dense cluster of elements far from
-    // the lowest offset. This is expected to be rare enough though, so we choose
-    // not to spend compile time on elaborate calculations.)
-    element_offset_ = std::min(element_offset_, element_offset);
-  }
-
-  const DexFile& GetDexFile() const {
-    return *dex_file_;
-  }
-
-  size_t GetElementOffset() const {
-    return element_offset_;
-  }
-
-  DECLARE_INSTRUCTION(ArmDexCacheArraysBase);
-
- private:
-  const DexFile* dex_file_;
-  size_t element_offset_;
-
-  DISALLOW_COPY_AND_ASSIGN(HArmDexCacheArraysBase);
-};
-
-}  // namespace art
-
-#endif  // ART_COMPILER_OPTIMIZING_NODES_ARM_H_
diff --git a/compiler/optimizing/nodes_mips.h b/compiler/optimizing/nodes_mips.h
index 36431c1..8e439d9 100644
--- a/compiler/optimizing/nodes_mips.h
+++ b/compiler/optimizing/nodes_mips.h
@@ -34,38 +34,6 @@
   DISALLOW_COPY_AND_ASSIGN(HMipsComputeBaseMethodAddress);
 };
 
-class HMipsDexCacheArraysBase : public HExpression<0> {
- public:
-  explicit HMipsDexCacheArraysBase(const DexFile& dex_file)
-      : HExpression(Primitive::kPrimInt, SideEffects::None(), kNoDexPc),
-        dex_file_(&dex_file),
-        element_offset_(static_cast<size_t>(-1)) { }
-
-  bool CanBeMoved() const OVERRIDE { return true; }
-
-  void UpdateElementOffset(size_t element_offset) {
-    // We'll maximize the range of a single load instruction for dex cache array accesses
-    // by aligning offset -32768 with the offset of the first used element.
-    element_offset_ = std::min(element_offset_, element_offset);
-  }
-
-  const DexFile& GetDexFile() const {
-    return *dex_file_;
-  }
-
-  size_t GetElementOffset() const {
-    return element_offset_;
-  }
-
-  DECLARE_INSTRUCTION(MipsDexCacheArraysBase);
-
- private:
-  const DexFile* dex_file_;
-  size_t element_offset_;
-
-  DISALLOW_COPY_AND_ASSIGN(HMipsDexCacheArraysBase);
-};
-
 // Mips version of HPackedSwitch that holds a pointer to the base method address.
 class HMipsPackedSwitch FINAL : public HTemplateInstruction<2> {
  public:
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f928f71..890ba67 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -24,16 +24,11 @@
 
 #include "android-base/strings.h"
 
-#ifdef ART_ENABLE_CODEGEN_arm
-#include "dex_cache_array_fixups_arm.h"
-#endif
-
 #ifdef ART_ENABLE_CODEGEN_arm64
 #include "instruction_simplifier_arm64.h"
 #endif
 
 #ifdef ART_ENABLE_CODEGEN_mips
-#include "dex_cache_array_fixups_mips.h"
 #include "pc_relative_fixups_mips.h"
 #endif
 
@@ -83,6 +78,7 @@
 #include "jit/jit_code_cache.h"
 #include "jni/quick/jni_compiler.h"
 #include "licm.h"
+#include "load_store_analysis.h"
 #include "load_store_elimination.h"
 #include "loop_optimization.h"
 #include "nodes.h"
@@ -465,7 +461,8 @@
     const DexCompilationUnit& dex_compilation_unit,
     VariableSizedHandleScope* handles,
     SideEffectsAnalysis* most_recent_side_effects,
-    HInductionVarAnalysis* most_recent_induction) {
+    HInductionVarAnalysis* most_recent_induction,
+    LoadStoreAnalysis* most_recent_lsa) {
   std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
   if (opt_name == BoundsCheckElimination::kBoundsCheckEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr && most_recent_induction != nullptr);
@@ -499,15 +496,18 @@
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
     return new (arena) HInductionVarAnalysis(graph);
   } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
-    return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
+    return new (arena) InstructionSimplifier(graph, codegen, driver, stats, pass_name.c_str());
   } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
     return new (arena) IntrinsicsRecognizer(graph, stats);
   } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
     CHECK(most_recent_side_effects != nullptr);
     return new (arena) LICM(graph, *most_recent_side_effects, stats);
+  } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) {
+    return new (arena) LoadStoreAnalysis(graph);
   } else if (opt_name == LoadStoreElimination::kLoadStoreEliminationPassName) {
     CHECK(most_recent_side_effects != nullptr);
-    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects);
+    CHECK(most_recent_lsa != nullptr);
+    return new (arena) LoadStoreElimination(graph, *most_recent_side_effects, *most_recent_lsa);
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
@@ -517,8 +517,6 @@
   } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
     return new (arena) CodeSinking(graph, stats);
 #ifdef ART_ENABLE_CODEGEN_arm
-  } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
-    return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
   } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
     return new (arena) arm::InstructionSimplifierArm(graph, stats);
 #endif
@@ -527,8 +525,6 @@
     return new (arena) arm64::InstructionSimplifierArm64(graph, stats);
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips
-  } else if (opt_name == mips::DexCacheArrayFixups::kDexCacheArrayFixupsMipsPassName) {
-    return new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
   } else if (opt_name == mips::PcRelativeFixups::kPcRelativeFixupsMipsPassName) {
     return new (arena) mips::PcRelativeFixups(graph, codegen, stats);
 #endif
@@ -556,6 +552,7 @@
   // in the pass name list.
   SideEffectsAnalysis* most_recent_side_effects = nullptr;
   HInductionVarAnalysis* most_recent_induction = nullptr;
+  LoadStoreAnalysis* most_recent_lsa = nullptr;
   ArenaVector<HOptimization*> ret(arena->Adapter());
   for (const std::string& pass_name : pass_names) {
     HOptimization* opt = BuildOptimization(
@@ -568,7 +565,8 @@
         dex_compilation_unit,
         handles,
         most_recent_side_effects,
-        most_recent_induction);
+        most_recent_induction,
+        most_recent_lsa);
     CHECK(opt != nullptr) << "Couldn't build optimization: \"" << pass_name << "\"";
     ret.push_back(opt);
 
@@ -577,6 +575,8 @@
       most_recent_side_effects = down_cast<SideEffectsAnalysis*>(opt);
     } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
       most_recent_induction = down_cast<HInductionVarAnalysis*>(opt);
+    } else if (opt_name == LoadStoreAnalysis::kLoadStoreAnalysisPassName) {
+      most_recent_lsa = down_cast<LoadStoreAnalysis*>(opt);
     }
   }
   return ret;
@@ -632,8 +632,6 @@
 #if defined(ART_ENABLE_CODEGEN_arm)
     case kThumb2:
     case kArm: {
-      arm::DexCacheArrayFixups* fixups =
-          new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
@@ -644,7 +642,6 @@
         simplifier,
         side_effects,
         gvn,
-        fixups,
         scheduling,
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
@@ -673,11 +670,8 @@
     case kMips: {
       mips::PcRelativeFixups* pc_relative_fixups =
           new (arena) mips::PcRelativeFixups(graph, codegen, stats);
-      mips::DexCacheArrayFixups* dex_cache_array_fixups =
-          new (arena) mips::DexCacheArrayFixups(graph, codegen, stats);
       HOptimization* mips_optimizations[] = {
           pc_relative_fixups,
-          dex_cache_array_fixups
       };
       RunOptimizations(mips_optimizations, arraysize(mips_optimizations), pass_observer);
       break;
@@ -763,7 +757,8 @@
   HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
-  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(
+      graph, codegen, driver, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
       graph, "constant_folding$after_inlining");
@@ -777,15 +772,16 @@
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
   BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
+  LoadStoreAnalysis* lsa = new (arena) LoadStoreAnalysis(graph);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2, *lsa);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, codegen, stats, "instruction_simplifier$after_inlining");
+      graph, codegen, driver, stats, "instruction_simplifier$after_inlining");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, codegen, stats, "instruction_simplifier$after_bce");
+      graph, codegen, driver, stats, "instruction_simplifier$after_bce");
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
-      graph, codegen, stats, "instruction_simplifier$before_codegen");
+      graph, codegen, driver, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
   CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
@@ -817,6 +813,7 @@
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
     side_effects2,
+    lsa,
     lse,
     cha_guard,
     dce3,
diff --git a/compiler/optimizing/pc_relative_fixups_mips.cc b/compiler/optimizing/pc_relative_fixups_mips.cc
index ef2c432..21b6452 100644
--- a/compiler/optimizing/pc_relative_fixups_mips.cc
+++ b/compiler/optimizing/pc_relative_fixups_mips.cc
@@ -58,6 +58,18 @@
     DCHECK(base_ != nullptr);
   }
 
+  void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE {
+    // If this is an invoke with a PC-relative method load kind that is not a
+    // call-free intrinsic, we need to add the base as the special input.
+    if (invoke->HasPcRelativeMethodLoadKind() &&
+        !IsCallFreeIntrinsic<IntrinsicLocationsBuilderMIPS>(invoke, codegen_)) {
+      InitializePCRelativeBasePointer();
+      // Add the base as the special input of the invoke.
+      DCHECK(!invoke->HasCurrentMethodInput());  // Must not already carry the current method.
+      invoke->AddSpecialInput(base_);
+    }
+  }
+
   void VisitLoadClass(HLoadClass* load_class) OVERRIDE {
     HLoadClass::LoadKind load_kind = load_class->GetLoadKind();
     switch (load_kind) {
diff --git a/compiler/optimizing/pc_relative_fixups_x86.cc b/compiler/optimizing/pc_relative_fixups_x86.cc
index a1c916f..2743df9 100644
--- a/compiler/optimizing/pc_relative_fixups_x86.cc
+++ b/compiler/optimizing/pc_relative_fixups_x86.cc
@@ -205,13 +205,13 @@
     // method pointer from the invoke.
     if (invoke_static_or_direct != nullptr &&
         invoke_static_or_direct->HasCurrentMethodInput()) {
-      DCHECK(!invoke_static_or_direct->HasPcRelativeDexCache());
+      DCHECK(!invoke_static_or_direct->HasPcRelativeMethodLoadKind());
       return;
     }
 
     bool base_added = false;
     if (invoke_static_or_direct != nullptr &&
-        invoke_static_or_direct->HasPcRelativeDexCache() &&
+        invoke_static_or_direct->HasPcRelativeMethodLoadKind() &&
         !IsCallFreeIntrinsic<IntrinsicLocationsBuilderX86>(invoke, codegen_)) {
       HX86ComputeBaseMethodAddress* method_address = GetPCRelativeBasePointer(invoke);
       // Add the extra parameter.
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index c6a0b6a..ce3a496 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -308,8 +308,10 @@
     }
     InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
   }
-  UsePosition* use = current->GetFirstUse();
-  EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
+  UsePositionList::const_iterator use_it = current->GetUses().begin();
+  const UsePositionList::const_iterator use_end = current->GetUses().end();
+  EnvUsePositionList::const_iterator env_use_it = current->GetEnvironmentUses().begin();
+  const EnvUsePositionList::const_iterator env_use_end = current->GetEnvironmentUses().end();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -321,43 +323,47 @@
 
     LiveRange* range = current->GetFirstRange();
     while (range != nullptr) {
-      while (use != nullptr && use->GetPosition() < range->GetStart()) {
-        DCHECK(use->IsSynthesized());
-        use = use->GetNext();
-      }
-      while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
-        DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
-        if (!use->IsSynthesized()) {
-          LocationSummary* locations = use->GetUser()->GetLocations();
-          Location expected_location = locations->InAt(use->GetInputIndex());
+      // Process uses in the closed interval [range->GetStart(), range->GetEnd()].
+      // FindMatchingUseRange() expects a half-open interval, so pass `range->GetEnd() + 1u`.
+      size_t range_begin = range->GetStart();
+      size_t range_end = range->GetEnd() + 1u;
+      auto matching_use_range =
+          FindMatchingUseRange(use_it, use_end, range_begin, range_end);
+      DCHECK(std::all_of(use_it,
+                         matching_use_range.begin(),
+                         [](const UsePosition& pos) { return pos.IsSynthesized(); }));
+      for (const UsePosition& use : matching_use_range) {
+        DCHECK(current->CoversSlow(use.GetPosition()) || (use.GetPosition() == range->GetEnd()));
+        if (!use.IsSynthesized()) {
+          LocationSummary* locations = use.GetUser()->GetLocations();
+          Location expected_location = locations->InAt(use.GetInputIndex());
           // The expected (actual) location may be invalid in case the input is unused. Currently
           // this only happens for intrinsics.
           if (expected_location.IsValid()) {
             if (expected_location.IsUnallocated()) {
-              locations->SetInAt(use->GetInputIndex(), source);
+              locations->SetInAt(use.GetInputIndex(), source);
             } else if (!expected_location.IsConstant()) {
-              AddInputMoveFor(interval->GetDefinedBy(), use->GetUser(), source, expected_location);
+              AddInputMoveFor(
+                  interval->GetDefinedBy(), use.GetUser(), source, expected_location);
             }
           } else {
-            DCHECK(use->GetUser()->IsInvoke());
-            DCHECK(use->GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
+            DCHECK(use.GetUser()->IsInvoke());
+            DCHECK(use.GetUser()->AsInvoke()->GetIntrinsic() != Intrinsics::kNone);
           }
         }
-        use = use->GetNext();
       }
+      use_it = matching_use_range.end();
 
       // Walk over the environment uses, and update their locations.
-      while (env_use != nullptr && env_use->GetPosition() < range->GetStart()) {
-        env_use = env_use->GetNext();
+      auto matching_env_use_range =
+          FindMatchingUseRange(env_use_it, env_use_end, range_begin, range_end);
+      for (const EnvUsePosition& env_use : matching_env_use_range) {
+        DCHECK(current->CoversSlow(env_use.GetPosition())
+               || (env_use.GetPosition() == range->GetEnd()));
+        HEnvironment* environment = env_use.GetEnvironment();
+        environment->SetLocationAt(env_use.GetInputIndex(), source);
       }
-
-      while (env_use != nullptr && env_use->GetPosition() <= range->GetEnd()) {
-        DCHECK(current->CoversSlow(env_use->GetPosition())
-               || (env_use->GetPosition() == range->GetEnd()));
-        HEnvironment* environment = env_use->GetEnvironment();
-        environment->SetLocationAt(env_use->GetInputIndex(), source);
-        env_use = env_use->GetNext();
-      }
+      env_use_it = matching_env_use_range.end();
 
       range = range->GetNext();
     }
@@ -395,13 +401,8 @@
     current = next_sibling;
   } while (current != nullptr);
 
-  if (kIsDebugBuild) {
-    // Following uses can only be synthesized uses.
-    while (use != nullptr) {
-      DCHECK(use->IsSynthesized());
-      use = use->GetNext();
-    }
-  }
+  // Following uses can only be synthesized uses.
+  DCHECK(std::all_of(use_it, use_end, [](const UsePosition& pos) { return pos.IsSynthesized(); }));
 }
 
 static bool IsMaterializableEntryBlockInstructionOfGraphWithIrreducibleLoop(
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 300f4c6..5e22772 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -20,7 +20,7 @@
 #include "linear_order.h"
 #include "register_allocation_resolver.h"
 #include "ssa_liveness_analysis.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
@@ -178,18 +178,17 @@
     use_weight += CostForMoveAt(interval->GetStart() + 1, liveness);
   }
 
-  UsePosition* use = interval->GetFirstUse();
-  while (use != nullptr && use->GetPosition() <= interval->GetStart()) {
-    // Skip uses before the start of this live interval.
-    use = use->GetNext();
-  }
-
-  while (use != nullptr && use->GetPosition() <= interval->GetEnd()) {
-    if (use->GetUser() != nullptr && use->RequiresRegister()) {
+  // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
+  // [interval->GetStart() + 1, interval->GetEnd() + 1)
+  auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
+                                                 interval->GetUses().end(),
+                                                 interval->GetStart() + 1u,
+                                                 interval->GetEnd() + 1u);
+  for (const UsePosition& use : matching_use_range) {
+    if (use.GetUser() != nullptr && use.RequiresRegister()) {
       // Cost for spilling at a register use point.
-      use_weight += CostForMoveAt(use->GetUser()->GetLifetimePosition() - 1, liveness);
+      use_weight += CostForMoveAt(use.GetUser()->GetLifetimePosition() - 1, liveness);
     }
-    use = use->GetNext();
   }
 
   // We divide by the length of the interval because we want to prioritize
@@ -989,16 +988,16 @@
     interval = TrySplit(interval, interval->GetStart() + 1);
   }
 
-  UsePosition* use = interval->GetFirstUse();
-  while (use != nullptr && use->GetPosition() < interval->GetStart()) {
-    use = use->GetNext();
-  }
-
+  // Process uses in the range [interval->GetStart(), interval->GetEnd()], i.e.
+  // [interval->GetStart(), interval->GetEnd() + 1)
+  auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
+                                                 interval->GetUses().end(),
+                                                 interval->GetStart(),
+                                                 interval->GetEnd() + 1u);
   // Split around register uses.
-  size_t end = interval->GetEnd();
-  while (use != nullptr && use->GetPosition() <= end) {
-    if (use->RequiresRegister()) {
-      size_t position = use->GetPosition();
+  for (const UsePosition& use : matching_use_range) {
+    if (use.RequiresRegister()) {
+      size_t position = use.GetPosition();
       interval = TrySplit(interval, position - 1);
       if (liveness_.GetInstructionFromPosition(position / 2)->IsControlFlow()) {
         // If we are at the very end of a basic block, we cannot split right
@@ -1008,7 +1007,6 @@
         interval = TrySplit(interval, position);
       }
     }
-    use = use->GetNext();
   }
 }
 
@@ -1398,18 +1396,20 @@
     }
 
     // Try to prevent moves into fixed input locations.
-    UsePosition* use = interval->GetFirstUse();
-    for (; use != nullptr && use->GetPosition() <= interval->GetStart(); use = use->GetNext()) {
-      // Skip past uses before the start of this interval.
-    }
-    for (; use != nullptr && use->GetPosition() <= interval->GetEnd(); use = use->GetNext()) {
-      HInstruction* user = use->GetUser();
+    // Process uses in the range (interval->GetStart(), interval->GetEnd()], i.e.
+    // [interval->GetStart() + 1, interval->GetEnd() + 1)
+    auto matching_use_range = FindMatchingUseRange(interval->GetUses().begin(),
+                                                   interval->GetUses().end(),
+                                                   interval->GetStart() + 1u,
+                                                   interval->GetEnd() + 1u);
+    for (const UsePosition& use : matching_use_range) {
+      HInstruction* user = use.GetUser();
       if (user == nullptr) {
         // User may be null for certain intervals, such as temp intervals.
         continue;
       }
       LocationSummary* locations = user->GetLocations();
-      Location input = locations->InAt(use->GetInputIndex());
+      Location input = locations->InAt(use.GetInputIndex());
       if (input.IsRegister() || input.IsFpuRegister()) {
         // TODO: Could try to handle pair interval too, but coalescing with fixed pair nodes
         //       is currently not supported.
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 667afb1..24a2ab2 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -912,9 +912,9 @@
   // Create an interval with lifetime holes.
   static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
   LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
-  first->first_use_ = new(&allocator) UsePosition(user, false, 8, first->first_use_);
-  first->first_use_ = new(&allocator) UsePosition(user, false, 7, first->first_use_);
-  first->first_use_ = new(&allocator) UsePosition(user, false, 6, first->first_use_);
+  first->uses_.push_front(*new(&allocator) UsePosition(user, false, 8));
+  first->uses_.push_front(*new(&allocator) UsePosition(user, false, 7));
+  first->uses_.push_front(*new(&allocator) UsePosition(user, false, 6));
 
   locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
   locations->SetOut(Location::RequiresRegister());
@@ -934,9 +934,9 @@
   // before lifetime position 6 yet.
   static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
   LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
-  third->first_use_ = new(&allocator) UsePosition(user, false, 8, third->first_use_);
-  third->first_use_ = new(&allocator) UsePosition(user, false, 4, third->first_use_);
-  third->first_use_ = new(&allocator) UsePosition(user, false, 3, third->first_use_);
+  third->uses_.push_front(*new(&allocator) UsePosition(user, false, 8));
+  third->uses_.push_front(*new(&allocator) UsePosition(user, false, 4));
+  third->uses_.push_front(*new(&allocator) UsePosition(user, false, 3));
   locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
   locations->SetOut(Location::RequiresRegister());
   third = third->SplitAt(3);
diff --git a/compiler/optimizing/scheduler_arm.cc b/compiler/optimizing/scheduler_arm.cc
index 1a89567..e78cd78 100644
--- a/compiler/optimizing/scheduler_arm.cc
+++ b/compiler/optimizing/scheduler_arm.cc
@@ -288,6 +288,11 @@
   last_visited_latency_ = kArmIntegerOpLatency;
 }
 
+void SchedulingLatencyVisitorARM::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* ATTRIBUTE_UNUSED) {
+  UNIMPLEMENTED(FATAL) << "IntermediateAddressIndex is not implemented for ARM";
+}
+
 void SchedulingLatencyVisitorARM::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArmMulIntegerLatency;
 }
@@ -813,10 +818,5 @@
   }
 }
 
-void SchedulingLatencyVisitorARM::VisitArmDexCacheArraysBase(art::HArmDexCacheArraysBase*) {
-  last_visited_internal_latency_ = kArmIntegerOpLatency;
-  last_visited_latency_ = kArmIntegerOpLatency;
-}
-
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/scheduler_arm.h b/compiler/optimizing/scheduler_arm.h
index 8d5e4f3..897e97d 100644
--- a/compiler/optimizing/scheduler_arm.h
+++ b/compiler/optimizing/scheduler_arm.h
@@ -17,7 +17,11 @@
 #ifndef ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
 #define ART_COMPILER_OPTIMIZING_SCHEDULER_ARM_H_
 
+#ifdef ART_USE_OLD_ARM_BACKEND
+#include "code_generator_arm.h"
+#else
 #include "code_generator_arm_vixl.h"
+#endif
 #include "scheduler.h"
 
 namespace art {
@@ -99,6 +103,7 @@
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
diff --git a/compiler/optimizing/scheduler_arm64.cc b/compiler/optimizing/scheduler_arm64.cc
index 558dcc4..83b487f 100644
--- a/compiler/optimizing/scheduler_arm64.cc
+++ b/compiler/optimizing/scheduler_arm64.cc
@@ -16,6 +16,7 @@
 
 #include "scheduler_arm64.h"
 #include "code_generator_utils.h"
+#include "mirror/array-inl.h"
 
 namespace art {
 namespace arm64 {
@@ -43,6 +44,13 @@
   last_visited_latency_ = kArm64IntegerOpLatency + 2;
 }
 
+void SchedulingLatencyVisitorARM64::VisitIntermediateAddressIndex(
+    HIntermediateAddressIndex* instr ATTRIBUTE_UNUSED) {
+  // Although the code generated is a simple `add` instruction, we found through empirical results
+  // that spacing it from its use in memory accesses was beneficial.
+  last_visited_latency_ = kArm64DataProcWithShifterOpLatency + 2;
+}
+
 void SchedulingLatencyVisitorARM64::VisitMultiplyAccumulate(HMultiplyAccumulate* ATTRIBUTE_UNUSED) {
   last_visited_latency_ = kArm64MulIntegerLatency;
 }
@@ -192,5 +200,148 @@
   }
 }
 
+// Sets the latency of a simple SIMD arithmetic operation from the packed type of `instr`.
+void SchedulingLatencyVisitorARM64::HandleSimpleArithmeticSIMD(HVecOperation* instr) {
+  const bool is_floating_point = Primitive::IsFloatingPointType(instr->GetPackedType());
+  last_visited_latency_ = is_floating_point
+      ? kArm64SIMDFloatingPointOpLatency
+      : kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecReplicateScalar(
+    HVecReplicateScalar* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDReplicateOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSetScalars(HVecSetScalars* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSumReduce(HVecSumReduce* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecCnv(HVecCnv* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDTypeConversionInt2FPLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNeg(HVecNeg* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAbs(HVecAbs* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecNot(HVecNot* instr) {
+  if (instr->GetPackedType() == Primitive::kPrimBoolean) {
+    last_visited_internal_latency_ = kArm64SIMDIntegerOpLatency;
+  }
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAdd(HVecAdd* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecSub(HVecSub* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+// Sets the multiplication latency according to whether the packed type is floating point.
+void SchedulingLatencyVisitorARM64::VisitVecMul(HVecMul* instr) {
+  const bool is_floating_point = Primitive::IsFloatingPointType(instr->GetPackedType());
+  last_visited_latency_ = is_floating_point
+      ? kArm64SIMDMulFloatingPointLatency
+      : kArm64SIMDMulIntegerLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecDiv(HVecDiv* instr) {
+  // Anything that is not a float division is expected to be a double division.
+  const bool is_float = (instr->GetPackedType() == Primitive::kPrimFloat);
+  if (!is_float) {
+    DCHECK(instr->GetPackedType() == Primitive::kPrimDouble);
+  }
+  last_visited_latency_ = is_float ? kArm64SIMDDivFloatLatency : kArm64SIMDDivDoubleLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMin(HVecMin* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMax(HVecMax* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAnd(HVecAnd* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecAndNot(HVecAndNot* instr) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instr->GetId();
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecOr(HVecOr* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecXor(HVecXor* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDIntegerOpLatency;
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShl(HVecShl* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecShr(HVecShr* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecUShr(HVecUShr* instr) {
+  HandleSimpleArithmeticSIMD(instr);
+}
+
+void SchedulingLatencyVisitorARM64::VisitVecMultiplyAccumulate(
+    HVecMultiplyAccumulate* instr ATTRIBUTE_UNUSED) {
+  last_visited_latency_ = kArm64SIMDMulIntegerLatency;
+}
+
+// Charges one shifter-operand data processing op unless input 1 (the index) is constant.
+void SchedulingLatencyVisitorARM64::HandleVecAddress(HVecMemoryOperation* instruction,
+                                                     size_t size ATTRIBUTE_UNUSED) {
+  if (instruction->InputAt(1)->IsConstant()) {
+    return;
+  }
+  last_visited_internal_latency_ += kArm64DataProcWithShifterOpLatency;
+}
+
+// Sets the latencies of a SIMD load. A char load for `String.charAt()` with string
+// compression enabled adds a memory load and a branch to the internal latency.
+void SchedulingLatencyVisitorARM64::VisitVecLoad(HVecLoad* instr) {
+  last_visited_internal_latency_ = 0;
+  size_t size = Primitive::ComponentSize(instr->GetPackedType());
+
+  if (instr->GetPackedType() == Primitive::kPrimChar
+      && mirror::kUseStringCompression
+      && instr->IsStringCharAt()) {
+    // Set latencies for the uncompressed case.
+    last_visited_internal_latency_ += kArm64MemoryLoadLatency + kArm64BranchLatency;
+  }
+  // The address computation and the load latency are common to both cases.
+  HandleVecAddress(instr, size);
+  last_visited_latency_ = kArm64SIMDMemoryLoadLatency;
+}
+
+// Sets the latencies of a SIMD store, including any internal address computation.
+void SchedulingLatencyVisitorARM64::VisitVecStore(HVecStore* instr) {
+  last_visited_internal_latency_ = 0;
+  HandleVecAddress(instr, Primitive::ComponentSize(instr->GetPackedType()));
+  last_visited_latency_ = kArm64SIMDMemoryStoreLatency;
+}
+
 }  // namespace arm64
 }  // namespace art
diff --git a/compiler/optimizing/scheduler_arm64.h b/compiler/optimizing/scheduler_arm64.h
index 7a33720..63d5b7d 100644
--- a/compiler/optimizing/scheduler_arm64.h
+++ b/compiler/optimizing/scheduler_arm64.h
@@ -42,6 +42,18 @@
 static constexpr uint32_t kArm64MulFloatingPointLatency = 6;
 static constexpr uint32_t kArm64MulIntegerLatency = 6;
 static constexpr uint32_t kArm64TypeConversionFloatingPointIntegerLatency = 5;
+static constexpr uint32_t kArm64BranchLatency = kArm64IntegerOpLatency;
+
+static constexpr uint32_t kArm64SIMDFloatingPointOpLatency = 10;
+static constexpr uint32_t kArm64SIMDIntegerOpLatency = 6;
+static constexpr uint32_t kArm64SIMDMemoryLoadLatency = 10;
+static constexpr uint32_t kArm64SIMDMemoryStoreLatency = 6;
+static constexpr uint32_t kArm64SIMDMulFloatingPointLatency = 12;
+static constexpr uint32_t kArm64SIMDMulIntegerLatency = 12;
+static constexpr uint32_t kArm64SIMDReplicateOpLatency = 16;
+static constexpr uint32_t kArm64SIMDDivDoubleLatency = 60;
+static constexpr uint32_t kArm64SIMDDivFloatLatency = 30;
+static constexpr uint32_t kArm64SIMDTypeConversionInt2FPLatency = 10;
 
 class SchedulingLatencyVisitorARM64 : public SchedulingLatencyVisitor {
  public:
@@ -52,29 +64,54 @@
 
 // We add a second unused parameter to be able to use this macro like the others
 // defined in `nodes.h`.
-#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M) \
-  M(ArrayGet         , unused)                   \
-  M(ArrayLength      , unused)                   \
-  M(ArraySet         , unused)                   \
-  M(BinaryOperation  , unused)                   \
-  M(BoundsCheck      , unused)                   \
-  M(Div              , unused)                   \
-  M(InstanceFieldGet , unused)                   \
-  M(InstanceOf       , unused)                   \
-  M(Invoke           , unused)                   \
-  M(LoadString       , unused)                   \
-  M(Mul              , unused)                   \
-  M(NewArray         , unused)                   \
-  M(NewInstance      , unused)                   \
-  M(Rem              , unused)                   \
-  M(StaticFieldGet   , unused)                   \
-  M(SuspendCheck     , unused)                   \
-  M(TypeConversion   , unused)
+#define FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(M)     \
+  M(ArrayGet             , unused)                   \
+  M(ArrayLength          , unused)                   \
+  M(ArraySet             , unused)                   \
+  M(BinaryOperation      , unused)                   \
+  M(BoundsCheck          , unused)                   \
+  M(Div                  , unused)                   \
+  M(InstanceFieldGet     , unused)                   \
+  M(InstanceOf           , unused)                   \
+  M(Invoke               , unused)                   \
+  M(LoadString           , unused)                   \
+  M(Mul                  , unused)                   \
+  M(NewArray             , unused)                   \
+  M(NewInstance          , unused)                   \
+  M(Rem                  , unused)                   \
+  M(StaticFieldGet       , unused)                   \
+  M(SuspendCheck         , unused)                   \
+  M(TypeConversion       , unused)                   \
+  M(VecReplicateScalar   , unused)                   \
+  M(VecSetScalars        , unused)                   \
+  M(VecSumReduce         , unused)                   \
+  M(VecCnv               , unused)                   \
+  M(VecNeg               , unused)                   \
+  M(VecAbs               , unused)                   \
+  M(VecNot               , unused)                   \
+  M(VecAdd               , unused)                   \
+  M(VecHalvingAdd        , unused)                   \
+  M(VecSub               , unused)                   \
+  M(VecMul               , unused)                   \
+  M(VecDiv               , unused)                   \
+  M(VecMin               , unused)                   \
+  M(VecMax               , unused)                   \
+  M(VecAnd               , unused)                   \
+  M(VecAndNot            , unused)                   \
+  M(VecOr                , unused)                   \
+  M(VecXor               , unused)                   \
+  M(VecShl               , unused)                   \
+  M(VecShr               , unused)                   \
+  M(VecUShr              , unused)                   \
+  M(VecMultiplyAccumulate, unused)                   \
+  M(VecLoad              , unused)                   \
+  M(VecStore             , unused)
 
 #define FOR_EACH_SCHEDULED_SHARED_INSTRUCTION(M) \
   M(BitwiseNegatedRight, unused)                 \
   M(MultiplyAccumulate, unused)                  \
   M(IntermediateAddress, unused)                 \
+  M(IntermediateAddressIndex, unused)            \
   M(DataProcWithShifterOp, unused)
 
 #define DECLARE_VISIT_INSTRUCTION(type, unused)  \
@@ -85,6 +122,10 @@
   FOR_EACH_CONCRETE_INSTRUCTION_ARM64(DECLARE_VISIT_INSTRUCTION)
 
 #undef DECLARE_VISIT_INSTRUCTION
+
+ private:
+  void HandleSimpleArithmeticSIMD(HVecOperation *instr);
+  void HandleVecAddress(HVecMemoryOperation* instruction, size_t size);
 };
 
 class HSchedulerARM64 : public HScheduler {
@@ -101,6 +142,8 @@
         return true;
       FOR_EACH_CONCRETE_INSTRUCTION_ARM64(CASE_INSTRUCTION_KIND)
         return true;
+      FOR_EACH_SCHEDULED_COMMON_INSTRUCTION(CASE_INSTRUCTION_KIND)
+        return true;
       default:
         return HScheduler::IsSchedulable(instruction);
     }
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index 9a03163..8bd568b 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -16,6 +16,7 @@
 
 #include "sharpening.h"
 
+#include "art_method-inl.h"
 #include "base/casts.h"
 #include "base/enums.h"
 #include "class_linker.h"
@@ -41,7 +42,9 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
-        SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
+        SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(),
+                                    codegen_,
+                                    compiler_driver_);
       } else if (instruction->IsLoadString()) {
         ProcessLoadString(instruction->AsLoadString());
       }
@@ -68,9 +71,21 @@
   return IsInBootImage(method) && !options.GetCompilePic();
 }
 
+static bool BootImageAOTCanEmbedMethod(ArtMethod* method, CompilerDriver* compiler_driver) {
+  DCHECK(compiler_driver->GetCompilerOptions().IsBootImage());
+  if (compiler_driver->GetSupportBootImageFixup()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ObjPtr<mirror::Class> klass = method->GetDeclaringClass();
+    DCHECK(klass != nullptr);
+    return compiler_driver->IsImageClass(
+        klass->GetDexFile().StringByTypeIdx(klass->GetDexTypeIndex()));
+  }
+  return false;  // Nothing is embedded when boot image fixup is not supported.
+}
 
 void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
-                                              CodeGenerator* codegen) {
+                                              CodeGenerator* codegen,
+                                              CompilerDriver* compiler_driver) {
   if (invoke->IsStringInit()) {
     // Not using the dex cache arrays. But we could still try to use a better dispatch...
     // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -108,16 +123,13 @@
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
     method_load_data = reinterpret_cast<uintptr_t>(callee);
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
+  } else if (codegen->GetCompilerOptions().IsBootImage() &&
+             BootImageAOTCanEmbedMethod(callee, compiler_driver)) {
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative;
+    code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
   } else {
-    // Use PC-relative access to the dex cache arrays.
-    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
-    // Note: we use the invoke's graph instead of the codegen graph, which are
-    // different when inlining (the codegen graph is the most outer graph). The
-    // invoke's dex method index is relative to the dex file where the invoke's graph
-    // was built from.
-    DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()),
-                                &invoke->GetBlock()->GetGraph()->GetDexFile());
-    method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
+    // Use PC-relative access to the .bss methods arrays.
+    method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kBssEntry;
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
   }
 
@@ -140,7 +152,7 @@
                                                        CompilerDriver* compiler_driver,
                                                        const DexCompilationUnit& dex_compilation_unit) {
   Handle<mirror::Class> klass = load_class->GetClass();
-  DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
+  DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kRuntimeCall ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
       << load_class->GetLoadKind();
   DCHECK(!load_class->IsInBootImage()) << "HLoadClass should not be optimized before sharpening.";
@@ -166,9 +178,9 @@
       DCHECK(!runtime->UseJitCompilation());
       if (!compiler_driver->GetSupportBootImageFixup()) {
         // compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
-      } else if ((klass != nullptr) && compiler_driver->IsImageClass(
-          dex_file.StringDataByIdx(dex_file.GetTypeId(type_index).descriptor_idx_))) {
+        desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
+      } else if ((klass != nullptr) &&
+                 compiler_driver->IsImageClass(dex_file.StringByTypeIdx(type_index))) {
         is_in_boot_image = true;
         desired_load_kind = HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
       } else {
@@ -191,7 +203,7 @@
           // this `HLoadClass` hasn't been executed in the interpreter.
           // Fallback to the dex cache.
           // TODO(ngeoffray): Generate HDeoptimize instead.
-          desired_load_kind = HLoadClass::LoadKind::kDexCacheViaMethod;
+          desired_load_kind = HLoadClass::LoadKind::kRuntimeCall;
         }
       } else if (is_in_boot_image && !codegen->GetCompilerOptions().GetCompilePic()) {
         // AOT app compilation. Check if the class is in the boot image.
@@ -210,7 +222,7 @@
   }
 
   if (!IsSameDexFile(load_class->GetDexFile(), *dex_compilation_unit.GetDexFile())) {
-    if ((load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) ||
+    if ((load_kind == HLoadClass::LoadKind::kRuntimeCall) ||
         (load_kind == HLoadClass::LoadKind::kBssEntry)) {
       // We actually cannot reference this class, we're forced to bail.
       // We cannot reference this class with Bss, as the entrypoint will lookup the class
@@ -222,7 +234,7 @@
 }
 
 void HSharpening::ProcessLoadString(HLoadString* load_string) {
-  DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  DCHECK_EQ(load_string->GetLoadKind(), HLoadString::LoadKind::kRuntimeCall);
 
   const DexFile& dex_file = load_string->GetDexFile();
   dex::StringIndex string_index = load_string->GetStringIndex();
@@ -249,7 +261,7 @@
         desired_load_kind = HLoadString::LoadKind::kBootImageLinkTimePcRelative;
       } else {
         // compiler_driver_test. Do not sharpen.
-        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+        desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
       }
     } else if (runtime->UseJitCompilation()) {
       DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
@@ -261,7 +273,7 @@
           desired_load_kind = HLoadString::LoadKind::kJitTableAddress;
         }
       } else {
-        desired_load_kind = HLoadString::LoadKind::kDexCacheViaMethod;
+        desired_load_kind = HLoadString::LoadKind::kRuntimeCall;
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 10707c7..f74b0af 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -55,7 +55,9 @@
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Used by Sharpening and InstructionSimplifier.
-  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                          CodeGenerator* codegen,
+                                          CompilerDriver* compiler_driver);
 
  private:
   void ProcessLoadString(HLoadString* load_string);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index b538a89..7b7495b 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -356,14 +356,16 @@
     }
   }
 
-  UsePosition* use = first_use_;
   size_t start = GetStart();
   size_t end = GetEnd();
-  while (use != nullptr && use->GetPosition() <= end) {
-    size_t use_position = use->GetPosition();
-    if (use_position >= start && !use->IsSynthesized()) {
-      HInstruction* user = use->GetUser();
-      size_t input_index = use->GetInputIndex();
+  for (const UsePosition& use : GetUses()) {
+    size_t use_position = use.GetPosition();
+    if (use_position > end) {
+      break;
+    }
+    if (use_position >= start && !use.IsSynthesized()) {
+      HInstruction* user = use.GetUser();
+      size_t input_index = use.GetInputIndex();
       if (user->IsPhi()) {
         // If the phi has a register, try to use the same.
         Location phi_location = user->GetLiveInterval()->ToLocation();
@@ -395,7 +397,7 @@
       } else {
         // If the instruction is expected in a register, try to use it.
         LocationSummary* locations = user->GetLocations();
-        Location expected = locations->InAt(use->GetInputIndex());
+        Location expected = locations->InAt(use.GetInputIndex());
         // We use the user's lifetime position - 1 (and not `use_position`) because the
         // register is blocked at the beginning of the user.
         size_t position = user->GetLifetimePosition() - 1;
@@ -408,7 +410,6 @@
         }
       }
     }
-    use = use->GetNext();
   }
 
   return kNoRegister;
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index e9dffc1..a668157 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -19,7 +19,9 @@
 
 #include <iostream>
 
+#include "base/iteration_range.h"
 #include "nodes.h"
+#include "utils/intrusive_forward_list.h"
 
 namespace art {
 
@@ -102,28 +104,23 @@
 /**
  * A use position represents a live interval use at a given position.
  */
-class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
+class UsePosition : public ArenaObject<kArenaAllocSsaLiveness>,
+                    public IntrusiveForwardListNode<UsePosition> {
  public:
-  UsePosition(HInstruction* user, size_t input_index, size_t position, UsePosition* next)
+  UsePosition(HInstruction* user, size_t input_index, size_t position)
       : user_(user),
         input_index_(input_index),
-        position_(position),
-        next_(next) {
-    DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
+        position_(position) {
   }
 
   explicit UsePosition(size_t position)
       : user_(nullptr),
         input_index_(kNoInput),
-        position_(dchecked_integral_cast<uint32_t>(position)),
-        next_(nullptr) {
+        position_(dchecked_integral_cast<uint32_t>(position)) {
   }
 
   size_t GetPosition() const { return position_; }
 
-  UsePosition* GetNext() const { return next_; }
-  void SetNext(UsePosition* next) { next_ = next; }
-
   HInstruction* GetUser() const { return user_; }
 
   bool IsSynthesized() const { return user_ == nullptr; }
@@ -138,10 +135,8 @@
     return user_->GetBlock()->GetLoopInformation();
   }
 
-  UsePosition* Dup(ArenaAllocator* allocator) const {
-    return new (allocator) UsePosition(
-        user_, input_index_, position_,
-        next_ == nullptr ? nullptr : next_->Dup(allocator));
+  UsePosition* Clone(ArenaAllocator* allocator) const {
+    return new (allocator) UsePosition(user_, input_index_, position_);
   }
 
   bool RequiresRegister() const {
@@ -156,33 +151,28 @@
   HInstruction* const user_;
   const size_t input_index_;
   const size_t position_;
-  UsePosition* next_;
 
   DISALLOW_COPY_AND_ASSIGN(UsePosition);
 };
+using UsePositionList = IntrusiveForwardList<UsePosition>;
 
 /**
  * An environment use position represents a live interval for environment use at a given position.
  */
-class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
+class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness>,
+                       public IntrusiveForwardListNode<EnvUsePosition> {
  public:
   EnvUsePosition(HEnvironment* environment,
                  size_t input_index,
-                 size_t position,
-                 EnvUsePosition* next)
+                 size_t position)
       : environment_(environment),
         input_index_(input_index),
-        position_(position),
-        next_(next) {
+        position_(position) {
     DCHECK(environment != nullptr);
-    DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
   size_t GetPosition() const { return position_; }
 
-  EnvUsePosition* GetNext() const { return next_; }
-  void SetNext(EnvUsePosition* next) { next_ = next; }
-
   HEnvironment* GetEnvironment() const { return environment_; }
   size_t GetInputIndex() const { return input_index_; }
 
@@ -190,20 +180,47 @@
     stream << position_;
   }
 
-  EnvUsePosition* Dup(ArenaAllocator* allocator) const {
-    return new (allocator) EnvUsePosition(
-        environment_, input_index_, position_,
-        next_ == nullptr ? nullptr : next_->Dup(allocator));
+  EnvUsePosition* Clone(ArenaAllocator* allocator) const {
+    return new (allocator) EnvUsePosition(environment_, input_index_, position_);
   }
 
  private:
   HEnvironment* const environment_;
   const size_t input_index_;
   const size_t position_;
-  EnvUsePosition* next_;
 
   DISALLOW_COPY_AND_ASSIGN(EnvUsePosition);
 };
+using EnvUsePositionList = IntrusiveForwardList<EnvUsePosition>;
+
+template <typename Iterator>
+inline Iterator FindUseAtOrAfterPosition(Iterator first, Iterator last, size_t position) {
+  using value_type = const typename Iterator::value_type;
+  static_assert(std::is_same<value_type, const UsePosition>::value ||
+                    std::is_same<value_type, const EnvUsePosition>::value,
+                "Expecting value type UsePosition or EnvUsePosition.");
+  Iterator ret = std::find_if(
+      first, last, [position](const value_type& use) { return use.GetPosition() >= position; });
+  // Check that the processed range is sorted. Do not check the rest of the range to avoid
+  // increasing the complexity of callers from O(n) to O(n^2).
+  DCHECK(std::is_sorted(
+      first,
+      ret,
+      [](const value_type& lhs, const value_type& rhs) {
+          return lhs.GetPosition() < rhs.GetPosition();
+      }));
+  return ret;
+}
+
+// Returns the range of uses whose positions lie in [position_begin, position_end).
+template <typename Iterator>
+inline IterationRange<Iterator> FindMatchingUseRange(
+    Iterator first, Iterator last, size_t position_begin, size_t position_end) {
+  // The end of the range is searched for starting from the beginning of the range.
+  Iterator range_start = FindUseAtOrAfterPosition(first, last, position_begin);
+  Iterator range_end = FindUseAtOrAfterPosition(range_start, last, position_end);
+  return MakeIterationRange(range_start, range_end);
+}
 
 class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
@@ -265,11 +282,11 @@
 
   void AddTempUse(HInstruction* instruction, size_t temp_index) {
     DCHECK(IsTemp());
-    DCHECK(first_use_ == nullptr) << "A temporary can only have one user";
-    DCHECK(first_env_use_ == nullptr) << "A temporary cannot have environment user";
+    DCHECK(GetUses().empty()) << "A temporary can only have one user";
+    DCHECK(GetEnvironmentUses().empty()) << "A temporary cannot have environment user";
     size_t position = instruction->GetLifetimePosition();
-    first_use_ = new (allocator_) UsePosition(
-        instruction, temp_index, position, first_use_);
+    UsePosition* new_use = new (allocator_) UsePosition(instruction, temp_index, position);
+    uses_.push_front(*new_use);
     AddRange(position, position + 1);
   }
 
@@ -306,32 +323,36 @@
       AddBackEdgeUses(*instruction->GetBlock());
     }
 
-    if ((first_use_ != nullptr)
-        && (first_use_->GetUser() == actual_user)
-        && (first_use_->GetPosition() < position)) {
+    if ((!uses_.empty()) &&
+        (uses_.front().GetUser() == actual_user) &&
+        (uses_.front().GetPosition() < position)) {
       // The user uses the instruction multiple times, and one use dies before the other.
       // We update the use list so that the latter is first.
       DCHECK(!is_environment);
-      UsePosition* cursor = first_use_;
-      while ((cursor->GetNext() != nullptr) && (cursor->GetNext()->GetPosition() < position)) {
-        cursor = cursor->GetNext();
-      }
-      DCHECK(first_use_->GetPosition() + 1 == position);
-      UsePosition* new_use = new (allocator_) UsePosition(
-          instruction, input_index, position, cursor->GetNext());
-      cursor->SetNext(new_use);
-      if (first_range_->GetEnd() == first_use_->GetPosition()) {
+      DCHECK(uses_.front().GetPosition() + 1 == position);
+      UsePositionList::iterator next_pos = uses_.begin();
+      UsePositionList::iterator insert_pos;
+      do {
+        insert_pos = next_pos;
+        ++next_pos;
+      } while (next_pos != uses_.end() && next_pos->GetPosition() < position);
+      UsePosition* new_use = new (allocator_) UsePosition(instruction, input_index, position);
+      uses_.insert_after(insert_pos, *new_use);
+      if (first_range_->GetEnd() == uses_.front().GetPosition()) {
         first_range_->end_ = position;
       }
       return;
     }
 
     if (is_environment) {
-      first_env_use_ = new (allocator_) EnvUsePosition(
-          environment, input_index, position, first_env_use_);
+      DCHECK(env_uses_.empty() || position <= env_uses_.front().GetPosition());
+      EnvUsePosition* new_env_use =
+          new (allocator_) EnvUsePosition(environment, input_index, position);
+      env_uses_.push_front(*new_env_use);
     } else {
-      first_use_ = new (allocator_) UsePosition(
-          instruction, input_index, position, first_use_);
+      DCHECK(uses_.empty() || position <= uses_.front().GetPosition());
+      UsePosition* new_use = new (allocator_) UsePosition(instruction, input_index, position);
+      uses_.push_front(*new_use);
     }
 
     if (is_environment && !keep_alive) {
@@ -369,8 +390,9 @@
     if (block->IsInLoop()) {
       AddBackEdgeUses(*block);
     }
-    first_use_ = new (allocator_) UsePosition(
-        instruction, input_index, block->GetLifetimeEnd(), first_use_);
+    UsePosition* new_use =
+        new (allocator_) UsePosition(instruction, input_index, block->GetLifetimeEnd());
+    uses_.push_front(*new_use);
   }
 
   ALWAYS_INLINE void AddRange(size_t start, size_t end) {
@@ -430,7 +452,7 @@
       first_range_->start_ = from;
     } else {
       // Instruction without uses.
-      DCHECK(first_use_ == nullptr);
+      DCHECK(uses_.empty());
       DCHECK(from == defined_by_->GetLifetimePosition());
       first_range_ = last_range_ = range_search_start_ =
           new (allocator_) LiveRange(from, from + 2, nullptr);
@@ -528,16 +550,17 @@
       return position;
     }
 
-    UsePosition* use = first_use_;
     size_t end = GetEnd();
-    while (use != nullptr && use->GetPosition() <= end) {
-      size_t use_position = use->GetPosition();
+    for (const UsePosition& use : GetUses()) {
+      size_t use_position = use.GetPosition();
+      if (use_position > end) {
+        break;
+      }
       if (use_position > position) {
-        if (use->RequiresRegister()) {
+        if (use.RequiresRegister()) {
           return use_position;
         }
       }
-      use = use->GetNext();
     }
     return kNoLifetime;
   }
@@ -564,24 +587,25 @@
       return position;
     }
 
-    UsePosition* use = first_use_;
     size_t end = GetEnd();
-    while (use != nullptr && use->GetPosition() <= end) {
-      size_t use_position = use->GetPosition();
+    for (const UsePosition& use : GetUses()) {
+      size_t use_position = use.GetPosition();
+      if (use_position > end) {
+        break;
+      }
       if (use_position > position) {
         return use_position;
       }
-      use = use->GetNext();
     }
     return kNoLifetime;
   }
 
-  UsePosition* GetFirstUse() const {
-    return first_use_;
+  const UsePositionList& GetUses() const {
+    return parent_->uses_;
   }
 
-  EnvUsePosition* GetFirstEnvironmentUse() const {
-    return first_env_use_;
+  const EnvUsePositionList& GetEnvironmentUses() const {
+    return parent_->env_uses_;
   }
 
   Primitive::Type GetType() const {
@@ -645,8 +669,6 @@
     next_sibling_ = new_interval;
     new_interval->parent_ = parent_;
 
-    new_interval->first_use_ = first_use_;
-    new_interval->first_env_use_ = first_env_use_;
     LiveRange* current = first_range_;
     LiveRange* previous = nullptr;
     // Iterate over the ranges, and either find a range that covers this position, or
@@ -718,20 +740,14 @@
       current = current->GetNext();
     }
     stream << "}, uses: { ";
-    const UsePosition* use = first_use_;
-    if (use != nullptr) {
-      do {
-        use->Dump(stream);
-        stream << " ";
-      } while ((use = use->GetNext()) != nullptr);
+    for (const UsePosition& use : GetUses()) {
+      use.Dump(stream);
+      stream << " ";
     }
     stream << "}, { ";
-    const EnvUsePosition* env_use = first_env_use_;
-    if (env_use != nullptr) {
-      do {
-        env_use->Dump(stream);
-        stream << " ";
-      } while ((env_use = env_use->GetNext()) != nullptr);
+    for (const EnvUsePosition& env_use : GetEnvironmentUses()) {
+      env_use.Dump(stream);
+      stream << " ";
     }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
@@ -833,12 +849,16 @@
       high_or_low_interval_->last_range_ = high_or_low_interval_->first_range_->GetLastRange();
       high_or_low_interval_->range_search_start_ = high_or_low_interval_->first_range_;
     }
-    if (first_use_ != nullptr) {
-      high_or_low_interval_->first_use_ = first_use_->Dup(allocator_);
+    auto pos = high_or_low_interval_->uses_.before_begin();
+    for (const UsePosition& use : uses_) {
+      UsePosition* new_use = use.Clone(allocator_);
+      pos = high_or_low_interval_->uses_.insert_after(pos, *new_use);
     }
 
-    if (first_env_use_ != nullptr) {
-      high_or_low_interval_->first_env_use_ = first_env_use_->Dup(allocator_);
+    auto env_pos = high_or_low_interval_->env_uses_.before_begin();
+    for (const EnvUsePosition& env_use : env_uses_) {
+      EnvUsePosition* new_env_use = env_use.Clone(allocator_);
+      env_pos = high_or_low_interval_->env_uses_.insert_after(env_pos, *new_env_use);
     }
   }
 
@@ -962,8 +982,8 @@
         range_search_start_(nullptr),
         first_safepoint_(nullptr),
         last_safepoint_(nullptr),
-        first_use_(nullptr),
-        first_env_use_(nullptr),
+        uses_(),
+        env_uses_(),
         type_(type),
         next_sibling_(nullptr),
         parent_(this),
@@ -1005,14 +1025,12 @@
   }
 
   bool HasSynthesizeUseAt(size_t position) const {
-    UsePosition* use = first_use_;
-    while (use != nullptr) {
-      size_t use_position = use->GetPosition();
-      if ((use_position == position) && use->IsSynthesized()) {
+    for (const UsePosition& use : GetUses()) {
+      size_t use_position = use.GetPosition();
+      if ((use_position == position) && use.IsSynthesized()) {
         return true;
       }
       if (use_position > position) break;
-      use = use->GetNext();
     }
     return false;
   }
@@ -1028,11 +1046,11 @@
 
     // Add synthesized uses at the back edge of loops to help the register allocator.
     // Note that this method is called in decreasing liveness order, to faciliate adding
-    // uses at the head of the `first_use_` linked list. Because below
+    // uses at the head of the `uses_` list. Because below
     // we iterate from inner-most to outer-most, which is in increasing liveness order,
-    // we need to take extra care of how the `first_use_` linked list is being updated.
-    UsePosition* first_in_new_list = nullptr;
-    UsePosition* last_in_new_list = nullptr;
+    // we need to add subsequent entries after the last inserted entry.
+    const UsePositionList::iterator old_begin = uses_.begin();
+    UsePositionList::iterator insert_pos = uses_.before_begin();
     for (HLoopInformationOutwardIterator it(block_at_use);
          !it.Done();
          it.Advance()) {
@@ -1042,37 +1060,25 @@
         break;
       }
 
-      // We're only adding a synthesized use at the last back edge. Adding syntehsized uses on
+      // We're only adding a synthesized use at the last back edge. Adding synthesized uses on
       // all back edges is not necessary: anything used in the loop will have its use at the
       // last back edge. If we want branches in a loop to have better register allocation than
       // another branch, then it is the linear order we should change.
       size_t back_edge_use_position = current->GetLifetimeEnd();
-      if ((first_use_ != nullptr) && (first_use_->GetPosition() <= back_edge_use_position)) {
+      if ((old_begin != uses_.end()) && (old_begin->GetPosition() <= back_edge_use_position)) {
         // There was a use already seen in this loop. Therefore the previous call to `AddUse`
         // already inserted the backedge use. We can stop going outward.
         DCHECK(HasSynthesizeUseAt(back_edge_use_position));
         break;
       }
 
-      DCHECK(last_in_new_list == nullptr ||
-             back_edge_use_position > last_in_new_list->GetPosition());
+      DCHECK(insert_pos != uses_.before_begin()
+             ? back_edge_use_position > insert_pos->GetPosition()
+             : current == block_at_use.GetLoopInformation())
+          << std::distance(uses_.before_begin(), insert_pos);
 
       UsePosition* new_use = new (allocator_) UsePosition(back_edge_use_position);
-
-      if (last_in_new_list != nullptr) {
-        // Going outward. The latest created use needs to point to the new use.
-        last_in_new_list->SetNext(new_use);
-      } else {
-        // This is the inner-most loop.
-        DCHECK_EQ(current, block_at_use.GetLoopInformation());
-        first_in_new_list = new_use;
-      }
-      last_in_new_list = new_use;
-    }
-    // Link the newly created linked list with `first_use_`.
-    if (last_in_new_list != nullptr) {
-      last_in_new_list->SetNext(first_use_);
-      first_use_ = first_in_new_list;
+      insert_pos = uses_.insert_after(insert_pos, *new_use);
     }
   }
 
@@ -1091,9 +1097,9 @@
   SafepointPosition* first_safepoint_;
   SafepointPosition* last_safepoint_;
 
-  // Uses of this interval. Note that this linked list is shared amongst siblings.
-  UsePosition* first_use_;
-  EnvUsePosition* first_env_use_;
+  // Uses of this interval. Only the parent interval keeps these lists.
+  UsePositionList uses_;
+  EnvUsePositionList env_uses_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
@@ -1202,14 +1208,14 @@
     // A temporary shares the same lifetime start as the instruction that requires it.
     DCHECK(temp->IsTemp());
     HInstruction* user = GetInstructionFromPosition(temp->GetStart() / 2);
-    DCHECK_EQ(user, temp->GetFirstUse()->GetUser());
+    DCHECK_EQ(user, temp->GetUses().front().GetUser());
     return user;
   }
 
   size_t GetTempIndex(LiveInterval* temp) const {
     // We use the input index to store the index of the temporary in the user's temporary list.
     DCHECK(temp->IsTemp());
-    return temp->GetFirstUse()->GetInputIndex();
+    return temp->GetUses().front().GetInputIndex();
   }
 
   size_t GetMaxLifetimePosition() const {
diff --git a/compiler/utils/arm/assembler_arm.h b/compiler/utils/arm/assembler_arm.h
index 0f24e81..bb23a29 100644
--- a/compiler/utils/arm/assembler_arm.h
+++ b/compiler/utils/arm/assembler_arm.h
@@ -25,7 +25,7 @@
 #include "base/bit_utils.h"
 #include "base/enums.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
+#include "base/stl_util_identity.h"
 #include "base/value_object.h"
 #include "constants_arm.h"
 #include "utils/arm/assembler_arm_shared.h"
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 6afc3dd..eb3f870 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -18,6 +18,8 @@
 #include <type_traits>
 
 #include "assembler_arm_vixl.h"
+#include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "thread.h"
 
diff --git a/compiler/utils/dedupe_set_test.cc b/compiler/utils/dedupe_set_test.cc
index 4c0979e..b390508 100644
--- a/compiler/utils/dedupe_set_test.cc
+++ b/compiler/utils/dedupe_set_test.cc
@@ -23,7 +23,7 @@
 #include "base/array_ref.h"
 #include "dedupe_set-inl.h"
 #include "gtest/gtest.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/compiler/utils/intrusive_forward_list.h b/compiler/utils/intrusive_forward_list.h
index b5fc2f2..5a358ac 100644
--- a/compiler/utils/intrusive_forward_list.h
+++ b/compiler/utils/intrusive_forward_list.h
@@ -23,6 +23,7 @@
 #include <memory>
 #include <type_traits>
 
+#include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
 
@@ -42,10 +43,19 @@
   mutable const IntrusiveForwardListHook* next_hook;
 };
 
-template <typename T, IntrusiveForwardListHook T::* NextPtr = &T::hook>
-class IntrusiveForwardListMemberHook;
+template <typename Derived, typename Tag = void>  // Tag tells apart several node bases.
+struct IntrusiveForwardListNode : public IntrusiveForwardListHook {  // Hook as a base class.
+};
 
-template <typename T, typename HookTraits = IntrusiveForwardListMemberHook<T>>
+template <typename T, IntrusiveForwardListHook T::* NextPtr = &T::hook>
+class IntrusiveForwardListMemberHookTraits;
+
+template <typename T, typename Tag = void>
+class IntrusiveForwardListBaseHookTraits;
+
+template <typename T,
+          typename HookTraits =
+              IntrusiveForwardListBaseHookTraits<typename std::remove_const<T>::type>>
 class IntrusiveForwardList;
 
 template <typename T, typename HookTraits>
@@ -435,7 +445,7 @@
 }
 
 template <typename T, IntrusiveForwardListHook T::* NextPtr>
-class IntrusiveForwardListMemberHook {
+class IntrusiveForwardListMemberHookTraits {
  public:
   static const IntrusiveForwardListHook* GetHook(const T* value) {
     return &(value->*NextPtr);
@@ -447,6 +457,20 @@
   }
 };
 
+template <typename T, typename Tag>
+class IntrusiveForwardListBaseHookTraits {  // Traits for values deriving from the node base.
+ public:
+  static const IntrusiveForwardListHook* GetHook(const T* value) {
+    const IntrusiveForwardListNode<T, Tag>* node = value;  // Upcast to the Tag-selected base.
+    return node;  // The "node" converts to its "hook" base implicitly.
+  }
+
+  static T* GetValue(const IntrusiveForwardListHook* hook) {
+    using Node = IntrusiveForwardListNode<T, Tag>;  // Undo GetHook(): hook -> node -> value.
+    return down_cast<T*>(down_cast<Node*>(const_cast<IntrusiveForwardListHook*>(hook)));
+  }
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_UTILS_INTRUSIVE_FORWARD_LIST_H_
diff --git a/compiler/utils/intrusive_forward_list_test.cc b/compiler/utils/intrusive_forward_list_test.cc
index f2efa4d..939676c 100644
--- a/compiler/utils/intrusive_forward_list_test.cc
+++ b/compiler/utils/intrusive_forward_list_test.cc
@@ -23,13 +23,14 @@
 
 namespace art {
 
-struct IFLTestValue {
+struct IFLTestValue : public IntrusiveForwardListNode<IFLTestValue> {
   // Deliberately not explicit.
-  IFLTestValue(int v) : hook(), value(v) { }  // NOLINT(runtime/explicit)
+  IFLTestValue(int v) : value(v) { }  // NOLINT(runtime/explicit)
 
-  IntrusiveForwardListHook hook;
   int value;
 };
+using IFLTestValueList = IntrusiveForwardList<IFLTestValue>;
+using ConstIFLTestValueList = IntrusiveForwardList<const IFLTestValue>;
 
 bool operator==(const IFLTestValue& lhs, const IFLTestValue& rhs) {
   return lhs.value == rhs.value;
@@ -39,6 +40,24 @@
   return lhs.value < rhs.value;
 }
 
+struct IFLTestValue2 {  // Test value that embeds the list hook as a data member.
+  // Deliberately not explicit: the tests pass plain ints where a value is expected.
+  IFLTestValue2(int v) : hook(), value(v) { }  // NOLINT(runtime/explicit)
+
+  IntrusiveForwardListHook hook;  // Linked through IntrusiveForwardListMemberHookTraits.
+  int value;
+};
+using IFLTestValue2List =
+    IntrusiveForwardList<IFLTestValue2, IntrusiveForwardListMemberHookTraits<IFLTestValue2>>;
+
+bool operator==(const IFLTestValue2& lhs, const IFLTestValue2& rhs) {
+  return lhs.value == rhs.value;
+}
+
+bool operator<(const IFLTestValue2& lhs, const IFLTestValue2& rhs) {
+  return lhs.value < rhs.value;
+}
+
 #define ASSERT_LISTS_EQUAL(expected, value)                                         \
   do {                                                                              \
     ASSERT_EQ((expected).empty(), (value).empty());                                 \
@@ -47,16 +66,82 @@
     ASSERT_TRUE(std::equal((expected).begin(), (expected).end(), (value).begin())); \
   } while (false)
 
-TEST(IntrusiveForwardList, IteratorToConstIterator) {
-  IntrusiveForwardList<IFLTestValue> ifl;
-  IntrusiveForwardList<IFLTestValue>::iterator begin = ifl.begin();
-  IntrusiveForwardList<IFLTestValue>::const_iterator cbegin = ifl.cbegin();
-  IntrusiveForwardList<IFLTestValue>::const_iterator converted_begin = begin;
+class IntrusiveForwardListTest : public testing::Test {  // Test bodies templated on list type.
+ public:
+  template <typename ListType>
+  void IteratorToConstIterator();
+
+  template <typename ListType>
+  void IteratorOperators();
+
+  template <typename ListType>
+  void ConstructRange();
+
+  template <typename ListType>
+  void Assign();
+
+  template <typename ListType>
+  void PushPop();
+
+  template <typename ListType>
+  void InsertAfter1();
+
+  template <typename ListType>
+  void InsertAfter2();
+
+  template <typename ListType>
+  void EraseAfter1();
+
+  template <typename ListType>
+  void EraseAfter2();
+
+  template <typename ListType>
+  void SwapClear();
+
+  template <typename ListType>
+  void SpliceAfter();
+
+  template <typename ListType>
+  void Remove();
+
+  template <typename ListType>
+  void Unique();
+
+  template <typename ListType>
+  void Merge();
+
+  template <typename ListType>
+  void Sort1();
+
+  template <typename ListType>
+  void Sort2();
+
+  template <typename ListType>
+  void Reverse();
+
+  template <typename ListType>
+  void ModifyValue();
+};
+
+template <typename ListType>
+void IntrusiveForwardListTest::IteratorToConstIterator() {
+  ListType ifl;
+  typename ListType::iterator begin = ifl.begin();
+  typename ListType::const_iterator cbegin = ifl.cbegin();
+  typename ListType::const_iterator converted_begin = begin;
   ASSERT_TRUE(converted_begin == cbegin);
 }
 
-TEST(IntrusiveForwardList, IteratorOperators) {
-  IntrusiveForwardList<IFLTestValue> ifl;
+TEST_F(IntrusiveForwardListTest, IteratorToConstIterator) {
+  IteratorToConstIterator<IFLTestValueList>();
+  IteratorToConstIterator<ConstIFLTestValueList>();
+  IteratorToConstIterator<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::IteratorOperators() {
+  using ValueType = typename ListType::value_type;
+  ListType ifl;
   ASSERT_TRUE(ifl.begin() == ifl.cbegin());
   ASSERT_FALSE(ifl.begin() != ifl.cbegin());
   ASSERT_TRUE(ifl.end() == ifl.cend());
@@ -65,37 +150,61 @@
   ASSERT_TRUE(ifl.begin() == ifl.end());  // Empty.
   ASSERT_FALSE(ifl.begin() != ifl.end());  // Empty.
 
-  IFLTestValue value(1);
+  ValueType value(1);
   ifl.insert_after(ifl.cbefore_begin(), value);
 
   ASSERT_FALSE(ifl.begin() == ifl.end());  // Not empty.
   ASSERT_TRUE(ifl.begin() != ifl.end());  // Not empty.
 }
 
-TEST(IntrusiveForwardList, ConstructRange) {
+TEST_F(IntrusiveForwardListTest, IteratorOperators) {
+  IteratorOperators<IFLTestValueList>();
+  IteratorOperators<ConstIFLTestValueList>();
+  IteratorOperators<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::ConstructRange() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 1, 2, 7 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
 }
 
-TEST(IntrusiveForwardList, Assign) {
+TEST_F(IntrusiveForwardListTest, ConstructRange) {
+  ConstructRange<IFLTestValueList>();
+  ConstructRange<ConstIFLTestValueList>();
+  ConstructRange<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Assign() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref1({ 2, 8, 5 });
-  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
-  IntrusiveForwardList<IFLTestValue> ifl;
+  std::vector<ValueType> storage1(ref1.begin(), ref1.end());
+  ListType ifl;
   ifl.assign(storage1.begin(), storage1.end());
   ASSERT_LISTS_EQUAL(ref1, ifl);
   std::forward_list<int> ref2({ 7, 1, 3 });
-  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
+  std::vector<ValueType> storage2(ref2.begin(), ref2.end());
   ifl.assign(storage2.begin(), storage2.end());
   ASSERT_LISTS_EQUAL(ref2, ifl);
 }
 
-TEST(IntrusiveForwardList, PushPop) {
-  IFLTestValue value3(3);
-  IFLTestValue value7(7);
+TEST_F(IntrusiveForwardListTest, Assign) {
+  Assign<IFLTestValueList>();
+  Assign<ConstIFLTestValueList>();
+  Assign<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::PushPop() {
+  using ValueType = typename ListType::value_type;
+  ValueType value3(3);
+  ValueType value7(7);
   std::forward_list<int> ref;
-  IntrusiveForwardList<IFLTestValue> ifl;
+  ListType ifl;
   ASSERT_LISTS_EQUAL(ref, ifl);
   ref.push_front(3);
   ifl.push_front(value3);
@@ -114,13 +223,21 @@
   ASSERT_LISTS_EQUAL(ref, ifl);
 }
 
-TEST(IntrusiveForwardList, InsertAfter1) {
-  IFLTestValue value4(4);
-  IFLTestValue value8(8);
-  IFLTestValue value5(5);
-  IFLTestValue value3(3);
+TEST_F(IntrusiveForwardListTest, PushPop) {
+  PushPop<IFLTestValueList>();
+  PushPop<ConstIFLTestValueList>();
+  PushPop<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::InsertAfter1() {
+  using ValueType = typename ListType::value_type;
+  ValueType value4(4);
+  ValueType value8(8);
+  ValueType value5(5);
+  ValueType value3(3);
   std::forward_list<int> ref;
-  IntrusiveForwardList<IFLTestValue> ifl;
+  ListType ifl;
 
   auto ref_it = ref.insert_after(ref.before_begin(), 4);
   auto ifl_it = ifl.insert_after(ifl.before_begin(), value4);
@@ -149,23 +266,31 @@
   ASSERT_EQ(*ref_it, *ifl_it);
 }
 
-TEST(IntrusiveForwardList, InsertAfter2) {
+TEST_F(IntrusiveForwardListTest, InsertAfter1) {
+  InsertAfter1<IFLTestValueList>();
+  InsertAfter1<ConstIFLTestValueList>();
+  InsertAfter1<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::InsertAfter2() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref;
-  IntrusiveForwardList<IFLTestValue> ifl;
+  ListType ifl;
 
   auto ref_it = ref.insert_after(ref.before_begin(), { 2, 8, 5 });
-  std::vector<IFLTestValue> storage1({ { 2 }, { 8 }, { 5 } });
+  std::vector<ValueType> storage1({ { 2 }, { 8 }, { 5 } });
   auto ifl_it = ifl.insert_after(ifl.before_begin(), storage1.begin(), storage1.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   ASSERT_EQ(*ref_it, *ifl_it);
 
-  std::vector<IFLTestValue> storage2({ { 7 }, { 2 } });
+  std::vector<ValueType> storage2({ { 7 }, { 2 } });
   ref_it = ref.insert_after(ref.begin(), { 7, 2 });
   ifl_it = ifl.insert_after(ifl.begin(), storage2.begin(), storage2.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   ASSERT_EQ(*ref_it, *ifl_it);
 
-  std::vector<IFLTestValue> storage3({ { 1 }, { 3 }, { 4 }, { 9 } });
+  std::vector<ValueType> storage3({ { 1 }, { 3 }, { 4 }, { 9 } });
   ref_it = ref.begin();
   ifl_it = ifl.begin();
   std::advance(ref_it, std::distance(ref.begin(), ref.end()) - 1);
@@ -175,10 +300,18 @@
   ASSERT_LISTS_EQUAL(ref, ifl);
 }
 
-TEST(IntrusiveForwardList, EraseAfter1) {
+TEST_F(IntrusiveForwardListTest, InsertAfter2) {
+  InsertAfter2<IFLTestValueList>();
+  InsertAfter2<ConstIFLTestValueList>();
+  InsertAfter2<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::EraseAfter1() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 1, 2, 7, 4, 5 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   CHECK_EQ(std::distance(ref.begin(), ref.end()), 5);
 
@@ -230,10 +363,18 @@
   ASSERT_TRUE(ifl_it == ifl.begin());
 }
 
-TEST(IntrusiveForwardList, EraseAfter2) {
+TEST_F(IntrusiveForwardListTest, EraseAfter1) {
+  EraseAfter1<IFLTestValueList>();
+  EraseAfter1<ConstIFLTestValueList>();
+  EraseAfter1<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::EraseAfter2() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 1, 2, 7, 4, 5, 3, 2, 8, 9 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   CHECK_EQ(std::distance(ref.begin(), ref.end()), 9);
 
@@ -262,13 +403,21 @@
   CHECK_EQ(std::distance(ref.begin(), ref.end()), 0);
 }
 
-TEST(IntrusiveForwardList, SwapClear) {
+TEST_F(IntrusiveForwardListTest, EraseAfter2) {
+  EraseAfter2<IFLTestValueList>();
+  EraseAfter2<ConstIFLTestValueList>();
+  EraseAfter2<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::SwapClear() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref1({ 1, 2, 7 });
-  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
-  IntrusiveForwardList<IFLTestValue> ifl1(storage1.begin(), storage1.end());
+  std::vector<ValueType> storage1(ref1.begin(), ref1.end());
+  ListType ifl1(storage1.begin(), storage1.end());
   std::forward_list<int> ref2({ 3, 8, 6 });
-  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
-  IntrusiveForwardList<IFLTestValue> ifl2(storage2.begin(), storage2.end());
+  std::vector<ValueType> storage2(ref2.begin(), ref2.end());
+  ListType ifl2(storage2.begin(), storage2.end());
   ASSERT_LISTS_EQUAL(ref1, ifl1);
   ASSERT_LISTS_EQUAL(ref2, ifl2);
   ref1.swap(ref2);
@@ -289,12 +438,20 @@
   ASSERT_LISTS_EQUAL(ref2, ifl2);
 }
 
-TEST(IntrusiveForwardList, SpliceAfter) {
+TEST_F(IntrusiveForwardListTest, SwapClear) {
+  SwapClear<IFLTestValueList>();
+  SwapClear<ConstIFLTestValueList>();
+  SwapClear<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::SpliceAfter() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref1({ 3, 1, 2, 7, 4, 5, 4, 8, 7 });
   std::forward_list<int> ref2;
-  std::vector<IFLTestValue> storage(ref1.begin(), ref1.end());
-  IntrusiveForwardList<IFLTestValue> ifl1(storage.begin(), storage.end());
-  IntrusiveForwardList<IFLTestValue> ifl2;
+  std::vector<ValueType> storage(ref1.begin(), ref1.end());
+  ListType ifl1(storage.begin(), storage.end());
+  ListType ifl2;
   ASSERT_LISTS_EQUAL(ref1, ifl1);
   ASSERT_LISTS_EQUAL(ref2, ifl2);
 
@@ -398,10 +555,18 @@
   ASSERT_LISTS_EQUAL(check, ifl2);
 }
 
-TEST(IntrusiveForwardList, Remove) {
+TEST_F(IntrusiveForwardListTest, SpliceAfter) {
+  SpliceAfter<IFLTestValueList>();
+  SpliceAfter<ConstIFLTestValueList>();
+  SpliceAfter<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Remove() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 3, 1, 2, 7, 4, 5, 4, 8, 7 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   ref.remove(1);
   ifl.remove(1);
@@ -409,20 +574,28 @@
   ref.remove(4);
   ifl.remove(4);
   ASSERT_LISTS_EQUAL(ref, ifl);
-  auto odd = [](IFLTestValue value) { return (value.value & 1) != 0; };  // NOLINT(readability/braces)
+  auto odd = [](ValueType value) { return (value.value & 1) != 0; };  // NOLINT(readability/braces)
   ref.remove_if(odd);
   ifl.remove_if(odd);
   ASSERT_LISTS_EQUAL(ref, ifl);
-  auto all = [](IFLTestValue value ATTRIBUTE_UNUSED) { return true; };  // NOLINT(readability/braces)
+  auto all = [](ValueType value ATTRIBUTE_UNUSED) { return true; };  // NOLINT(readability/braces)
   ref.remove_if(all);
   ifl.remove_if(all);
   ASSERT_LISTS_EQUAL(ref, ifl);
 }
 
-TEST(IntrusiveForwardList, Unique) {
+TEST_F(IntrusiveForwardListTest, Remove) {
+  Remove<IFLTestValueList>();
+  Remove<ConstIFLTestValueList>();
+  Remove<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Unique() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 3, 1, 1, 2, 3, 3, 7, 7, 4, 4, 5, 7 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   ref.unique();
   ifl.unique();
@@ -430,7 +603,7 @@
   std::forward_list<int> check({ 3, 1, 2, 3, 7, 4, 5, 7 });
   ASSERT_LISTS_EQUAL(check, ifl);
 
-  auto bin_pred = [](IFLTestValue lhs, IFLTestValue rhs) {
+  auto bin_pred = [](const ValueType& lhs, const ValueType& rhs) {
     return (lhs.value & ~1) == (rhs.value & ~1);
   };
   ref.unique(bin_pred);
@@ -440,13 +613,21 @@
   ASSERT_LISTS_EQUAL(check, ifl);
 }
 
-TEST(IntrusiveForwardList, Merge) {
+TEST_F(IntrusiveForwardListTest, Unique) {
+  Unique<IFLTestValueList>();
+  Unique<ConstIFLTestValueList>();
+  Unique<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Merge() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref1({ 1, 4, 8, 8, 12 });
-  std::vector<IFLTestValue> storage1(ref1.begin(), ref1.end());
-  IntrusiveForwardList<IFLTestValue> ifl1(storage1.begin(), storage1.end());
+  std::vector<ValueType> storage1(ref1.begin(), ref1.end());
+  ListType ifl1(storage1.begin(), storage1.end());
   std::forward_list<int> ref2({ 3, 5, 6, 7, 9 });
-  std::vector<IFLTestValue> storage2(ref2.begin(), ref2.end());
-  IntrusiveForwardList<IFLTestValue> ifl2(storage2.begin(), storage2.end());
+  std::vector<ValueType> storage2(ref2.begin(), ref2.end());
+  ListType ifl2(storage2.begin(), storage2.end());
   ASSERT_LISTS_EQUAL(ref1, ifl1);
   ASSERT_LISTS_EQUAL(ref2, ifl2);
   CHECK(std::is_sorted(ref1.begin(), ref1.end()));
@@ -460,10 +641,18 @@
   ASSERT_LISTS_EQUAL(check, ifl1);
 }
 
-TEST(IntrusiveForwardList, Sort1) {
+TEST_F(IntrusiveForwardListTest, Merge) {
+  Merge<IFLTestValueList>();
+  Merge<ConstIFLTestValueList>();
+  Merge<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Sort1() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 2, 9, 8, 3, 7, 4, 1, 5, 3, 0 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   CHECK(!std::is_sorted(ref.begin(), ref.end()));
   ref.sort();
@@ -473,12 +662,20 @@
   ASSERT_LISTS_EQUAL(check, ifl);
 }
 
-TEST(IntrusiveForwardList, Sort2) {
+TEST_F(IntrusiveForwardListTest, Sort1) {
+  Sort1<IFLTestValueList>();
+  Sort1<ConstIFLTestValueList>();
+  Sort1<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Sort2() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 2, 9, 8, 3, 7, 4, 1, 5, 3, 0 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
-  auto cmp = [](IFLTestValue lhs, IFLTestValue rhs) {
+  auto cmp = [](const ValueType& lhs, const ValueType& rhs) {
     return (lhs.value & ~1) < (rhs.value & ~1);
   };
   CHECK(!std::is_sorted(ref.begin(), ref.end(), cmp));
@@ -489,10 +686,18 @@
   ASSERT_LISTS_EQUAL(check, ifl);
 }
 
-TEST(IntrusiveForwardList, Reverse) {
+TEST_F(IntrusiveForwardListTest, Sort2) {
+  Sort2<IFLTestValueList>();
+  Sort2<ConstIFLTestValueList>();
+  Sort2<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::Reverse() {
+  using ValueType = typename ListType::value_type;
   std::forward_list<int> ref({ 8, 3, 5, 4, 1, 3 });
-  std::vector<IFLTestValue> storage(ref.begin(), ref.end());
-  IntrusiveForwardList<IFLTestValue> ifl(storage.begin(), storage.end());
+  std::vector<ValueType> storage(ref.begin(), ref.end());
+  ListType ifl(storage.begin(), storage.end());
   ASSERT_LISTS_EQUAL(ref, ifl);
   CHECK(!std::is_sorted(ref.begin(), ref.end()));
   ref.reverse();
@@ -502,4 +707,73 @@
   ASSERT_LISTS_EQUAL(check, ifl);
 }
 
+TEST_F(IntrusiveForwardListTest, Reverse) {  // Runs the Reverse template once per list type.
+  Reverse<IFLTestValueList>();
+  Reverse<ConstIFLTestValueList>();
+  Reverse<IFLTestValue2List>();
+}
+
+template <typename ListType>
+void IntrusiveForwardListTest::ModifyValue() {
+  using ValueType = typename ListType::value_type;
+  std::forward_list<int> ref({ 3, 7, 42 });
+  std::vector<ValueType> nodes(ref.begin(), ref.end());
+  ListType ifl(nodes.begin(), nodes.end());
+  ASSERT_LISTS_EQUAL(ref, ifl);
+  // Apply the same increment in place to both lists; they must still compare equal afterwards.
+  auto bump = [](const ValueType& v) { return v.value + 1; };  // NOLINT [readability/braces]
+  std::transform(ref.begin(), ref.end(), ref.begin(), bump);
+  std::transform(ifl.begin(), ifl.end(), ifl.begin(), bump);
+  ASSERT_LISTS_EQUAL(ref, ifl);
+}
+
+TEST_F(IntrusiveForwardListTest, ModifyValue) {
+  ModifyValue<IFLTestValueList>();
+  // ConstIFLTestValueList is skipped: assignment through its const elements does not compile.
+  // ModifyValue<ConstIFLTestValueList>();
+  static_assert(std::is_const<ConstIFLTestValueList::iterator::value_type>::value, "Const check.");
+  ModifyValue<IFLTestValue2List>();
+}
+
+struct Tag1;
+struct Tag2;
+struct TwoListsValue : IntrusiveForwardListNode<TwoListsValue, Tag1>,
+                       IntrusiveForwardListNode<TwoListsValue, Tag2> {
+  // Implicit conversion from int is intentional (constructor deliberately not explicit).
+  TwoListsValue(int v) : value(v) {}  // NOLINT(runtime/explicit)
+
+  int value;
+};
+using FirstList =
+    IntrusiveForwardList<TwoListsValue, IntrusiveForwardListBaseHookTraits<TwoListsValue, Tag1>>;
+using SecondList =
+    IntrusiveForwardList<TwoListsValue, IntrusiveForwardListBaseHookTraits<TwoListsValue, Tag2>>;
+
+bool operator==(const TwoListsValue& lhs, const TwoListsValue& rhs) {
+  return rhs.value == lhs.value;  // Equality is defined by the stored int only.
+}
+
+TEST_F(IntrusiveForwardListTest, TwoLists) {
+  // One set of nodes is linked into two lists at once, each list using its own hook (Tag1/Tag2).
+  std::vector<TwoListsValue> nodes({ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 });  // nodes[i].value == i
+
+  std::vector<int> order1({ 3, 1, 7, 2, 8, 9, 4, 0, 6, 5 });
+  FirstList list1;
+  auto tail1 = list1.before_begin();
+  for (size_t index : order1) {
+    tail1 = list1.insert_after(tail1, nodes[index]);
+  }
+
+  std::vector<int> order2({ 8, 5, 1, 6, 7, 2, 9, 3, 0, 4 });
+  SecondList list2;
+  auto tail2 = list2.before_begin();
+  for (size_t index : order2) {
+    tail2 = list2.insert_after(tail2, nodes[index]);
+  }
+
+  // Since nodes[i].value == i, each list must compare equal to the index order used to build it.
+  ASSERT_LISTS_EQUAL(order1, list1);
+  ASSERT_LISTS_EQUAL(order2, list2);
+}
+
 }  // namespace art
diff --git a/compiler/utils/label.h b/compiler/utils/label.h
index 0f82ad5..4c6ae8e 100644
--- a/compiler/utils/label.h
+++ b/compiler/utils/label.h
@@ -29,24 +29,24 @@
 namespace arm {
   class ArmAssembler;
   class Thumb2Assembler;
-}
+}  // namespace arm
 namespace arm64 {
   class Arm64Assembler;
-}
+}  // namespace arm64
 namespace mips {
   class MipsAssembler;
-}
+}  // namespace mips
 namespace mips64 {
   class Mips64Assembler;
-}
+}  // namespace mips64
 namespace x86 {
   class X86Assembler;
   class NearLabel;
-}
+}  // namespace x86
 namespace x86_64 {
   class X86_64Assembler;
   class NearLabel;
-}
+}  // namespace x86_64
 
 class ExternalLabel {
  public:
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 184cdf5..2b7b2aa 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -26,24 +26,24 @@
 
 namespace arm {
 class ArmManagedRegister;
-}
+}  // namespace arm
 namespace arm64 {
 class Arm64ManagedRegister;
-}
+}  // namespace arm64
 namespace mips {
 class MipsManagedRegister;
-}
+}  // namespace mips
 namespace mips64 {
 class Mips64ManagedRegister;
-}
+}  // namespace mips64
 
 namespace x86 {
 class X86ManagedRegister;
-}
+}  // namespace x86
 
 namespace x86_64 {
 class X86_64ManagedRegister;
-}
+}  // namespace x86_64
 
 class ManagedRegister : public ValueObject {
  public:
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index a99d02d..0b05b75 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -404,6 +404,129 @@
   return encoding;
 }
 
+uint32_t MipsAssembler::EmitMsa3R(int operation,
+                                  int df,
+                                  VectorRegister wt,
+                                  VectorRegister ws,
+                                  VectorRegister wd,
+                                  int minor_opcode) {
+  // All three vector register operands must be valid.
+  CHECK_NE(wt, kNoVectorRegister);
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  // OR each shifted field into the word, then emit it and return it to the caller.
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= operation << kMsaOperationShift | df << kDfShift;
+  encoding |= static_cast<uint32_t>(wt) << kWtShift;
+  encoding |= static_cast<uint32_t>(ws) << kWsShift | static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaBIT(int operation,
+                                   int df_m,
+                                   VectorRegister ws,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  // Validate the registers, OR the shifted fields together, emit and return the word.
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= operation << kMsaOperationShift | df_m << kDfMShift;
+  encoding |= static_cast<uint32_t>(ws) << kWsShift;
+  encoding |= static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaELM(int operation,
+                                   int df_n,
+                                   VectorRegister ws,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  // Validate the registers, OR the shifted fields together, emit and return the word.
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= operation << kMsaELMOperationShift | df_n << kDfNShift;
+  encoding |= static_cast<uint32_t>(ws) << kWsShift;
+  encoding |= static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaMI10(int s10,
+                                    Register rs,
+                                    VectorRegister wd,
+                                    int minor_opcode,
+                                    int df) {
+  // s10 must already fit in 10 unsigned bits; rs is placed with kWsShift.
+  CHECK_NE(rs, kNoRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  CHECK(IsUint<10>(s10)) << s10;
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= s10 << kS10Shift;
+  encoding |= static_cast<uint32_t>(rs) << kWsShift;
+  encoding |= static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode << kS10MinorShift | df;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsaI10(int operation,
+                                   int df,
+                                   int i10,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  // i10 must already fit in 10 unsigned bits; the fields are OR-ed into one word.
+  CHECK_NE(wd, kNoVectorRegister);
+  CHECK(IsUint<10>(i10)) << i10;
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= operation << kMsaOperationShift | df << kDfShift;
+  encoding |= i10 << kI10Shift;
+  encoding |= static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsa2R(int operation,
+                                  int df,
+                                  VectorRegister ws,
+                                  VectorRegister wd,
+                                  int minor_opcode) {
+  // Validate the registers, OR the shifted fields together, emit and return the word.
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= operation << kMsa2ROperationShift | df << kDf2RShift;
+  encoding |= static_cast<uint32_t>(ws) << kWsShift;
+  encoding |= static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
+uint32_t MipsAssembler::EmitMsa2RF(int operation,
+                                   int df,
+                                   VectorRegister ws,
+                                   VectorRegister wd,
+                                   int minor_opcode) {
+  // Validate the registers, OR the shifted fields together, emit and return the word.
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift;
+  encoding |= operation << kMsa2RFOperationShift | df << kDf2RShift;
+  encoding |= static_cast<uint32_t>(ws) << kWsShift;
+  encoding |= static_cast<uint32_t>(wd) << kWdShift;
+  encoding |= minor_opcode;
+  Emit(encoding);
+  return encoding;
+}
+
 void MipsAssembler::Addu(Register rd, Register rs, Register rt) {
   DsFsmInstrRrr(EmitR(0, rs, rt, rd, 0, 0x21), rd, rs, rt);
 }
@@ -635,9 +758,8 @@
   DsFsmInstrRrr(EmitR(0x1f, rt, rd, static_cast<Register>(pos + size - 1), pos, 0x04), rd, rd, rt);
 }
 
-// TODO: This instruction is available in both R6 and MSA and it should be used when available.
 void MipsAssembler::Lsa(Register rd, Register rs, Register rt, int saPlusOne) {
-  CHECK(IsR6());
+  CHECK(IsR6() || HasMsa());
   CHECK(1 <= saPlusOne && saPlusOne <= 4) << saPlusOne;
   int sa = saPlusOne - 1;
   DsFsmInstrRrr(EmitR(0x0, rs, rt, rd, sa, 0x05), rd, rs, rt);
@@ -653,7 +775,7 @@
   if (shamt == TIMES_1) {
     // Catch the special case where the shift amount is zero (0).
     Addu(dst, src_base, src_idx);
-  } else if (IsR6()) {
+  } else if (IsR6() || HasMsa()) {
     Lsa(dst, src_idx, src_base, shamt);
   } else {
     Sll(tmp, src_idx, shamt);
@@ -1709,6 +1831,1079 @@
   SetReorder(reordering);
 }
 
+void MipsAssembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x0, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x1, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x2, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x3, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x0, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x1, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x2, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x3, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x1, df 0x0, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x1, df 0x1, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x1, df 0x2, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x1, df 0x3, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x0, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x1, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x2, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x3, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x0, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x1, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x2, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x3, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x0, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x1, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x2, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x3, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x0, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x1, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x2, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x3, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x0, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x1, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x2, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x3, minor opcode 0x12.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x0, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x1, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x2, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x3, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x0, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x1, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x2, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x3, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x0, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x1, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x2, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x3, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x0, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x1, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x2, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x6, df 0x3, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x0, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x1, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x2, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x7, df 0x3, minor opcode 0x10.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x2, df 0x0, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x2, df 0x1, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x2, df 0x2, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x2, df 0x3, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x3, df 0x0, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x3, df 0x1, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x3, df 0x2, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x3, df 0x3, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x0, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x1, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x2, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x4, df 0x3, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x0, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x1, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x2, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x5, df 0x3, minor opcode 0xe.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x0, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x1, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // MSA-only. 3R fields: operation 0x0, df 0x2, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b);
+  const FRegister fd = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, fd, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x0, df 0x3, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x0, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x1, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x2, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x3, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x7, df 0x0, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x7, df 0x1, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x6, df 0x0, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x6, df 0x1, minor opcode 0x1b.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::Ffint_sW(VectorRegister wd, VectorRegister ws) {
+  // Encoded via EmitMsa2RF: operation 0x19e, df 0x0, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::Ffint_sD(VectorRegister wd, VectorRegister ws) {
+  // Encoded via EmitMsa2RF: operation 0x19e, df 0x1, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::Ftint_sW(VectorRegister wd, VectorRegister ws) {
+  // Encoded via EmitMsa2RF: operation 0x19c, df 0x0, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::Ftint_sD(VectorRegister wd, VectorRegister ws) {
+  // Encoded via EmitMsa2RF: operation 0x19c, df 0x1, minor opcode 0x1e.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x0, df 0x0, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x0, df 0x1, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x0, df 0x2, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x0, df 0x3, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x0, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x1, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x2, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x1, df 0x3, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x2, df 0x0, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x2, df 0x1, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x2, df 0x2, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x2, df 0x3, minor opcode 0xd.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;  // shamt3 must be an unsigned 3-bit value.
+  const int df_m = shamt3 | kMsaDfMByteMask;
+  const uint32_t encoding = EmitMsaBIT(0x0, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;  // shamt4 must be an unsigned 4-bit value.
+  const int df_m = shamt4 | kMsaDfMHalfwordMask;
+  const uint32_t encoding = EmitMsaBIT(0x0, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;  // shamt5 must be an unsigned 5-bit value.
+  const int df_m = shamt5 | kMsaDfMWordMask;
+  const uint32_t encoding = EmitMsaBIT(0x0, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;  // shamt6 must be an unsigned 6-bit value.
+  const int df_m = shamt6 | kMsaDfMDoublewordMask;
+  const uint32_t encoding = EmitMsaBIT(0x0, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;  // shamt3 must be an unsigned 3-bit value.
+  const int df_m = shamt3 | kMsaDfMByteMask;
+  const uint32_t encoding = EmitMsaBIT(0x1, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;  // shamt4 must be an unsigned 4-bit value.
+  const int df_m = shamt4 | kMsaDfMHalfwordMask;
+  const uint32_t encoding = EmitMsaBIT(0x1, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;  // shamt5 must be an unsigned 5-bit value.
+  const int df_m = shamt5 | kMsaDfMWordMask;
+  const uint32_t encoding = EmitMsaBIT(0x1, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;  // shamt6 must be an unsigned 6-bit value.
+  const int df_m = shamt6 | kMsaDfMDoublewordMask;
+  const uint32_t encoding = EmitMsaBIT(0x1, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;  // shamt3 must be an unsigned 3-bit value.
+  const int df_m = shamt3 | kMsaDfMByteMask;
+  const uint32_t encoding = EmitMsaBIT(0x2, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;  // shamt4 must be an unsigned 4-bit value.
+  const int df_m = shamt4 | kMsaDfMHalfwordMask;
+  const uint32_t encoding = EmitMsaBIT(0x2, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;  // shamt5 must be an unsigned 5-bit value.
+  const int df_m = shamt5 | kMsaDfMWordMask;
+  const uint32_t encoding = EmitMsaBIT(0x2, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;  // shamt6 must be an unsigned 6-bit value.
+  const int df_m = shamt6 | kMsaDfMDoublewordMask;
+  const uint32_t encoding = EmitMsaBIT(0x2, df_m, ws, wd, 0x9);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::MoveV(VectorRegister wd, VectorRegister ws) {
+  // Encoded via EmitMsaBIT: operation 0x1, df_m 0x3e, minor opcode 0x19.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;  // n4 must be an unsigned 4-bit value.
+  const int df_n = n4 | kMsaDfNByteMask;
+  const uint32_t encoding = EmitMsaELM(0x1, df_n, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;  // n3 must be an unsigned 3-bit value.
+  const int df_n = n3 | kMsaDfNHalfwordMask;
+  const uint32_t encoding = EmitMsaELM(0x1, df_n, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;  // n2 must be an unsigned 2-bit value.
+  const int df_n = n2 | kMsaDfNWordMask;
+  const uint32_t encoding = EmitMsaELM(0x1, df_n, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) {
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;  // n1 must be an unsigned 1-bit value.
+  const int df_n = n1 | kMsaDfNDoublewordMask;
+  const uint32_t encoding = EmitMsaELM(0x1, df_n, ws, wd, 0x19);
+  const FRegister src = static_cast<FRegister>(ws);
+  DsFsmInstrFff(encoding, static_cast<FRegister>(wd), src, src);  // ws is passed twice.
+}
+
+void MipsAssembler::FillB(VectorRegister wd, Register rs) {
+  CHECK(HasMsa());
+  // rs is cast to VectorRegister so it can occupy the ws operand of EmitMsa2R.
+  const VectorRegister rs_as_ws = static_cast<VectorRegister>(rs);
+  const uint32_t encoding = EmitMsa2R(0xc0, 0x0, rs_as_ws, wd, 0x1e);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::FillH(VectorRegister wd, Register rs) {
+  CHECK(HasMsa());
+  // rs is cast to VectorRegister so it can occupy the ws operand of EmitMsa2R.
+  const VectorRegister rs_as_ws = static_cast<VectorRegister>(rs);
+  const uint32_t encoding = EmitMsa2R(0xc0, 0x1, rs_as_ws, wd, 0x1e);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::FillW(VectorRegister wd, Register rs) {
+  CHECK(HasMsa());
+  // rs is cast to VectorRegister so it can occupy the ws operand of EmitMsa2R.
+  const VectorRegister rs_as_ws = static_cast<VectorRegister>(rs);
+  const uint32_t encoding = EmitMsa2R(0xc0, 0x2, rs_as_ws, wd, 0x1e);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdiB(VectorRegister wd, int imm8) {
+  CHECK(HasMsa());
+  CHECK(IsInt<8>(imm8)) << imm8;  // imm8 must be a signed 8-bit value.
+  const int i10 = imm8 & kMsaS10Mask;  // Keep only the bits selected by kMsaS10Mask.
+  const uint32_t encoding = EmitMsaI10(0x6, 0x0, i10, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdiH(VectorRegister wd, int imm10) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;  // imm10 must be a signed 10-bit value.
+  const int i10 = imm10 & kMsaS10Mask;  // Keep only the bits selected by kMsaS10Mask.
+  const uint32_t encoding = EmitMsaI10(0x6, 0x1, i10, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdiW(VectorRegister wd, int imm10) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;  // imm10 must be a signed 10-bit value.
+  const int i10 = imm10 & kMsaS10Mask;  // Keep only the bits selected by kMsaS10Mask.
+  const uint32_t encoding = EmitMsaI10(0x6, 0x2, i10, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdiD(VectorRegister wd, int imm10) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;  // imm10 must be a signed 10-bit value.
+  const int i10 = imm10 & kMsaS10Mask;  // Keep only the bits selected by kMsaS10Mask.
+  const uint32_t encoding = EmitMsaI10(0x6, 0x3, i10, wd, 0x7);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), ZERO);
+}
+
+void MipsAssembler::LdB(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(offset)) << offset;  // offset must be a signed 10-bit value.
+  const int s10 = offset & kMsaS10Mask;  // No scaling is applied before masking.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x8, 0x0);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdH(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<11>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsHalfwordSize);
+  const int s10 = (offset >> TIMES_2) & kMsaS10Mask;  // Shifted by TIMES_2, then masked.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x8, 0x1);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdW(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<12>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsWordSize);
+  const int s10 = (offset >> TIMES_4) & kMsaS10Mask;  // Shifted by TIMES_4, then masked.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x8, 0x2);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::LdD(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<13>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsDoublewordSize);
+  const int s10 = (offset >> TIMES_8) & kMsaS10Mask;  // Shifted by TIMES_8, then masked.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x8, 0x3);
+  DsFsmInstrFr(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StB(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());  // Aborts unless the assembler was built with MSA support.
+  CHECK(IsInt<10>(offset)) << offset;  // offset must be a signed 10-bit value.
+  DsFsmInstrFR(EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0), static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StH(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<11>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsHalfwordSize);
+  const int s10 = (offset >> TIMES_2) & kMsaS10Mask;  // Shifted by TIMES_2, then masked.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x9, 0x1);
+  DsFsmInstrFR(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StW(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<12>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsWordSize);
+  const int s10 = (offset >> TIMES_4) & kMsaS10Mask;  // Shifted by TIMES_4, then masked.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x9, 0x2);
+  DsFsmInstrFR(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::StD(VectorRegister wd, Register rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<13>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMipsDoublewordSize);
+  const int s10 = (offset >> TIMES_8) & kMsaS10Mask;  // Shifted by TIMES_8, then masked.
+  const uint32_t encoding = EmitMsaMI10(s10, rs, wd, 0x9, 0x3);
+  DsFsmInstrFR(encoding, static_cast<FRegister>(wd), rs);
+}
+
+void MipsAssembler::IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x5, df 0x0, minor opcode 0x14.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x14);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x5, df 0x1, minor opcode 0x14.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x14);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x5, df 0x2, minor opcode 0x14.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x14);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
+void MipsAssembler::IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  // Encoded via EmitMsa3R: operation 0x5, df 0x3, minor opcode 0x14.
+  CHECK(HasMsa());
+  const uint32_t encoding = EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
+  const FRegister dst = static_cast<FRegister>(wd);
+  DsFsmInstrFff(encoding, dst, static_cast<FRegister>(ws), static_cast<FRegister>(wt));
+}
+
 void MipsAssembler::LoadConst32(Register rd, int32_t value) {
   if (IsUint<16>(value)) {
     // Use OR with (unsigned) immediate to encode 16b unsigned int.
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 463daeb..dd4ce6d 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -25,6 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/enums.h"
 #include "base/macros.h"
+#include "base/stl_util_identity.h"
 #include "constants_mips.h"
 #include "globals.h"
 #include "managed_register_mips.h"
@@ -36,6 +37,7 @@
 namespace art {
 namespace mips {
 
+static constexpr size_t kMipsHalfwordSize = 2;
 static constexpr size_t kMipsWordSize = 4;
 static constexpr size_t kMipsDoublewordSize = 8;
 
@@ -194,6 +196,7 @@
         last_position_adjustment_(0),
         last_old_position_(0),
         last_branch_id_(0),
+        has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false),
         isa_features_(instruction_set_features) {
     cfi().DelayEmittingAdvancePCs();
   }
@@ -464,6 +467,149 @@
   void Clear(Register rd);
   void Not(Register rd, Register rs);
 
+  // MSA instructions. The emitters CHECK(HasMsa()), so only call them when MSA is available.
+  void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Add_aD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Ave_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void Ffint_sW(VectorRegister wd, VectorRegister ws);
+  void Ffint_sD(VectorRegister wd, VectorRegister ws);
+  void Ftint_sW(VectorRegister wd, VectorRegister ws);
+  void Ftint_sD(VectorRegister wd, VectorRegister ws);
+
+  void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1).
+  void SlliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SlliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SlliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SlliD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SraiB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SraiH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SraiW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SraiD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SrliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SrliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SrliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SrliD(VectorRegister wd, VectorRegister ws, int shamt6);
+
+  void MoveV(VectorRegister wd, VectorRegister ws);
+  void SplatiB(VectorRegister wd, VectorRegister ws, int n4);
+  void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
+  void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
+  void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void FillB(VectorRegister wd, Register rs);
+  void FillH(VectorRegister wd, Register rs);
+  void FillW(VectorRegister wd, Register rs);
+
+  void LdiB(VectorRegister wd, int imm8);
+  void LdiH(VectorRegister wd, int imm10);
+  void LdiW(VectorRegister wd, int imm10);
+  void LdiD(VectorRegister wd, int imm10);
+  void LdB(VectorRegister wd, Register rs, int offset);
+  void LdH(VectorRegister wd, Register rs, int offset);
+  void LdW(VectorRegister wd, Register rs, int offset);
+  void LdD(VectorRegister wd, Register rs, int offset);
+  void StB(VectorRegister wd, Register rs, int offset);
+  void StH(VectorRegister wd, Register rs, int offset);
+  void StW(VectorRegister wd, Register rs, int offset);
+  void StD(VectorRegister wd, Register rs, int offset);
+
+  void IlvrB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
   // Higher level composite instructions.
   void LoadConst32(Register rd, int32_t value);
   void LoadConst64(Register reg_hi, Register reg_lo, int64_t value);
@@ -1282,6 +1428,30 @@
   uint32_t EmitFI(int opcode, int fmt, FRegister rt, uint16_t imm);
   void EmitBcondR2(BranchCondition cond, Register rs, Register rt, uint16_t imm16);
   void EmitBcondR6(BranchCondition cond, Register rs, Register rt, uint32_t imm16_21);
+  uint32_t EmitMsa3R(int operation,
+                     int df,
+                     VectorRegister wt,
+                     VectorRegister ws,
+                     VectorRegister wd,
+                     int minor_opcode);
+  uint32_t EmitMsaBIT(int operation,
+                      int df_m,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
+  uint32_t EmitMsaELM(int operation,
+                      int df_n,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
+  uint32_t EmitMsaMI10(int s10, Register rs, VectorRegister wd, int minor_opcode, int df);
+  uint32_t EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
+  uint32_t EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
+  uint32_t EmitMsa2RF(int operation,
+                      int df,
+                      VectorRegister ws,
+                      VectorRegister wd,
+                      int minor_opcode);
 
   void Buncond(MipsLabel* label);
   void Bcond(MipsLabel* label, BranchCondition condition, Register lhs, Register rhs = ZERO);
@@ -1332,6 +1502,10 @@
   // Emits exception block.
   void EmitExceptionPoll(MipsExceptionSlowPath* exception);
 
+  // True when MSA instructions may be emitted. The flag is fixed at construction
+  // from the instruction set features and is false when none were supplied.
+  bool HasMsa() const { return has_msa_; }
+
   bool IsR6() const {
     if (isa_features_ != nullptr) {
       return isa_features_->IsR6();
@@ -1386,6 +1560,8 @@
   uint32_t last_old_position_;
   uint32_t last_branch_id_;
 
+  const bool has_msa_;
+
   const MipsInstructionSetFeatures* isa_features_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsAssembler);
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 30667ef..d464260 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -34,9 +34,14 @@
 class AssemblerMIPS32r6Test : public AssemblerTest<mips::MipsAssembler,
                                                    mips::Register,
                                                    mips::FRegister,
-                                                   uint32_t> {
+                                                   uint32_t,
+                                                   mips::VectorRegister> {
  public:
-  typedef AssemblerTest<mips::MipsAssembler, mips::Register, mips::FRegister, uint32_t> Base;
+  typedef AssemblerTest<mips::MipsAssembler,
+                        mips::Register,
+                        mips::FRegister,
+                        uint32_t,
+                        mips::VectorRegister> Base;
 
   AssemblerMIPS32r6Test() :
     instruction_set_features_(MipsInstructionSetFeatures::FromVariant("mips32r6", nullptr)) {
@@ -61,7 +66,7 @@
     // We use "-modd-spreg" so we can use odd-numbered single precision FPU registers.
     // We put the code at address 0x1000000 (instead of 0) to avoid overlapping with the
     // .MIPS.abiflags section (there doesn't seem to be a way to suppress its generation easily).
-    return " -march=mips32r6 -modd-spreg -Wa,--no-warn"
+    return " -march=mips32r6 -mmsa -modd-spreg -Wa,--no-warn"
         " -Wl,-Ttext=0x1000000 -Wl,-e0x1000000 -nostdlib";
   }
 
@@ -182,6 +187,39 @@
       fp_registers_.push_back(new mips::FRegister(mips::F29));
       fp_registers_.push_back(new mips::FRegister(mips::F30));
       fp_registers_.push_back(new mips::FRegister(mips::F31));
+
+      vec_registers_.push_back(new mips::VectorRegister(mips::W0));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W1));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W2));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W3));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W4));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W5));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W6));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W7));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W8));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W9));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W10));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W11));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W12));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W13));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W14));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W15));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W16));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W17));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W18));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W19));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W20));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W21));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W22));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W23));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W24));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W25));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W26));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W27));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W28));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W29));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W30));
+      vec_registers_.push_back(new mips::VectorRegister(mips::W31));
     }
   }
 
@@ -189,6 +227,7 @@
     AssemblerTest::TearDown();
     STLDeleteElements(&registers_);
     STLDeleteElements(&fp_registers_);
+    STLDeleteElements(&vec_registers_);
   }
 
   std::vector<mips::Register*> GetRegisters() OVERRIDE {
@@ -199,6 +238,10 @@
     return fp_registers_;
   }
 
+  std::vector<mips::VectorRegister*> GetVectorRegisters() OVERRIDE {
+    return vec_registers_;  // Copies the pointer list; pointees are freed by STLDeleteElements.
+  }
+
   uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
     return imm_value;
   }
@@ -250,6 +293,7 @@
   std::map<mips::Register, std::string, MIPSCpuRegisterCompare> secondary_register_names_;
 
   std::vector<mips::FRegister*> fp_registers_;
+  std::vector<mips::VectorRegister*> vec_registers_;  // W0..W31; freed via STLDeleteElements.
   std::unique_ptr<const MipsInstructionSetFeatures> instruction_set_features_;
 };
 
@@ -328,13 +372,11 @@
 }
 
 TEST_F(AssemblerMIPS32r6Test, Seleqz) {
-  DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"),
-            "seleqz");
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Seleqz, "seleqz ${reg1}, ${reg2}, ${reg3}"), "seleqz");
 }
 
 TEST_F(AssemblerMIPS32r6Test, Selnez) {
-  DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"),
-            "selnez");
+  DriverStr(RepeatRRR(&mips::MipsAssembler::Selnez, "selnez ${reg1}, ${reg2}, ${reg3}"), "selnez");
 }
 
 TEST_F(AssemblerMIPS32r6Test, ClzR6) {
@@ -914,6 +956,566 @@
 //        AssemblerMIPS32r6Test.Bltu
 //        AssemblerMIPS32r6Test.Bgeu
 
+// MSA instructions.
+
+TEST_F(AssemblerMIPS32r6Test, AndV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, OrV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, NorV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, XorV) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"), "addv.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"), "addv.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"), "addv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, AddvD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"), "addv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"), "subv.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"), "subv.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"), "subv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SubvD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"), "subv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"), "mulv.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"), "mulv.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"), "mulv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MulvD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"), "mulv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Div_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Mod_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aB, "add_a.b ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aH, "add_a.h ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aW, "add_a.w ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Add_aD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Add_aD, "add_a.d ${reg1}, ${reg2}, ${reg3}"),
+            "add_a.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sB, "ave_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sH, "ave_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sW, "ave_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_sD, "ave_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uB, "ave_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uH, "ave_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uW, "ave_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ave_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Ave_uD, "ave_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "ave_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sB, "aver_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sH, "aver_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sW, "aver_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_sD, "aver_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uB, "aver_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uH, "aver_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uW, "aver_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Aver_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Aver_uD, "aver_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "aver_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sB, "max_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sH, "max_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sW, "max_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_sD, "max_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uB, "max_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uH, "max_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uW, "max_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Max_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Max_uD, "max_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sB, "min_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sH, "min_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sW, "min_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_sD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_sD, "min_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uB, "min_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uH, "min_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uW, "min_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Min_uD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::Min_uD, "min_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FaddW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"), "fadd.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FaddD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"), "fadd.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FsubW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"), "fsub.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FsubD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"), "fsub.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmulW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"), "fmul.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmulD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"), "fmul.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FdivW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"), "fdiv.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FdivD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"), "fdiv.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmaxW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmaxW, "fmax.w ${reg1}, ${reg2}, ${reg3}"), "fmax.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FmaxD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FmaxD, "fmax.d ${reg1}, ${reg2}, ${reg3}"), "fmax.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FminW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FminW, "fmin.w ${reg1}, ${reg2}, ${reg3}"), "fmin.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FminD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::FminD, "fmin.d ${reg1}, ${reg2}, ${reg3}"), "fmin.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ffint_sW) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"), "ffint_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ffint_sD) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"), "ffint_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ftint_sW) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"), "ftint_s.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, Ftint_sD) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"), "ftint_s.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SllD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SllD, "sll.d ${reg1}, ${reg2}, ${reg3}"), "sll.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SraD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SrlD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliB) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"), "slli.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliH) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"), "slli.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliW) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"), "slli.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SlliD) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"), "slli.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, MoveV) {
+  DriverStr(RepeatVV(&mips::MipsAssembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiB) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"),
+            "splati.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiH) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"),
+            "splati.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiW) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"),
+            "splati.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SplatiD) {
+  DriverStr(RepeatVVIb(&mips::MipsAssembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"),
+            "splati.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FillB) {
+  DriverStr(RepeatVR(&mips::MipsAssembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FillH) {
+  DriverStr(RepeatVR(&mips::MipsAssembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, FillW) {
+  DriverStr(RepeatVR(&mips::MipsAssembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiB) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiH) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiW) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdiD) {
+  DriverStr(RepeatVIb(&mips::MipsAssembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdB) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdH) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2),
+            "ld.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdW) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4),
+            "ld.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, LdD) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8),
+            "ld.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, StB) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, StH) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2),
+            "st.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, StW) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4),
+            "st.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, StD) {
+  DriverStr(RepeatVRIb(&mips::MipsAssembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8),
+            "st.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrB) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrB, "ilvr.b ${reg1}, ${reg2}, ${reg3}"), "ilvr.b");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrH) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrH, "ilvr.h ${reg1}, ${reg2}, ${reg3}"), "ilvr.h");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrW) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrW, "ilvr.w ${reg1}, ${reg2}, ${reg3}"), "ilvr.w");
+}
+
+TEST_F(AssemblerMIPS32r6Test, IlvrD) {
+  DriverStr(RepeatVVV(&mips::MipsAssembler::IlvrD, "ilvr.d ${reg1}, ${reg2}, ${reg3}"), "ilvr.d");
+}
+
 #undef __
 
 }  // namespace art
diff --git a/compiler/utils/mips/constants_mips.h b/compiler/utils/mips/constants_mips.h
index 44ed5cc..b4dfdbd 100644
--- a/compiler/utils/mips/constants_mips.h
+++ b/compiler/utils/mips/constants_mips.h
@@ -75,8 +75,37 @@
   kFdShift = 6,
   kFdBits = 5,
 
+  kMsaOperationShift = 23,
+  kMsaELMOperationShift = 22,
+  kMsa2ROperationShift = 18,
+  kMsa2RFOperationShift = 17,
+  kDfShift = 21,
+  kDfMShift = 16,
+  kDf2RShift = 16,
+  kDfNShift = 16,
+  kWtShift = 16,
+  kWtBits = 5,
+  kWsShift = 11,
+  kWsBits = 5,
+  kWdShift = 6,
+  kWdBits = 5,
+  kS10Shift = 16,
+  kI10Shift = 11,
+  kS10MinorShift = 2,
+
   kBranchOffsetMask = 0x0000ffff,
   kJumpOffsetMask = 0x03ffffff,
+
+  kMsaMajorOpcode = 0x1e,
+  kMsaDfMByteMask = 0x70,
+  kMsaDfMHalfwordMask = 0x60,
+  kMsaDfMWordMask = 0x40,
+  kMsaDfMDoublewordMask = 0x00,
+  kMsaDfNByteMask = 0x00,
+  kMsaDfNHalfwordMask = 0x20,
+  kMsaDfNWordMask = 0x30,
+  kMsaDfNDoublewordMask = 0x38,
+  kMsaS10Mask = 0x3ff,
 };
 
 enum ScaleFactor {
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index c03b98c..24900a7 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -1456,6 +1456,86 @@
   EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x10);
 }
 
+void Mips64Assembler::Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x3, 0x0, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x3, 0x1, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x3, 0x2, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x3, 0x3, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x0, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x0, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0xe);
+}
+
 void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
   CHECK(HasMsa());
   EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
@@ -1496,6 +1576,26 @@
   EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b);
 }
 
+void Mips64Assembler::FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x1b);
+}
+
 void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) {
   CHECK(HasMsa());
   EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e);
@@ -1795,6 +1895,17 @@
   EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x14);
 }
 
+void Mips64Assembler::ReplicateFPToVectorRegister(VectorRegister dst,
+                                                  FpuRegister src,
+                                                  bool is_double) {
+  // Float/double in FPU register Fx is treated as element 0 of vector register Wx (same index).
+  if (is_double) {
+    SplatiD(dst, static_cast<VectorRegister>(src), 0);  // Replicate doubleword element 0.
+  } else {
+    SplatiW(dst, static_cast<VectorRegister>(src), 0);  // Replicate word element 0.
+  }
+}
+
 void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
   TemplateLoadConst32(this, rd, value);
 }
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index c92cf4c..773db9b 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -25,6 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/enums.h"
 #include "base/macros.h"
+#include "base/stl_util_identity.h"
 #include "constants_mips64.h"
 #include "globals.h"
 #include "managed_register_mips64.h"
@@ -704,6 +705,22 @@
   void Aver_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void Aver_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void Aver_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Max_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Min_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
@@ -713,6 +730,10 @@
   void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmaxD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FminD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
   void Ffint_sW(VectorRegister wd, VectorRegister ws);
   void Ffint_sD(VectorRegister wd, VectorRegister ws);
@@ -774,6 +795,9 @@
   void IlvrW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
   void IlvrD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
 
+  // Replicates the FP value in src into all dst elements (doubleword if is_double, else word).
+  void ReplicateFPToVectorRegister(VectorRegister dst, FpuRegister src, bool is_double);
+
   // Higher level composite instructions.
   int InstrCountForLoadReplicatedConst32(int64_t);
   void LoadConst32(GpuRegister rd, int32_t value);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index fbebe0c..bdf9598 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -2998,6 +2998,86 @@
             "aver_u.d");
 }
 
+TEST_F(AssemblerMIPS64Test, Max_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sB, "max_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sH, "max_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sW, "max_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_sD, "max_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "max_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uB, "max_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uH, "max_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uW, "max_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Max_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Max_uD, "max_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "max_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sB, "min_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sH, "min_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sW, "min_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_sD, "min_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "min_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uB, "min_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uH, "min_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uW, "min_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Min_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Min_uD, "min_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "min_u.d");
+}
+
 TEST_F(AssemblerMIPS64Test, FaddW) {
   DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"),
             "fadd.w");
@@ -3038,6 +3118,26 @@
             "fdiv.d");
 }
 
+TEST_F(AssemblerMIPS64Test, FmaxW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmaxW, "fmax.w ${reg1}, ${reg2}, ${reg3}"),
+            "fmax.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FmaxD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmaxD, "fmax.d ${reg1}, ${reg2}, ${reg3}"),
+            "fmax.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FminW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FminW, "fmin.w ${reg1}, ${reg2}, ${reg3}"),
+            "fmin.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FminD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FminD, "fmin.d ${reg1}, ${reg2}, ${reg3}"),
+            "fmin.d");
+}
+
 TEST_F(AssemblerMIPS64Test, Ffint_sW) {
   DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"),
             "ffint_s.w");
diff --git a/compiler/utils/swap_space.cc b/compiler/utils/swap_space.cc
index a1eb08e..621a652 100644
--- a/compiler/utils/swap_space.cc
+++ b/compiler/utils/swap_space.cc
@@ -20,10 +20,11 @@
 #include <numeric>
 #include <sys/mman.h>
 
+#include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index 4d55eb0..dd09fed 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -25,8 +25,8 @@
 #include "dex/verified_method.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
+#include "driver/compiler_driver-inl.h"
 #include "driver/compiler_options.h"
-#include "driver/compiler_driver.h"
 #include "handle_scope-inl.h"
 #include "indenter.h"
 #include "mirror/class_loader.h"
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 660409f..b88fe09 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -39,6 +39,7 @@
 #include "arch/instruction_set_features.h"
 #include "arch/mips/instruction_set_features_mips.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "base/dumpable.h"
 #include "base/macros.h"
 #include "base/scoped_flock.h"
@@ -74,6 +75,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_writer.h"
 #include "os.h"
@@ -112,12 +114,15 @@
 static std::string StrippedCommandLine() {
   std::vector<std::string> command;
 
-  // Do a pre-pass to look for zip-fd.
+  // Do a pre-pass to look for zip-fd and the compiler filter.
   bool saw_zip_fd = false;
+  bool saw_compiler_filter = false;
   for (int i = 0; i < original_argc; ++i) {
     if (android::base::StartsWith(original_argv[i], "--zip-fd=")) {
       saw_zip_fd = true;
-      break;
+    }
+    if (android::base::StartsWith(original_argv[i], "--compiler-filter=")) {
+      saw_compiler_filter = true;
     }
   }
 
@@ -161,6 +166,11 @@
     command.push_back(original_argv[i]);
   }
 
+  if (!saw_compiler_filter) {
+    command.push_back("--compiler-filter=" +
+        CompilerFilter::NameOfFilter(CompilerFilter::kDefaultCompilerFilter));
+  }
+
   // Construct the final output.
   if (command.size() <= 1U) {
     // It seems only "/system/bin/dex2oat" is left, or not even that. Use a pretty line.
@@ -477,6 +487,16 @@
                                        android::base::LogId::DEFAULT,
                                        LogSeverity::FATAL,
                                        message.c_str());
+    // If we're on the host, try to dump all threads to get a sense of what's going on. This is
+    // restricted to the host as the dump may itself go bad.
+    // TODO: Use a double watchdog timeout, so we can enable this on-device.
+    if (!kIsTargetBuild && Runtime::Current() != nullptr) {
+      Runtime::Current()->AttachCurrentThread("Watchdog thread attached for dumping",
+                                              true,
+                                              nullptr,
+                                              false);
+      Runtime::Current()->DumpForSigQuit(std::cerr);
+    }
     exit(1);
   }
 
@@ -503,13 +523,14 @@
     CHECK_WATCH_DOG_PTHREAD_CALL(pthread_mutex_unlock, (&mutex_), reason);
   }
 
-  const int64_t timeout_in_milliseconds_;
-  bool shutting_down_;
   // TODO: Switch to Mutex when we can guarantee it won't prevent shutdown in error cases.
   pthread_mutex_t mutex_;
   pthread_cond_t cond_;
   pthread_attr_t attr_;
   pthread_t pthread_;
+
+  const int64_t timeout_in_milliseconds_;
+  bool shutting_down_;
 };
 
 class Dex2Oat FINAL {
@@ -1379,8 +1400,8 @@
     // Note: we're only invalidating the magic data in the file, as dex2oat needs the rest of
     // the information to remain valid.
     if (update_input_vdex_) {
-      std::unique_ptr<BufferedOutputStream> vdex_out(MakeUnique<BufferedOutputStream>(
-          MakeUnique<FileOutputStream>(vdex_files_.back().get())));
+      std::unique_ptr<BufferedOutputStream> vdex_out = std::make_unique<BufferedOutputStream>(
+          std::make_unique<FileOutputStream>(vdex_files_.back().get()));
       if (!vdex_out->WriteFully(&VdexFile::Header::kVdexInvalidMagic,
                                 arraysize(VdexFile::Header::kVdexInvalidMagic))) {
         PLOG(ERROR) << "Failed to invalidate vdex header. File: " << vdex_out->GetLocation();
@@ -1441,12 +1462,8 @@
       Runtime* runtime = Runtime::Current();
       CHECK(runtime != nullptr);
       // Filter out class path classes since we don't want to include these in the image.
-      std::unordered_set<std::string> dex_files_locations;
-      for (const DexFile* dex_file : dex_files_) {
-        dex_files_locations.insert(dex_file->GetLocation());
-      }
       std::set<DexCacheResolvedClasses> resolved_classes(
-          profile_compilation_info_->GetResolvedClasses(dex_files_locations));
+          profile_compilation_info_->GetResolvedClasses(dex_files_));
       image_classes_.reset(new std::unordered_set<std::string>(
           runtime->GetClassLinker()->GetClassDescriptorsForResolvedClasses(resolved_classes)));
       VLOG(compiler) << "Loaded " << image_classes_->size()
@@ -1881,8 +1898,8 @@
       verifier::VerifierDeps* verifier_deps = callbacks_->GetVerifierDeps();
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         File* vdex_file = vdex_files_[i].get();
-        std::unique_ptr<BufferedOutputStream> vdex_out(
-            MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
+        std::unique_ptr<BufferedOutputStream> vdex_out =
+            std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(vdex_file));
 
         if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps)) {
           LOG(ERROR) << "Failed to write verifier dependencies into VDEX " << vdex_file->GetPath();
@@ -1916,6 +1933,7 @@
         elf_writer->PrepareDynamicSection(rodata_size,
                                           text_size,
                                           oat_writer->GetBssSize(),
+                                          oat_writer->GetBssMethodsOffset(),
                                           oat_writer->GetBssRootsOffset());
 
         if (IsImage()) {
@@ -2130,7 +2148,10 @@
 
   bool LoadProfile() {
     DCHECK(UseProfile());
-
+    // TODO(calin): We should be using the runtime arena pool (instead of the
+    // default profile arena). However the setup logic is messy and needs
+    // cleaning up before that (e.g. the oat writers are created before the
+    // runtime).
     profile_compilation_info_.reset(new ProfileCompilationInfo());
     ScopedFlock flock;
     bool success = true;
@@ -2490,8 +2511,8 @@
 
     runtime_.reset(Runtime::Current());
     runtime_->SetInstructionSet(instruction_set_);
-    for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-      Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
+    for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); ++i) {
+      CalleeSaveType type = CalleeSaveType(i);
       if (!runtime_->HasCalleeSaveMethod(type)) {
         runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(), type);
       }
@@ -2910,7 +2931,7 @@
   // might produce a stack frame too large for this function or for
   // functions inlining it (such as main), that would not fit the
   // requirements of the `-Wframe-larger-than` option.
-  std::unique_ptr<Dex2Oat> dex2oat = MakeUnique<Dex2Oat>(&timings);
+  std::unique_ptr<Dex2Oat> dex2oat = std::make_unique<Dex2Oat>(&timings);
 
   // Parse arguments. Argument mistakes will lead to exit(EXIT_FAILURE) in UsageError.
   dex2oat->ParseArgs(argc, argv);
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 6420aa8..b604e8b 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -28,6 +28,7 @@
 
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/mutex-inl.h"
 #include "dex_file-inl.h"
 #include "dex2oat_environment_test.h"
 #include "dex2oat_return_codes.h"
@@ -38,6 +39,8 @@
 
 namespace art {
 
+static constexpr size_t kMaxMethodIds = 65535;
+
 using android::base::StringPrintf;
 
 class Dex2oatTest : public Dex2oatEnvironmentTest {
@@ -612,7 +615,7 @@
     ProfileCompilationInfo info;
     std::string profile_key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
     for (size_t i = 0; i < num_classes; ++i) {
-      info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1 + i));
+      info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1 + i), kMaxMethodIds);
     }
     bool result = info.Save(profile_test_fd);
     close(profile_test_fd);
diff --git a/dex2oat/include/dex2oat_return_codes.h b/dex2oat/include/dex2oat_return_codes.h
index cc5400f..ad09d47 100644
--- a/dex2oat/include/dex2oat_return_codes.h
+++ b/dex2oat/include/dex2oat_return_codes.h
@@ -21,9 +21,10 @@
 namespace dex2oat {
 
 enum class ReturnCode : int {
-  kNoFailure = 0,
-  kOther = 1,
-  kCreateRuntime = 2,
+  kNoFailure = 0,          // No failure, execution completed successfully.
+  kOther = 1,              // Some other error, not further specified, occurred.
+  kCreateRuntime = 2,      // Dex2oat failed creating a runtime. This may be indicative
+                           // of a missing or out of date boot image, for example.
 };
 
 }  // namespace dex2oat
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 5656ddd..1541d7b 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -1747,9 +1747,8 @@
       case EncodedArrayValueIterator::ValueType::kArray:
       case EncodedArrayValueIterator::ValueType::kAnnotation:
         // Unreachable based on current EncodedArrayValueIterator::Next().
-        UNIMPLEMENTED(FATAL) << " type " << type;
+        UNIMPLEMENTED(FATAL) << " type " << it.GetValueType();
         UNREACHABLE();
-        break;
       case EncodedArrayValueIterator::ValueType::kNull:
         type = "Null";
         value = "null";
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index cf453b9..62ee445 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -57,31 +57,6 @@
                     entry.reg_)));
 }
 
-static uint32_t GetCodeItemSize(const DexFile::CodeItem& disk_code_item) {
-  uintptr_t code_item_start = reinterpret_cast<uintptr_t>(&disk_code_item);
-  uint32_t insns_size = disk_code_item.insns_size_in_code_units_;
-  uint32_t tries_size = disk_code_item.tries_size_;
-  if (tries_size == 0) {
-    uintptr_t insns_end = reinterpret_cast<uintptr_t>(&disk_code_item.insns_[insns_size]);
-    return insns_end - code_item_start;
-  } else {
-    // Get the start of the handler data.
-    const uint8_t* handler_data = DexFile::GetCatchHandlerData(disk_code_item, 0);
-    uint32_t handlers_size = DecodeUnsignedLeb128(&handler_data);
-    // Manually read each handler.
-    for (uint32_t i = 0; i < handlers_size; ++i) {
-      int32_t uleb128_count = DecodeSignedLeb128(&handler_data) * 2;
-      if (uleb128_count <= 0) {
-        uleb128_count = -uleb128_count + 1;
-      }
-      for (int32_t j = 0; j < uleb128_count; ++j) {
-        DecodeUnsignedLeb128(&handler_data);
-      }
-    }
-    return reinterpret_cast<uintptr_t>(handler_data) - code_item_start;
-  }
-}
-
 static uint32_t GetDebugInfoStreamSize(const uint8_t* debug_info_stream) {
   const uint8_t* stream = debug_info_stream;
   DecodeUnsignedLeb128(&stream);  // line_start
@@ -686,7 +661,7 @@
     }
   }
 
-  uint32_t size = GetCodeItemSize(disk_code_item);
+  uint32_t size = DexFile::GetCodeItemSize(disk_code_item);
   CodeItem* code_item = new CodeItem(
       registers_size, ins_size, outs_size, debug_info, insns_size, insns, tries, handler_list);
   code_item->SetSize(size);
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 5692eb2..95e64bf 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -23,6 +23,7 @@
 #include <vector>
 #include <stdint.h>
 
+#include "base/stl_util.h"
 #include "dex_file-inl.h"
 #include "leb128.h"
 #include "utf.h"
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index 829e9fe..d279bcb 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -174,7 +174,7 @@
                       ProfileCompilationInfo* profile_info) {
     if (profile_info != nullptr) {
       uint32_t method_idx = method->GetMethodId()->GetIndex();
-      if (!profile_info->ContainsMethod(MethodReference(dex_file, method_idx))) {
+      if (!profile_info->ContainsHotMethod(MethodReference(dex_file, method_idx))) {
         return;
       }
     }
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 205c0d1..50dda88 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -1557,7 +1557,7 @@
             (method->GetAccessFlags() & kAccConstructor) != 0 &&
             (method->GetAccessFlags() & kAccStatic) != 0;
         const bool method_executed = is_clinit ||
-            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+            info_->IsStartupOrHotMethod(MethodReference(dex_file, method_id->GetIndex()));
         if (!method_executed) {
           continue;
         }
@@ -1665,8 +1665,9 @@
 
   enum CodeItemKind {
     kMethodNotExecuted = 0,
-    kMethodExecuted = 1,
-    kSize = 2,
+    kMethodClinit = 1,
+    kMethodExecuted = 2,
+    kSize = 3,
   };
 
   static constexpr InvokeType invoke_types[] = {
@@ -1694,26 +1695,28 @@
           continue;
         }
         // Separate executed methods (clinits and profiled methods) from unexecuted methods.
-        // TODO: clinits are executed only once, consider separating them further.
         const bool is_clinit = is_profile_class &&
             (method->GetAccessFlags() & kAccConstructor) != 0 &&
             (method->GetAccessFlags() & kAccStatic) != 0;
-        const bool is_method_executed = is_clinit ||
-            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
-        code_items[is_method_executed
-                       ? CodeItemKind::kMethodExecuted
-                       : CodeItemKind::kMethodNotExecuted]
-            .insert(code_item);
+        const bool is_method_executed =
+            info_->IsStartupOrHotMethod(MethodReference(dex_file, method_id->GetIndex()));
+        CodeItemKind code_item_kind = CodeItemKind::kMethodNotExecuted;
+        if (is_clinit) {
+          code_item_kind = CodeItemKind::kMethodClinit;
+        } else if (is_method_executed) {
+          code_item_kind = CodeItemKind::kMethodExecuted;
+        }
+        code_items[code_item_kind].insert(code_item);
       }
     }
   }
 
-  // total_diff includes diffs generated by both executed and non-executed methods.
+  // total_diff includes diffs generated by clinits, executed, and non-executed methods.
   int32_t total_diff = 0;
   // The relative placement has no effect on correctness; it is used to ensure
   // the layout is deterministic
   for (std::unordered_set<dex_ir::CodeItem*>& code_items_set : code_items) {
-    // diff is reset for executed and non-executed methods.
+    // diff is reset for each class of code items.
     int32_t diff = 0;
     for (dex_ir::ClassData* data : new_class_data_order) {
       data->SetOffset(data->GetOffset() + diff);
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 5a6a20d..6fe8eeb 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -23,7 +23,9 @@
 
 #include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
+#include "dex_file-inl.h"
 #include "exec_utils.h"
+#include "jit/profile_compilation_info.h"
 #include "utils.h"
 
 namespace art {
@@ -40,9 +42,6 @@
     "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
     "AAAAdQEAAAAQAAABAAAAjAEAAA==";
 
-static const char kDexFileLayoutInputProfile[] =
-    "cHJvADAwNwAAAAAAAAgAAAB4AQMAAAAAAQ==";
-
 // Dex file with catch handler unreferenced by try blocks.
 // Constructed by building a dex file with try/catch blocks and hex editing.
 static const char kUnreferencedCatchHandlerInputDex[] =
@@ -317,6 +316,68 @@
     return true;
   }
 
+  // Creates a profile for input_dex, keyed by dex_location, and saves it to out_profile.
+  void CreateProfile(const std::string& input_dex,
+                     const std::string& out_profile,
+                     const std::string& dex_location) {
+    std::vector<std::unique_ptr<const DexFile>> dex_files;
+    std::string error_msg;
+    bool result = DexFile::Open(input_dex.c_str(),
+                                input_dex,
+                                false,
+                                &error_msg,
+                                &dex_files);
+
+    ASSERT_TRUE(result) << error_msg;
+    ASSERT_GE(dex_files.size(), 1u);
+
+    size_t profile_methods = 0;
+    size_t profile_classes = 0;
+    ProfileCompilationInfo pfi;
+    std::vector<ProfileMethodInfo> pmis;
+    std::set<DexCacheResolvedClasses> classes;
+    for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
+      for (uint32_t i = 0; i < dex_file->NumMethodIds(); i += 2) {
+        if ((i & 3) != 0) {
+          pfi.AddMethodIndex(dex_location,
+                             dex_file->GetLocationChecksum(),
+                             i,
+                             dex_file->NumMethodIds());
+          ++profile_methods;
+        } else if ((i & 2) != 0) {  // NOTE(review): unreachable, i is always even here.
+          pfi.AddSampledMethod(/*startup*/true,
+                               dex_location,
+                               dex_file->GetLocationChecksum(),
+                               i,
+                               dex_file->NumMethodIds());
+          ++profile_methods;
+        }
+      }
+      DexCacheResolvedClasses cur_classes(dex_location,
+                                          dex_location,
+                                          dex_file->GetLocationChecksum(),
+                                          dex_file->NumMethodIds());
+      // Add the classes whose index has bit 1 clear (0, 1, 4, 5, ...).
+      for (uint32_t i = 0; i < dex_file->NumClassDefs(); i += 1) {
+        if ((i & 2) == 0) {
+          cur_classes.AddClass(dex_file->GetClassDef(i).class_idx_);
+          ++profile_classes;
+        }
+      }
+      classes.insert(cur_classes);
+    }
+    pfi.AddMethodsAndClasses(pmis, classes);
+    // Write to provided file. A failed save is reported but still falls through to the close.
+    std::unique_ptr<File> file(OS::CreateEmptyFile(out_profile.c_str()));
+    ASSERT_TRUE(file != nullptr);
+    EXPECT_TRUE(pfi.Save(file->Fd()));
+    if (file->FlushCloseOrErase() != 0) {
+      PLOG(FATAL) << "Could not flush and close test file.";
+    }
+    EXPECT_GE(profile_methods, 0u);
+    EXPECT_GE(profile_classes, 0u);
+  }
+
   // Runs DexFileLayout test.
   bool DexFileLayoutExec(std::string* error_msg) {
     ScratchFile tmp_file;
@@ -328,7 +389,8 @@
     std::string dex_file = tmp_dir + "classes.dex";
     WriteFileBase64(kDexFileLayoutInputDex, dex_file.c_str());
     std::string profile_file = tmp_dir + "primary.prof";
-    WriteFileBase64(kDexFileLayoutInputProfile, profile_file.c_str());
+    CreateProfile(dex_file, profile_file, dex_file);
+    // The profile is generated from the dex file above, so its indices are valid for it.
     std::string output_dex = tmp_dir + "classes.dex.new";
 
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
@@ -350,6 +412,73 @@
     return true;
   }
 
+  // Runs dexlayout twice, the second time on the output of the first run, and returns true
+  // only if diffing the two outputs and removing the temporary files both succeed.
+  bool DexFileLayoutFixedPointExec(std::string* error_msg) {
+    ScratchFile tmp_file;
+    std::string tmp_name = tmp_file.GetFilename();
+    size_t tmp_last_slash = tmp_name.rfind("/");
+    std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
+
+    // Unzip the test dex file to the classes.dex destination. It is required to unzip since
+    // opening from jar recalculates the dex location checksum.
+    std::string dex_file = tmp_dir + "classes.dex";
+
+    std::vector<std::string> unzip_args = {
+        "/usr/bin/unzip",
+        GetTestDexFileName("ManyMethods"),
+        "classes.dex",
+        "-d",
+        tmp_dir,
+    };
+    if (!art::Exec(unzip_args, error_msg)) {
+      LOG(ERROR) << "Failed to unzip dex";
+      return false;
+    }
+
+    std::string profile_file = tmp_dir + "primary.prof";
+    CreateProfile(dex_file, profile_file, dex_file);
+    std::string output_dex = tmp_dir + "classes.dex.new";
+    std::string second_output_dex = tmp_dir + "classes.dex.new.new";
+
+    std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+    EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+
+    // First pass. -v makes sure that the layout did not corrupt the dex file.
+    std::vector<std::string> dexlayout_exec_argv =
+        { dexlayout, "-i", "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
+    if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
+      return false;
+    }
+
+    // Recreate the profile under the location of the first output (output_dex), so that the
+    // profile dex location matches the input of the second pass.
+    CreateProfile(dex_file, profile_file, output_dex);
+
+    // Second pass. -v makes sure that the layout did not corrupt the dex file.
+    // -i since the checksum won't match from the first layout.
+    std::vector<std::string> second_dexlayout_exec_argv =
+        { dexlayout, "-i", "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, output_dex };
+    if (!::art::Exec(second_dexlayout_exec_argv, error_msg)) {
+      return false;
+    }
+
+    bool diff_result = true;
+    std::vector<std::string> diff_exec_argv =
+        { "/usr/bin/diff", output_dex, second_output_dex };
+    if (!::art::Exec(diff_exec_argv, error_msg)) {
+      diff_result = false;
+    }
+
+    std::vector<std::string> rm_exec_argv =
+        { "/bin/rm", dex_file, profile_file, output_dex, second_output_dex };
+    if (!::art::Exec(rm_exec_argv, error_msg)) {
+      return false;
+    }
+
+    return diff_result;
+  }
+
   // Runs UnreferencedCatchHandlerTest & Unreferenced0SizeCatchHandlerTest.
   bool UnreferencedCatchHandlerExec(std::string* error_msg, const char* filename) {
     ScratchFile tmp_file;
@@ -387,13 +516,11 @@
   bool DexLayoutExec(ScratchFile* dex_file,
                      const char* dex_filename,
                      ScratchFile* profile_file,
-                     const char* profile_filename,
                      std::vector<std::string>& dexlayout_exec_argv) {
     WriteBase64ToFile(dex_filename, dex_file->GetFile());
     EXPECT_EQ(dex_file->GetFile()->Flush(), 0);
     if (profile_file != nullptr) {
-      WriteBase64ToFile(profile_filename, profile_file->GetFile());
-      EXPECT_EQ(profile_file->GetFile()->Flush(), 0);
+      CreateProfile(dex_file->GetFilename(), profile_file->GetFilename(), dex_file->GetFilename());
     }
     std::string error_msg;
     const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
@@ -427,6 +554,13 @@
   ASSERT_TRUE(DexFileLayoutExec(&error_msg)) << error_msg;
 }
 
+TEST_F(DexLayoutTest, DexFileLayoutFixedPoint) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(DexFileLayoutFixedPointExec(&error_msg)) << error_msg;
+}
+
 TEST_F(DexLayoutTest, UnreferencedCatchHandler) {
   // Disable test on target.
   TEST_DISABLED_FOR_TARGET();
@@ -460,7 +594,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kDexFileDuplicateOffset,
                             nullptr /* profile_file */,
-                            nullptr /* profile_filename */,
                             dexlayout_exec_argv));
 }
 
@@ -473,7 +606,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kNullSetRefListElementInputDex,
                             nullptr /* profile_file */,
-                            nullptr /* profile_filename */,
                             dexlayout_exec_argv));
 }
 
@@ -487,7 +619,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kMultiClassDataInputDex,
                             &temp_profile,
-                            kDexFileLayoutInputProfile,
                             dexlayout_exec_argv));
 }
 
@@ -501,7 +632,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kUnalignedCodeInfoInputDex,
                             &temp_profile,
-                            kDexFileLayoutInputProfile,
                             dexlayout_exec_argv));
 }
 
@@ -515,7 +645,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kClassDataBeforeCodeInputDex,
                             &temp_profile,
-                            kDexFileLayoutInputProfile,
                             dexlayout_exec_argv));
 }
 
@@ -528,7 +657,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kUnknownTypeDebugInfoInputDex,
                             nullptr /* profile_file */,
-                            nullptr /* profile_filename */,
                             dexlayout_exec_argv));
 }
 
@@ -541,7 +669,6 @@
   ASSERT_TRUE(DexLayoutExec(&temp_dex,
                             kDuplicateCodeItemInputDex,
                             nullptr /* profile_file */,
-                            nullptr /* profile_filename */,
                             dexlayout_exec_argv));
 }
 
diff --git a/dexoptanalyzer/Android.bp b/dexoptanalyzer/Android.bp
index cf4c99e..da6663d 100644
--- a/dexoptanalyzer/Android.bp
+++ b/dexoptanalyzer/Android.bp
@@ -48,8 +48,8 @@
 art_cc_binary {
     name: "dexoptanalyzerd",
     defaults: [
-        "dexoptanalyzer-defaults",
         "art_debug_defaults",
+        "dexoptanalyzer-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/disassembler/Android.bp b/disassembler/Android.bp
index 8dfada2..086b8c7 100644
--- a/disassembler/Android.bp
+++ b/disassembler/Android.bp
@@ -47,8 +47,8 @@
 art_cc_library {
     name: "libartd-disassembler",
     defaults: [
-        "libart-disassembler-defaults",
         "art_debug_defaults",
+        "libart-disassembler-defaults",
     ],
     shared_libs: [
         // For disassembler_arm*.
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 8894cc9..91203cb 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -438,10 +438,16 @@
   { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x10, "ave_u", "Vkmn" },
   { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x10, "aver_s", "Vkmn" },
   { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x10, "aver_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x2 << 23) | 0xe, "max_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x3 << 23) | 0xe, "max_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x4 << 23) | 0xe, "min_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0xe, "min_u", "Vkmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x0 << 22) | 0x1b, "fadd", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x1b, "fsub", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x2 << 22) | 0x1b, "fmul", "Ukmn" },
   { kMsaMask | (0xf << 22), kMsa | (0x3 << 22) | 0x1b, "fdiv", "Ukmn" },
+  { kMsaMask | (0xf << 22), kMsa | (0xe << 22) | 0x1b, "fmax", "Ukmn" },
+  { kMsaMask | (0xf << 22), kMsa | (0xc << 22) | 0x1b, "fmin", "Ukmn" },
   { kMsaMask | (0x1ff << 17), kMsa | (0x19e << 17) | 0x1e, "ffint_s", "ukm" },
   { kMsaMask | (0x1ff << 17), kMsa | (0x19c << 17) | 0x1e, "ftint_s", "ukm" },
   { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0xd, "sll", "Vkmn" },
diff --git a/imgdiag/Android.bp b/imgdiag/Android.bp
index eaeb78e..9459bb5 100644
--- a/imgdiag/Android.bp
+++ b/imgdiag/Android.bp
@@ -64,8 +64,8 @@
 art_cc_binary {
     name: "imgdiagd",
     defaults: [
-        "imgdiag-defaults",
         "art_debug_defaults",
+        "imgdiag-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/imgdiag/imgdiag_test.cc b/imgdiag/imgdiag_test.cc
index 0d46b2e..c948d3c 100644
--- a/imgdiag/imgdiag_test.cc
+++ b/imgdiag/imgdiag_test.cc
@@ -28,6 +28,7 @@
 #include "runtime/utils.h"
 #include "runtime/gc/space/image_space.h"
 #include "runtime/gc/heap.h"
+#include "runtime/runtime.h"
 
 #include <sys/types.h>
 #include <unistd.h>
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index f07e0f9..9b95de2 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -63,6 +63,7 @@
 #include "safe_map.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
+#include "stack.h"
 #include "stack_map.h"
 #include "string_reference.h"
 #include "thread_list.h"
@@ -129,8 +130,8 @@
     if (elf_file == nullptr) {
       return false;
     }
-    std::unique_ptr<BufferedOutputStream> output_stream(
-        MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file.get())));
+    std::unique_ptr<BufferedOutputStream> output_stream =
+        std::make_unique<BufferedOutputStream>(std::make_unique<FileOutputStream>(elf_file.get()));
     builder_.reset(new ElfBuilder<ElfTypes>(isa, features.get(), output_stream.get()));
 
     builder_->Start();
@@ -170,6 +171,7 @@
                                     rodata_size,
                                     text_size,
                                     oat_file_->BssSize(),
+                                    oat_file_->BssMethodsOffset(),
                                     oat_file_->BssRootsOffset());
     builder_->WriteDynamicSection();
 
diff --git a/patchoat/Android.bp b/patchoat/Android.bp
index a78f97d..d3bc2a7 100644
--- a/patchoat/Android.bp
+++ b/patchoat/Android.bp
@@ -40,8 +40,8 @@
 art_cc_binary {
     name: "patchoatd",
     defaults: [
-        "patchoat-defaults",
         "art_debug_defaults",
+        "patchoat-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index e750ede..ec3481b 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -40,6 +40,7 @@
 #include "elf_file_impl.h"
 #include "gc/space/image_space.h"
 #include "image-inl.h"
+#include "intern_table.h"
 #include "mirror/dex_cache.h"
 #include "mirror/executable.h"
 #include "mirror/object-inl.h"
diff --git a/profman/Android.bp b/profman/Android.bp
index 2dcbaee..a327ef2 100644
--- a/profman/Android.bp
+++ b/profman/Android.bp
@@ -49,8 +49,8 @@
 art_cc_binary {
     name: "profmand",
     defaults: [
-        "profman-defaults",
         "art_debug_defaults",
+        "profman-defaults",
     ],
     shared_libs: [
         "libartd",
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index 38254e2..ccf9ac6 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -21,6 +21,7 @@
 #include "common_runtime_test.h"
 #include "exec_utils.h"
 #include "jit/profile_compilation_info.h"
+#include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "obj_ptr-inl.h"
 #include "profile_assistant.h"
@@ -29,7 +30,14 @@
 
 namespace art {
 
+static constexpr size_t kMaxMethodIds = 65535;
+
 class ProfileAssistantTest : public CommonRuntimeTest {
+ public:
+  void PostRuntimeCreate() OVERRIDE {
+    arena_.reset(new ArenaAllocator(Runtime::Current()->GetArenaPool()));
+  }
+
  protected:
   void SetupProfile(const std::string& id,
                     uint32_t checksum,
@@ -50,15 +58,18 @@
           GetOfflineProfileMethodInfo(dex_location1, dex_location_checksum1,
                                       dex_location2, dex_location_checksum2);
       if (reverse_dex_write_order) {
-        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
-        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, kMaxMethodIds, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, kMaxMethodIds, pmi));
       } else {
-        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
-        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, kMaxMethodIds, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, kMaxMethodIds, pmi));
       }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
-      ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, dex::TypeIndex(i)));
+      ASSERT_TRUE(info->AddClassIndex(dex_location1,
+                                      dex_location_checksum1,
+                                      dex::TypeIndex(i),
+                                      kMaxMethodIds));
     }
 
     ASSERT_TRUE(info->Save(GetFd(profile)));
@@ -66,38 +77,75 @@
     ASSERT_TRUE(profile.GetFile()->ResetOffset());
   }
 
+  // Fills `info` for dex location "location1" + `id`: `hot_methods` are added with
+  // AddMethodIndex, `startup_methods` and `post_startup_methods` with AddSampledMethod.
+  // `number_of_methods` is forwarded to every add call. The result is saved to `profile`,
+  // which is then flushed and rewound.
+  void SetupBasicProfile(const std::string& id,
+                         uint32_t checksum,
+                         uint16_t number_of_methods,
+                         const std::vector<uint32_t>& hot_methods,
+                         const std::vector<uint32_t>& startup_methods,
+                         const std::vector<uint32_t>& post_startup_methods,
+                         const ScratchFile& profile,
+                         ProfileCompilationInfo* info) {
+    std::string dex_location = "location1" + id;
+    for (uint32_t idx : hot_methods) {
+      info->AddMethodIndex(dex_location, checksum, idx, number_of_methods);
+    }
+    for (uint32_t idx : startup_methods) {
+      ASSERT_TRUE(info->AddSampledMethod(
+          /*startup*/true, dex_location, checksum, idx, number_of_methods));
+    }
+    for (uint32_t idx : post_startup_methods) {
+      ASSERT_TRUE(info->AddSampledMethod(
+          /*startup*/false, dex_location, checksum, idx, number_of_methods));
+    }
+    ASSERT_TRUE(info->Save(GetFd(profile)));
+    ASSERT_EQ(0, profile.GetFile()->Flush());
+    ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  }
+
+  // Creates an inline cache which will be destructed at the end of the test.
+  ProfileCompilationInfo::InlineCacheMap* CreateInlineCacheMap() {
+    used_inline_caches.emplace_back(new ProfileCompilationInfo::InlineCacheMap(
+        std::less<uint16_t>(), arena_->Adapter(kArenaAllocProfile)));
+    return used_inline_caches.back().get();
+  }
+
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo(
         const std::string& dex_location1, uint32_t dex_checksum1,
         const std::string& dex_location2, uint32_t dex_checksum2) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-    pmi.dex_references.emplace_back(dex_location1, dex_checksum1);
-    pmi.dex_references.emplace_back(dex_location2, dex_checksum2);
+    ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
+    pmi.dex_references.emplace_back(dex_location1, dex_checksum1, kMaxMethodIds);
+    pmi.dex_references.emplace_back(dex_location2, dex_checksum2, kMaxMethodIds);
 
     // Monomorphic
     for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
     // Polymorphic
     for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
 
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
     // Megamorphic
     for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMegamorphic();
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
     // Missing types
     for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMissingTypes();
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
 
     return pmi;
@@ -179,28 +221,44 @@
     return true;
   }
 
-  bool DumpClassesAndMethods(const std::string& filename, std::string* file_contents) {
-    ScratchFile class_names_file;
+  // Executes profman with `extra_args` inserted right after the executable name, followed
+  // by --profile-file=`filename` and the first libcore dex file as apk and dex location.
+  // Whatever profman writes to its dump fd is read back into `output`.
+  // Problems are reported via EXPECT_*; the return value is always true.
+  bool RunProfman(const std::string& filename,
+                  const std::vector<std::string>& extra_args,
+                  std::string* output) {
+    ScratchFile output_file;
     std::string profman_cmd = GetProfmanCmd();
     std::vector<std::string> argv_str;
     argv_str.push_back(profman_cmd);
-    argv_str.push_back("--dump-classes-and-methods");
+    argv_str.insert(argv_str.end(), extra_args.begin(), extra_args.end());
     argv_str.push_back("--profile-file=" + filename);
     argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
     argv_str.push_back("--dex-location=" + GetLibCoreDexFileNames()[0]);
-    argv_str.push_back("--dump-output-to-fd=" + std::to_string(GetFd(class_names_file)));
+    argv_str.push_back("--dump-output-to-fd=" + std::to_string(GetFd(output_file)));
     std::string error;
     EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0);
-    File* file = class_names_file.GetFile();
+    File* file = output_file.GetFile();
     EXPECT_EQ(0, file->Flush());
     EXPECT_TRUE(file->ResetOffset());
     int64_t length = file->GetLength();
     std::unique_ptr<char[]> buf(new char[length]);
     EXPECT_EQ(file->Read(buf.get(), length, 0), length);
-    *file_contents = std::string(buf.get(), length);
+    *output = std::string(buf.get(), length);
     return true;
   }
 
+  // Runs profman with --dump-classes-and-methods on `filename`; see RunProfman.
+  bool DumpClassesAndMethods(const std::string& filename, std::string* file_contents) {
+    return RunProfman(filename, {"--dump-classes-and-methods"}, file_contents);
+  }
+
+  // Runs profman with --dump-only on `filename`; see RunProfman.
+  bool DumpOnly(const std::string& filename, std::string* file_contents) {
+    return RunProfman(filename, {"--dump-only"}, file_contents);
+  }
+
   bool CreateAndDump(const std::string& input_file_contents,
                      std::string* output_file_contents) {
     ScratchFile profile_file;
@@ -247,13 +303,13 @@
                           bool is_megamorphic,
                           bool is_missing_types)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-    ASSERT_TRUE(info.GetMethod(method->GetDexFile()->GetLocation(),
-                               method->GetDexFile()->GetLocationChecksum(),
-                               method->GetDexMethodIndex(),
-                               &pmi));
-    ASSERT_EQ(pmi.inline_caches.size(), 1u);
-    ProfileCompilationInfo::DexPcData dex_pc_data = pmi.inline_caches.begin()->second;
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi =
+        info.GetMethod(method->GetDexFile()->GetLocation(),
+                       method->GetDexFile()->GetLocationChecksum(),
+                       method->GetDexMethodIndex());
+    ASSERT_TRUE(pmi != nullptr);
+    ASSERT_EQ(pmi->inline_caches->size(), 1u);
+    const ProfileCompilationInfo::DexPcData& dex_pc_data = pmi->inline_caches->begin()->second;
 
     ASSERT_EQ(dex_pc_data.is_megamorphic, is_megamorphic);
     ASSERT_EQ(dex_pc_data.is_missing_types, is_missing_types);
@@ -262,7 +318,7 @@
     for (mirror::Class* it : expected_clases) {
       for (const auto& class_ref : dex_pc_data.classes) {
         ProfileCompilationInfo::DexReference dex_ref =
-            pmi.dex_references[class_ref.dex_profile_index];
+            pmi->dex_references[class_ref.dex_profile_index];
         if (dex_ref.MatchesDex(&(it->GetDexFile())) &&
             class_ref.type_index == it->GetDexTypeIndex()) {
           found++;
@@ -272,6 +328,13 @@
 
     ASSERT_EQ(expected_clases.size(), found);
   }
+
+  std::unique_ptr<ArenaAllocator> arena_;
+
+  // Cache of inline caches generated during tests.
+  // This makes it easier to pass data between different utilities and ensure that
+  // caches are destructed at the end of the test.
+  std::vector<std::unique_ptr<ProfileCompilationInfo::InlineCacheMap>> used_inline_caches;
 };
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -499,10 +562,11 @@
 TEST_F(ProfileAssistantTest, TestProfileCreationAllMatch) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
+    "HLjava/lang/Object;-><init>()V",
     "Ljava/lang/Comparable;",
     "Ljava/lang/Math;",
     "Ljava/lang/Object;",
-    "Ljava/lang/Object;-><init>()V"
+    "SPLjava/lang/Comparable;->compareTo(Ljava/lang/Object;)I",
   };
   std::string file_contents;
   for (std::string& class_name : class_names) {
@@ -541,11 +605,11 @@
   for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
     if (!method.IsCopied() && method.GetCodeItem() != nullptr) {
       ++method_count;
-      ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-      ASSERT_TRUE(info.GetMethod(method.GetDexFile()->GetLocation(),
-                                 method.GetDexFile()->GetLocationChecksum(),
-                                 method.GetDexMethodIndex(),
-                                 &pmi));
+      std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi =
+          info.GetMethod(method.GetDexFile()->GetLocation(),
+                         method.GetDexFile()->GetLocationChecksum(),
+                         method.GetDexMethodIndex());
+      ASSERT_TRUE(pmi != nullptr);
     }
   }
   EXPECT_GT(method_count, 0u);
@@ -689,12 +753,12 @@
     // Verify that method noInlineCache has no inline caches in the profile.
     ArtMethod* no_inline_cache = GetVirtualMethod(class_loader, "LTestInline;", "noInlineCache");
     ASSERT_TRUE(no_inline_cache != nullptr);
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi_no_inline_cache;
-    ASSERT_TRUE(info.GetMethod(no_inline_cache->GetDexFile()->GetLocation(),
-                               no_inline_cache->GetDexFile()->GetLocationChecksum(),
-                               no_inline_cache->GetDexMethodIndex(),
-                               &pmi_no_inline_cache));
-    ASSERT_TRUE(pmi_no_inline_cache.inline_caches.empty());
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi_no_inline_cache =
+        info.GetMethod(no_inline_cache->GetDexFile()->GetLocation(),
+                       no_inline_cache->GetDexFile()->GetLocationChecksum(),
+                       no_inline_cache->GetDexMethodIndex());
+    ASSERT_TRUE(pmi_no_inline_cache != nullptr);
+    ASSERT_TRUE(pmi_no_inline_cache->inline_caches->empty());
   }
 }
 
@@ -738,4 +802,135 @@
   CheckProfileInfo(profile1, info1);
 }
 
+// Verifies that the "invalid_class" and "invalid_method" markers in a text profile are
+// stored in the created profile as the index std::numeric_limits<uint16_t>::max() - 1.
+TEST_F(ProfileAssistantTest, TestProfileCreateWithInvalidData) {
+  // Create the profile content.
+  std::vector<std::string> profile_methods = {
+    "LTestInline;->inlineMonomorphic(LSuper;)I+invalid_class",
+    "LTestInline;->invalid_method",
+    "invalid_class"
+  };
+  std::string input_file_contents;
+  for (std::string& m : profile_methods) {
+    input_file_contents += m + std::string("\n");
+  }
+
+  // Create the profile and save it to disk.
+  ScratchFile profile_file;
+  std::string dex_filename = GetTestDexFileName("ProfileTestMultiDex");
+  ASSERT_TRUE(CreateProfile(input_file_contents,
+                            profile_file.GetFilename(),
+                            dex_filename));
+
+  // Load the profile from disk.
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(profile_file.GetFile()->ResetOffset());
+  ASSERT_TRUE(info.Load(GetFd(profile_file)));
+
+  // Load the dex files and verify that the profile contains the expected methods info.
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("ProfileTestMultiDex");
+  ASSERT_NE(class_loader, nullptr);
+
+  ArtMethod* inline_monomorphic = GetVirtualMethod(class_loader,
+                                                   "LTestInline;",
+                                                   "inlineMonomorphic");
+  // Fail the test cleanly instead of dereferencing a null method below.
+  ASSERT_TRUE(inline_monomorphic != nullptr);
+  const DexFile* dex_file = inline_monomorphic->GetDexFile();
+
+  // Verify that the inline cache contains the invalid type.
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> pmi =
+      info.GetMethod(dex_file->GetLocation(),
+                     dex_file->GetLocationChecksum(),
+                     inline_monomorphic->GetDexMethodIndex());
+  ASSERT_TRUE(pmi != nullptr);
+  ASSERT_EQ(pmi->inline_caches->size(), 1u);
+  const ProfileCompilationInfo::DexPcData& dex_pc_data = pmi->inline_caches->begin()->second;
+  dex::TypeIndex invalid_class_index(std::numeric_limits<uint16_t>::max() - 1);
+  ASSERT_EQ(1u, dex_pc_data.classes.size());
+  ASSERT_EQ(invalid_class_index, dex_pc_data.classes.begin()->type_index);
+
+  // Verify that the start-up classes contain the invalid class.
+  std::set<dex::TypeIndex> classes;
+  std::set<uint16_t> hot_methods;
+  std::set<uint16_t> startup_methods;
+  std::set<uint16_t> post_start_methods;
+  ASSERT_TRUE(info.GetClassesAndMethods(*dex_file,
+                                        &classes,
+                                        &hot_methods,
+                                        &startup_methods,
+                                        &post_start_methods));
+  ASSERT_EQ(1u, classes.size());
+  ASSERT_TRUE(classes.find(invalid_class_index) != classes.end());
+
+  // Verify that the invalid method is in the profile.
+  ASSERT_EQ(2u, hot_methods.size());
+  uint16_t invalid_method_index = std::numeric_limits<uint16_t>::max() - 1;
+  ASSERT_TRUE(hot_methods.find(invalid_method_index) != hot_methods.end());
+}
+
+// Verifies that the --dump-only output lists hot, startup and post-startup methods in
+// their respective sections, in that order, followed by the classes section.
+TEST_F(ProfileAssistantTest, DumpOnly) {
+  ScratchFile profile;
+
+  const uint32_t kNumberOfMethods = 64;
+  std::vector<uint32_t> hot_methods;
+  std::vector<uint32_t> startup_methods;
+  std::vector<uint32_t> post_startup_methods;
+  for (uint32_t i = 0; i < kNumberOfMethods; ++i) {
+    if (i % 2 == 0) {
+      hot_methods.push_back(i);
+    }
+    if (i % 3 == 1) {
+      startup_methods.push_back(i);
+    }
+    if (i % 4 == 2) {
+      post_startup_methods.push_back(i);
+    }
+  }
+  EXPECT_GT(hot_methods.size(), 0u);
+  EXPECT_GT(startup_methods.size(), 0u);
+  EXPECT_GT(post_startup_methods.size(), 0u);
+  ProfileCompilationInfo info1;
+  SetupBasicProfile("p1",
+                    1,
+                    kNumberOfMethods,
+                    hot_methods,
+                    startup_methods,
+                    post_startup_methods,
+                    profile,
+                    &info1);
+  std::string output;
+  ASSERT_TRUE(DumpOnly(profile.GetFilename(), &output));
+  const size_t hot_offset = output.find("hot methods:");
+  const size_t startup_offset = output.find("startup methods:");
+  const size_t post_startup_offset = output.find("post startup methods:");
+  const size_t classes_offset = output.find("classes:");
+  ASSERT_NE(hot_offset, std::string::npos);
+  ASSERT_NE(startup_offset, std::string::npos);
+  ASSERT_NE(post_startup_offset, std::string::npos);
+  // Without this check a missing "classes:" section would make the upper-bound
+  // comparisons against classes_offset below pass trivially (npos is the max value).
+  ASSERT_NE(classes_offset, std::string::npos);
+  ASSERT_LT(hot_offset, startup_offset);
+  ASSERT_LT(startup_offset, post_startup_offset);
+  ASSERT_LT(post_startup_offset, classes_offset);
+  // Check the actual contents of the dump by looking at the offsets of the methods.
+  for (uint32_t m : hot_methods) {
+    const size_t pos = output.find(std::to_string(m) + "[],", hot_offset);
+    ASSERT_NE(pos, std::string::npos);
+    EXPECT_LT(pos, startup_offset);
+  }
+  for (uint32_t m : startup_methods) {
+    const size_t pos = output.find(std::to_string(m) + ",", startup_offset);
+    ASSERT_NE(pos, std::string::npos);
+    EXPECT_LT(pos, post_startup_offset);
+  }
+  for (uint32_t m : post_startup_methods) {
+    const size_t pos = output.find(std::to_string(m) + ",", post_startup_offset);
+    ASSERT_NE(pos, std::string::npos);
+    EXPECT_LT(pos, classes_offset);
+  }
+}
+
 }  // namespace art
diff --git a/profman/profman.cc b/profman/profman.cc
index 384e129..adef0d0 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -39,10 +39,11 @@
 #include "bytecode_utils.h"
 #include "dex_file.h"
 #include "jit/profile_compilation_info.h"
-#include "runtime.h"
-#include "utils.h"
-#include "zip_archive.h"
 #include "profile_assistant.h"
+#include "runtime.h"
+#include "type_reference.h"
+#include "utils.h"
+#include "zip_archive.h"
 
 namespace art {
 
@@ -143,10 +145,15 @@
 // Separators used when parsing human friendly representation of profiles.
 static const std::string kMethodSep = "->";
 static const std::string kMissingTypesMarker = "missing_types";
+static const std::string kInvalidClassDescriptor = "invalid_class";
+static const std::string kInvalidMethod = "invalid_method";
 static const std::string kClassAllMethods = "*";
 static constexpr char kProfileParsingInlineChacheSep = '+';
 static constexpr char kProfileParsingTypeSep = ',';
 static constexpr char kProfileParsingFirstCharInSignature = '(';
+static constexpr char kMethodFlagStringHot = 'H';
+static constexpr char kMethodFlagStringStartup = 'S';
+static constexpr char kMethodFlagStringPostStartup = 'P';
 
 // TODO(calin): This class has grown too much from its initial design. Split the functionality
 // into smaller, more contained pieces.
@@ -423,20 +430,42 @@
     }
     for (const std::unique_ptr<const DexFile>& dex_file : *dex_files) {
       std::set<dex::TypeIndex> class_types;
-      ProfileCompilationInfo::MethodMap methods;
-      if (profile_info.GetClassesAndMethods(dex_file.get(), &class_types, &methods)) {
+      std::set<uint16_t> hot_methods;
+      std::set<uint16_t> startup_methods;
+      std::set<uint16_t> post_startup_methods;
+      std::set<uint16_t> combined_methods;
+      if (profile_info.GetClassesAndMethods(*dex_file.get(),
+                                            &class_types,
+                                            &hot_methods,
+                                            &startup_methods,
+                                            &post_startup_methods)) {
         for (const dex::TypeIndex& type_index : class_types) {
           const DexFile::TypeId& type_id = dex_file->GetTypeId(type_index);
           out_lines->insert(std::string(dex_file->GetTypeDescriptor(type_id)));
         }
-        for (const auto& pair : methods) {
-          // TODO: Process inline caches.
-          const uint16_t dex_method_idx = pair.first;
+        combined_methods = hot_methods;
+        combined_methods.insert(startup_methods.begin(), startup_methods.end());
+        combined_methods.insert(post_startup_methods.begin(), post_startup_methods.end());
+        for (uint16_t dex_method_idx : combined_methods) {
           const DexFile::MethodId& id = dex_file->GetMethodId(dex_method_idx);
           std::string signature_string(dex_file->GetMethodSignature(id).ToString());
           std::string type_string(dex_file->GetTypeDescriptor(dex_file->GetTypeId(id.class_idx_)));
           std::string method_name(dex_file->GetMethodName(id));
-          out_lines->insert(type_string + kMethodSep + method_name + signature_string);
+          std::string flags_string;
+          if (hot_methods.find(dex_method_idx) != hot_methods.end()) {
+            flags_string += kMethodFlagStringHot;
+          }
+          if (startup_methods.find(dex_method_idx) != startup_methods.end()) {
+            flags_string += kMethodFlagStringStartup;
+          }
+          if (post_startup_methods.find(dex_method_idx) != post_startup_methods.end()) {
+            flags_string += kMethodFlagStringPostStartup;
+          }
+          out_lines->insert(flags_string +
+                            type_string +
+                            kMethodSep +
+                            method_name +
+                            signature_string);
         }
       }
     }
@@ -460,7 +489,7 @@
     return true;
   }
 
-  int DumpClasses() {
+  int DumpClassesAndMethods() {
     // Validate that at least one profile file or reference was specified.
     if (profile_files_.empty() && profile_files_fd_.empty() &&
         reference_profile_file_.empty() && !FdIsValid(reference_profile_file_fd_)) {
@@ -562,9 +591,24 @@
   // Return true if the definition of the class was found in any of the dex_files.
   bool FindClass(const std::vector<std::unique_ptr<const DexFile>>& dex_files,
                  const std::string& klass_descriptor,
-                 /*out*/ProfileMethodInfo::ProfileClassReference* class_ref) {
+                 /*out*/TypeReference* class_ref) {
+    constexpr uint16_t kInvalidTypeIndex = std::numeric_limits<uint16_t>::max() - 1;
     for (const std::unique_ptr<const DexFile>& dex_file_ptr : dex_files) {
       const DexFile* dex_file = dex_file_ptr.get();
+      if (klass_descriptor == kInvalidClassDescriptor) {
+        if (kInvalidTypeIndex >= dex_file->NumTypeIds()) {
+          // The dex file does not contain all possible type ids which leaves us room
+          // to add an "invalid" type id.
+          class_ref->dex_file = dex_file;
+          class_ref->type_index = dex::TypeIndex(kInvalidTypeIndex);
+          return true;
+        } else {
+          // The dex file contains all possible type ids. We don't have any free type id
+          // that we can use as invalid.
+          continue;
+        }
+      }
+
       const DexFile::TypeId* type_id = dex_file->FindTypeId(klass_descriptor.c_str());
       if (type_id == nullptr) {
         continue;
@@ -582,16 +626,25 @@
   }
 
   // Find the method specified by method_spec in the class class_ref.
-  uint32_t FindMethodIndex(const ProfileMethodInfo::ProfileClassReference& class_ref,
+  uint32_t FindMethodIndex(const TypeReference& class_ref,
                            const std::string& method_spec) {
+    const DexFile* dex_file = class_ref.dex_file;
+    if (method_spec == kInvalidMethod) {
+      constexpr uint16_t kInvalidMethodIndex = std::numeric_limits<uint16_t>::max() - 1;
+      return kInvalidMethodIndex >= dex_file->NumMethodIds()
+             ? kInvalidMethodIndex
+             : DexFile::kDexNoIndex;
+    }
+
     std::vector<std::string> name_and_signature;
     Split(method_spec, kProfileParsingFirstCharInSignature, &name_and_signature);
     if (name_and_signature.size() != 2) {
       LOG(ERROR) << "Invalid method name and signature " << method_spec;
+      return DexFile::kDexNoIndex;
     }
+
     const std::string& name = name_and_signature[0];
     const std::string& signature = kProfileParsingFirstCharInSignature + name_and_signature[1];
-    const DexFile* dex_file = class_ref.dex_file;
 
     const DexFile::StringId* name_id = dex_file->FindStringId(name.c_str());
     if (name_id == nullptr) {
@@ -625,7 +678,7 @@
   // The format of the method spec is "inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;".
   //
   // TODO(calin): support INVOKE_INTERFACE and the range variants.
-  bool HasSingleInvoke(const ProfileMethodInfo::ProfileClassReference& class_ref,
+  bool HasSingleInvoke(const TypeReference& class_ref,
                        uint16_t method_index,
                        /*out*/uint32_t* dex_pc) {
     const DexFile* dex_file = class_ref.dex_file;
@@ -657,24 +710,46 @@
   // The possible line formats are:
   // "LJustTheCass;".
   // "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;".
+  // "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,invalid_class".
   // "LTestInline;->inlineMissingTypes(LSuper;)I+missing_types".
   // "LTestInline;->inlineNoInlineCaches(LSuper;)I".
   // "LTestInline;->*".
+  // "invalid_class".
+  // "LTestInline;->invalid_method".
   // The method and classes are searched only in the given dex files.
   bool ProcessLine(const std::vector<std::unique_ptr<const DexFile>>& dex_files,
                    const std::string& line,
                    /*out*/ProfileCompilationInfo* profile) {
     std::string klass;
     std::string method_str;
-    size_t method_sep_index = line.find(kMethodSep);
+    bool is_hot = false;
+    bool is_startup = false;
+    bool is_post_startup = false;
+    const size_t method_sep_index = line.find(kMethodSep, 0);
     if (method_sep_index == std::string::npos) {
-      klass = line;
+      klass = line.substr(0);
     } else {
-      klass = line.substr(0, method_sep_index);
+      // The method prefix flags are only valid for method strings.
+      size_t start_index = 0;
+      while (start_index < line.size() && line[start_index] != 'L') {
+        const char c = line[start_index];
+        if (c == kMethodFlagStringHot) {
+          is_hot = true;
+        } else if (c == kMethodFlagStringStartup) {
+          is_startup = true;
+        } else if (c == kMethodFlagStringPostStartup) {
+          is_post_startup = true;
+        } else {
+          LOG(WARNING) << "Invalid flag " << c;
+          return false;
+        }
+        ++start_index;
+      }
+      klass = line.substr(start_index, method_sep_index - start_index);
       method_str = line.substr(method_sep_index + kMethodSep.size());
     }
 
-    ProfileMethodInfo::ProfileClassReference class_ref;
+    TypeReference class_ref;
     if (!FindClass(dex_files, klass, &class_ref)) {
       LOG(WARNING) << "Could not find class: " << klass;
       return false;
@@ -687,7 +762,8 @@
       const auto& dex_resolved_classes = resolved_class_set.emplace(
             dex_file->GetLocation(),
             dex_file->GetBaseLocation(),
-            dex_file->GetLocationChecksum());
+            dex_file->GetLocationChecksum(),
+            dex_file->NumMethodIds());
       dex_resolved_classes.first->AddClass(class_ref.type_index);
       std::vector<ProfileMethodInfo> methods;
       if (method_str == kClassAllMethods) {
@@ -717,6 +793,9 @@
     std::string method_spec;
     std::vector<std::string> inline_cache_elems;
 
+    // A method line that carries no flag prefix at all is treated as hot.
+    is_hot = is_hot || !(is_startup || is_post_startup);
+
     std::vector<std::string> method_elems;
     bool is_missing_types = false;
     Split(method_str, kProfileParsingInlineChacheSep, &method_elems);
@@ -738,14 +817,13 @@
       return false;
     }
 
-    std::vector<ProfileMethodInfo> pmi;
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
     if (is_missing_types || !inline_cache_elems.empty()) {
       uint32_t dex_pc;
       if (!HasSingleInvoke(class_ref, method_index, &dex_pc)) {
         return false;
       }
-      std::vector<ProfileMethodInfo::ProfileClassReference> classes(inline_cache_elems.size());
+      std::vector<TypeReference> classes(inline_cache_elems.size());
       size_t class_it = 0;
       for (const std::string& ic_class : inline_cache_elems) {
         if (!FindClass(dex_files, ic_class, &(classes[class_it++]))) {
@@ -755,8 +833,29 @@
       }
       inline_caches.emplace_back(dex_pc, is_missing_types, classes);
     }
-    pmi.emplace_back(class_ref.dex_file, method_index, inline_caches);
-    profile->AddMethodsAndClasses(pmi, std::set<DexCacheResolvedClasses>());
+    ProfileMethodInfo pmi(class_ref.dex_file, method_index, inline_caches);
+    if (is_hot) {
+      profile->AddMethod(pmi);
+    }
+    if (is_startup) {
+      if (!profile->AddSampledMethod(/*is_startup*/ true,
+                                     pmi.dex_file->GetLocation(),
+                                     pmi.dex_file->GetLocationChecksum(),
+                                     method_index,
+                                     pmi.dex_file->NumMethodIds())) {
+        return false;
+      }
+      DCHECK(profile->IsStartupOrHotMethod(MethodReference(pmi.dex_file, method_index)));
+    }
+    if (is_post_startup) {
+      if (!profile->AddSampledMethod(/*is_startup*/ false,
+                                     pmi.dex_file->GetLocation(),
+                                     pmi.dex_file->GetLocationChecksum(),
+                                     method_index,
+                                     pmi.dex_file->NumMethodIds())) {
+        return false;
+      }
+    }
     return true;
   }
 
@@ -931,7 +1030,7 @@
     return profman.DumpProfileInfo();
   }
   if (profman.ShouldOnlyDumpClassesAndMethods()) {
-    return profman.DumpClasses();
+    return profman.DumpClassesAndMethods();
   }
   if (profman.ShouldCreateProfile()) {
     return profman.CreateProfile();
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 8ee5498..26e52e0 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -54,6 +54,7 @@
         "compiler_filter.cc",
         "debugger.cc",
         "dex_file.cc",
+        "dex_file_tracking_registrar.cc",
         "dex_file_annotations.cc",
         "dex_file_verifier.cc",
         "dex_instruction.cc",
@@ -123,6 +124,7 @@
         "jni_internal.cc",
         "jobject_comparator.cc",
         "linear_alloc.cc",
+        "managed_stack.cc",
         "mem_map.cc",
         "memory_region.cc",
         "method_handles.cc",
@@ -243,7 +245,6 @@
         "entrypoints/quick/quick_entrypoints_enum.cc",
         "entrypoints/quick/quick_field_entrypoints.cc",
         "entrypoints/quick/quick_fillarray_entrypoints.cc",
-        "entrypoints/quick/quick_instrumentation_entrypoints.cc",
         "entrypoints/quick/quick_jni_entrypoints.cc",
         "entrypoints/quick/quick_lock_entrypoints.cc",
         "entrypoints/quick/quick_math_entrypoints.cc",
@@ -352,6 +353,7 @@
                 "libdl",
                 // For android::FileMap used by libziparchive.
                 "libutils",
+                "libtombstoned_client",
             ],
             static_libs: [
                 // ZipArchive support, the order matters here to get all symbols.
@@ -426,6 +428,7 @@
     srcs: [
         "arch/instruction_set.h",
         "base/allocator.h",
+        "base/callee_save_type.h",
         "base/enums.h",
         "base/mutex.h",
         "debugger.h",
@@ -453,7 +456,6 @@
         "oat.h",
         "object_callbacks.h",
         "process_state.h",
-        "runtime.h",
         "stack.h",
         "thread.h",
         "thread_state.h",
diff --git a/runtime/arch/arch_test.cc b/runtime/arch/arch_test.cc
index d6056c0..838ae40 100644
--- a/runtime/arch/arch_test.cc
+++ b/runtime/arch/arch_test.cc
@@ -17,11 +17,30 @@
 #include <stdint.h>
 
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "common_runtime_test.h"
 #include "quick/quick_method_frame_info.h"
-// Common tests are declared next to the constants.
-#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y);
-#include "asm_support.h"
+
+
+// asm_support.h declares tests next to the #defines. We use asm_support_check.h to (safely)
+// generate CheckAsmSupportOffsetsAndSizes using gtest's EXPECT for the tests. We also use the
+// RETURN_TYPE, HEADER and FOOTER defines from asm_support_check.h to try to ensure that any
+// tests are actually generated.
+
+// Let CheckAsmSupportOffsetsAndSizes return a size_t (the count).
+#define ASM_SUPPORT_CHECK_RETURN_TYPE size_t
+
+// Declare the counter that will be updated per test.
+#define ASM_SUPPORT_CHECK_HEADER size_t count = 0;
+
+// Use EXPECT_EQ for tests, and increment the counter.
+#define ADD_TEST_EQ(x, y) EXPECT_EQ(x, y); count++;
+
+// Return the counter at the end of CheckAsmSupportOffsetsAndSizes.
+#define ASM_SUPPORT_CHECK_FOOTER return count;
+
+// Generate CheckAsmSupportOffsetsAndSizes().
+#include "asm_support_check.h"
 
 namespace art {
 
@@ -40,7 +59,7 @@
     ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
   }
 
-  static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
+  static void CheckFrameSize(InstructionSet isa, CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* const runtime = Runtime::Current();
     Thread* const self = Thread::Current();
@@ -57,7 +76,8 @@
 };
 
 TEST_F(ArchTest, CheckCommonOffsetsAndSizes) {
-  CheckAsmSupportOffsetsAndSizes();
+  size_t test_count = CheckAsmSupportOffsetsAndSizes();
+  EXPECT_GT(test_count, 0u);
 }
 
 // Grab architecture specific constants.
@@ -151,16 +171,16 @@
 #define TEST_ARCH(Arch, arch)                             \
   TEST_F(ArchTest, Arch) {                                \
     CheckFrameSize(InstructionSet::k##Arch,               \
-                   Runtime::kSaveAllCalleeSaves,          \
+                   CalleeSaveType::kSaveAllCalleeSaves,   \
                    arch::kFrameSizeSaveAllCalleeSaves);   \
     CheckFrameSize(InstructionSet::k##Arch,               \
-                   Runtime::kSaveRefsOnly,                \
+                   CalleeSaveType::kSaveRefsOnly,         \
                    arch::kFrameSizeSaveRefsOnly);         \
     CheckFrameSize(InstructionSet::k##Arch,               \
-                   Runtime::kSaveRefsAndArgs,             \
+                   CalleeSaveType::kSaveRefsAndArgs,      \
                    arch::kFrameSizeSaveRefsAndArgs);      \
     CheckFrameSize(InstructionSet::k##Arch,               \
-                   Runtime::kSaveEverything,              \
+                   CalleeSaveType::kSaveEverything,       \
                    arch::kFrameSizeSaveEverything);       \
   }
 TEST_ARCH(Arm, arm)
diff --git a/runtime/arch/arm/context_arm.cc b/runtime/arch/arm/context_arm.cc
index 9cbec1e..0db14fb 100644
--- a/runtime/arch/arm/context_arm.cc
+++ b/runtime/arch/arm/context_arm.cc
@@ -17,8 +17,9 @@
 #include "context_arm.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace arm {
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index 919b0af..8a8d264 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -18,6 +18,7 @@
 #include <string.h>
 
 #include "arch/arm/asm_support_arm.h"
+#include "base/bit_utils.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index 4c15450..b4bca01 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -25,7 +25,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 //
 // ARM specific fault handler functions.
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 6e387e7..676efc4 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1698,8 +1698,10 @@
     @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
     str   r0, [sp, #4]
     mov   r2, r9         @ pass Thread::Current
-    mov   r3, lr         @ pass LR
-    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
+    mov   r3, sp         @ pass SP
+    blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
+    cbz   r0, .Ldeliver_instrumentation_entry_exception
+                         @ Deliver exception if we got nullptr as function.
     mov   r12, r0        @ r12 holds reference to code
     ldr   r0, [sp, #4]   @ restore r0
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
@@ -1715,19 +1717,13 @@
     .cfi_adjust_cfa_offset 8
     .cfi_rel_offset r0, 0
     .cfi_rel_offset r1, 4
+    mov   r2, sp         @ pass pointer to saved r0/r1 as gpr_res*
     vpush {d0}           @ save fp return value
     .cfi_adjust_cfa_offset 8
-    sub   sp, #8         @ space for return value argument. Note: AAPCS stack alignment is 8B, no
-                         @ need to align by 16.
-    .cfi_adjust_cfa_offset 8
-    vstr  d0, [sp]       @ d0 -> [sp] for fpr_res
-    mov   r2, r0         @ pass return value as gpr_res
-    mov   r3, r1
-    mov   r0, r9         @ pass Thread::Current
+    mov   r3, sp         @ pass pointer to saved d0 as fpr_res*
     mov   r1, r12        @ pass SP
-    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res, fpr_res)
-    add   sp, #8
-    .cfi_adjust_cfa_offset -8
+    mov   r0, r9         @ pass Thread::Current
+    blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
 
     mov   r2, r0         @ link register saved by instrumentation
     mov   lr, r1         @ r1 is holding link register if we're to bounce to deoptimize
@@ -1737,9 +1733,16 @@
     .cfi_adjust_cfa_offset -8
     .cfi_restore r0
     .cfi_restore r1
-    add sp, #32          @ remove callee save frame
-    .cfi_adjust_cfa_offset -32
-    bx    r2             @ return
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    cbz   r2, .Ldo_deliver_instrumentation_exception
+                         @ Deliver exception if we got nullptr as function.
+    bx    r2             @ Otherwise, return
+.Ldeliver_instrumentation_entry_exception:
+    @ Deliver exception for art_quick_instrumentation_entry placed after
+    @ art_quick_instrumentation_exit so that the fallthrough works.
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+.Ldo_deliver_instrumentation_exception:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_entry
 
     /*
diff --git a/runtime/arch/arm/quick_method_frame_info_arm.h b/runtime/arch/arm/quick_method_frame_info_arm.h
index 35f1948..39061f0 100644
--- a/runtime/arch/arm/quick_method_frame_info_arm.h
+++ b/runtime/arch/arm/quick_method_frame_info_arm.h
@@ -17,10 +17,12 @@
 #ifndef ART_RUNTIME_ARCH_ARM_QUICK_METHOD_FRAME_INFO_ARM_H_
 #define ART_RUNTIME_ARCH_ARM_QUICK_METHOD_FRAME_INFO_ARM_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
 #include "quick/quick_method_frame_info.h"
 #include "registers_arm.h"
-#include "runtime.h"  // for Runtime::CalleeSaveType.
 
 namespace art {
 namespace arm {
@@ -53,44 +55,44 @@
 static constexpr uint32_t kArmCalleeSaveFpEverythingSpills =
     kArmCalleeSaveFpArgSpills | kArmCalleeSaveFpAllSpills;
 
-constexpr uint32_t ArmCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t ArmCalleeSaveCoreSpills(CalleeSaveType type) {
   return kArmCalleeSaveAlwaysSpills | kArmCalleeSaveRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kArmCalleeSaveArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kArmCalleeSaveAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kArmCalleeSaveEverythingSpills : 0);
 }
 
-constexpr uint32_t ArmCalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t ArmCalleeSaveFpSpills(CalleeSaveType type) {
   return kArmCalleeSaveFpAlwaysSpills | kArmCalleeSaveFpRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kArmCalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kArmCalleeSaveFpAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kArmCalleeSaveFpEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kArmCalleeSaveFpArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kArmCalleeSaveFpAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kArmCalleeSaveFpEverythingSpills : 0);
 }
 
-constexpr uint32_t ArmCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+constexpr uint32_t ArmCalleeSaveFrameSize(CalleeSaveType type) {
   return RoundUp((POPCOUNT(ArmCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(ArmCalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kArmPointerSize), kStackAlignment);
 }
 
-constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+constexpr QuickMethodFrameInfo ArmCalleeSaveMethodFrameInfo(CalleeSaveType type) {
   return QuickMethodFrameInfo(ArmCalleeSaveFrameSize(type),
                               ArmCalleeSaveCoreSpills(type),
                               ArmCalleeSaveFpSpills(type));
 }
 
-constexpr size_t ArmCalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
+constexpr size_t ArmCalleeSaveFpr1Offset(CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
          (POPCOUNT(ArmCalleeSaveCoreSpills(type)) +
           POPCOUNT(ArmCalleeSaveFpSpills(type))) * static_cast<size_t>(kArmPointerSize);
 }
 
-constexpr size_t ArmCalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
+constexpr size_t ArmCalleeSaveGpr1Offset(CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
          POPCOUNT(ArmCalleeSaveCoreSpills(type)) * static_cast<size_t>(kArmPointerSize);
 }
 
-constexpr size_t ArmCalleeSaveLrOffset(Runtime::CalleeSaveType type) {
+constexpr size_t ArmCalleeSaveLrOffset(CalleeSaveType type) {
   return ArmCalleeSaveFrameSize(type) -
       POPCOUNT(ArmCalleeSaveCoreSpills(type) & (-(1 << LR))) * static_cast<size_t>(kArmPointerSize);
 }
diff --git a/runtime/arch/arm64/context_arm64.cc b/runtime/arch/arm64/context_arm64.cc
index d5d1ec7..0465c1e 100644
--- a/runtime/arch/arm64/context_arm64.cc
+++ b/runtime/arch/arm64/context_arm64.cc
@@ -19,8 +19,9 @@
 #include "context_arm64.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace arm64 {
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index 610cdee..9bbcef3 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -18,6 +18,7 @@
 #include <string.h>
 
 #include "arch/arm64/asm_support_arm64.h"
+#include "base/bit_utils.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "entrypoints/quick/quick_default_externs.h"
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index dc4e8f3..0ead732 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -26,7 +26,7 @@
 #include "base/macros.h"
 #include "globals.h"
 #include "registers_arm64.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 7fabbe7..ee91277 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2224,15 +2224,19 @@
     mov   x20, x0             // Preserve method reference in a callee-save.
 
     mov   x2, xSELF
-    mov   x3, xLR
-    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
+    mov   x3, sp  // Pass SP
+    bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, SP)
 
     mov   xIP0, x0            // x0 = result of call.
     mov   x0, x20             // Reload method reference.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
+    cbz   xIP0, 1f            // Deliver the pending exception if method is null.
     adr   xLR, art_quick_instrumentation_exit
     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
+
+1:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_entry
 
     .extern artInstrumentationMethodExitFromCode
@@ -2241,30 +2245,28 @@
 
     SETUP_SAVE_REFS_ONLY_FRAME
 
-    // We need to save x0 and d0. We could use a callee-save from SETUP_REF_ONLY, but then
-    // we would need to fully restore it. As there are a lot of callee-save registers, it seems
-    // easier to have an extra small stack area.
-
     str x0, [sp, #-16]!       // Save integer result.
     .cfi_adjust_cfa_offset 16
-    str d0,  [sp, #8]         // Save floating-point result.
+    str d0, [sp, #8]          // Save floating-point result.
 
+    add   x3, sp, #8          // Pass floating-point result pointer.
+    mov   x2, sp              // Pass integer result pointer.
     add   x1, sp, #16         // Pass SP.
-    mov   x2, x0              // Pass integer result.
-    fmov  x3, d0              // Pass floating-point result.
     mov   x0, xSELF           // Pass Thread.
-    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
+    bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res*, fpr_res*)
 
     mov   xIP0, x0            // Return address from instrumentation call.
     mov   xLR, x1             // r1 is holding link register if we're to bounce to deoptimize
 
     ldr   d0, [sp, #8]        // Restore floating-point result.
     ldr   x0, [sp], #16       // Restore integer result, and drop stack area.
-    .cfi_adjust_cfa_offset 16
+    .cfi_adjust_cfa_offset -16
 
-    POP_SAVE_REFS_ONLY_FRAME
-
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    cbz   xIP0, 1f            // Null return address: deliver the pending exception.
     br    xIP0                // Tail-call out.
+1:
+    DELIVER_PENDING_EXCEPTION
 END art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/arm64/quick_method_frame_info_arm64.h b/runtime/arch/arm64/quick_method_frame_info_arm64.h
index 32d9d08..c231d4d 100644
--- a/runtime/arch/arm64/quick_method_frame_info_arm64.h
+++ b/runtime/arch/arm64/quick_method_frame_info_arm64.h
@@ -17,10 +17,13 @@
 #ifndef ART_RUNTIME_ARCH_ARM64_QUICK_METHOD_FRAME_INFO_ARM64_H_
 #define ART_RUNTIME_ARCH_ARM64_QUICK_METHOD_FRAME_INFO_ARM64_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
+#include "globals.h"
 #include "quick/quick_method_frame_info.h"
 #include "registers_arm64.h"
-#include "runtime.h"  // for Runtime::CalleeSaveType.
 
 namespace art {
 namespace arm64 {
@@ -76,44 +79,44 @@
     (1 << art::arm64::D27) | (1 << art::arm64::D28) | (1 << art::arm64::D29) |
     (1 << art::arm64::D30) | (1 << art::arm64::D31);
 
-constexpr uint32_t Arm64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t Arm64CalleeSaveCoreSpills(CalleeSaveType type) {
   return kArm64CalleeSaveAlwaysSpills | kArm64CalleeSaveRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kArm64CalleeSaveArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kArm64CalleeSaveAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kArm64CalleeSaveEverythingSpills : 0);
 }
 
-constexpr uint32_t Arm64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t Arm64CalleeSaveFpSpills(CalleeSaveType type) {
   return kArm64CalleeSaveFpAlwaysSpills | kArm64CalleeSaveFpRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kArm64CalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kArm64CalleeSaveFpAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kArm64CalleeSaveFpEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kArm64CalleeSaveFpArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kArm64CalleeSaveFpAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kArm64CalleeSaveFpEverythingSpills : 0);
 }
 
-constexpr uint32_t Arm64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+constexpr uint32_t Arm64CalleeSaveFrameSize(CalleeSaveType type) {
   return RoundUp((POPCOUNT(Arm64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Arm64CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kArm64PointerSize), kStackAlignment);
 }
 
-constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+constexpr QuickMethodFrameInfo Arm64CalleeSaveMethodFrameInfo(CalleeSaveType type) {
   return QuickMethodFrameInfo(Arm64CalleeSaveFrameSize(type),
                               Arm64CalleeSaveCoreSpills(type),
                               Arm64CalleeSaveFpSpills(type));
 }
 
-constexpr size_t Arm64CalleeSaveFpr1Offset(Runtime::CalleeSaveType type) {
+constexpr size_t Arm64CalleeSaveFpr1Offset(CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
          (POPCOUNT(Arm64CalleeSaveCoreSpills(type)) +
           POPCOUNT(Arm64CalleeSaveFpSpills(type))) * static_cast<size_t>(kArm64PointerSize);
 }
 
-constexpr size_t Arm64CalleeSaveGpr1Offset(Runtime::CalleeSaveType type) {
+constexpr size_t Arm64CalleeSaveGpr1Offset(CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
          POPCOUNT(Arm64CalleeSaveCoreSpills(type)) * static_cast<size_t>(kArm64PointerSize);
 }
 
-constexpr size_t Arm64CalleeSaveLrOffset(Runtime::CalleeSaveType type) {
+constexpr size_t Arm64CalleeSaveLrOffset(CalleeSaveType type) {
   return Arm64CalleeSaveFrameSize(type) -
       POPCOUNT(Arm64CalleeSaveCoreSpills(type) & (-(1 << LR))) *
       static_cast<size_t>(kArm64PointerSize);
diff --git a/runtime/arch/instruction_set.cc b/runtime/arch/instruction_set.cc
index 8f64dcd..64af7ec 100644
--- a/runtime/arch/instruction_set.cc
+++ b/runtime/arch/instruction_set.cc
@@ -18,8 +18,8 @@
 
 // Explicitly include our own elf.h to avoid Linux and other dependencies.
 #include "../elf.h"
+#include "android-base/logging.h"
 #include "base/bit_utils.h"
-#include "base/logging.h"
 #include "globals.h"
 
 namespace art {
@@ -36,11 +36,9 @@
     case kNone:
       LOG(FATAL) << "Unsupported instruction set " << isa;
       UNREACHABLE();
-
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown ISA " << isa;
+  UNREACHABLE();
 }
 
 const char* GetInstructionSetString(InstructionSet isa) {
@@ -60,10 +58,9 @@
       return "mips64";
     case kNone:
       return "none";
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown ISA " << isa;
+  UNREACHABLE();
 }
 
 InstructionSet GetInstructionSetFromString(const char* isa_str) {
@@ -128,10 +125,9 @@
     case kNone:
       LOG(FATAL) << "ISA kNone does not have alignment.";
       UNREACHABLE();
-    default:
-      LOG(FATAL) << "Unknown ISA " << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown ISA " << isa;
+  UNREACHABLE();
 }
 
 #if !defined(ART_STACK_OVERFLOW_GAP_arm) || !defined(ART_STACK_OVERFLOW_GAP_arm64) || \
@@ -197,11 +193,9 @@
     case kNone:
       LOG(FATAL) << "kNone has no stack overflow size";
       UNREACHABLE();
-
-    default:
-      LOG(FATAL) << "Unknown instruction set" << isa;
-      UNREACHABLE();
   }
+  LOG(FATAL) << "Unknown instruction set" << isa;
+  UNREACHABLE();
 }
 
 }  // namespace art
diff --git a/runtime/arch/instruction_set.h b/runtime/arch/instruction_set.h
index 7ef9a7a..7203b18 100644
--- a/runtime/arch/instruction_set.h
+++ b/runtime/arch/instruction_set.h
@@ -93,7 +93,7 @@
 // Fatal logging out of line to keep the header clean of logging.h.
 NO_RETURN void InstructionSetAbort(InstructionSet isa);
 
-static inline PointerSize GetInstructionSetPointerSize(InstructionSet isa) {
+constexpr PointerSize GetInstructionSetPointerSize(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -109,23 +109,37 @@
       return kMipsPointerSize;
     case kMips64:
       return kMips64PointerSize;
-    default:
-      InstructionSetAbort(isa);
+
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
-ALWAYS_INLINE static inline constexpr size_t GetInstructionSetInstructionAlignment(
-    InstructionSet isa) {
-  return (isa == kThumb2 || isa == kArm) ? kThumb2InstructionAlignment :
-         (isa == kArm64) ? kArm64InstructionAlignment :
-         (isa == kX86) ? kX86InstructionAlignment :
-         (isa == kX86_64) ? kX86_64InstructionAlignment :
-         (isa == kMips) ? kMipsInstructionAlignment :
-         (isa == kMips64) ? kMips64InstructionAlignment :
-         0;  // Invalid case, but constexpr doesn't support asserts.
+constexpr size_t GetInstructionSetInstructionAlignment(InstructionSet isa) {
+  switch (isa) {
+    case kArm:
+      // Fall-through.
+    case kThumb2:
+      return kThumb2InstructionAlignment;
+    case kArm64:
+      return kArm64InstructionAlignment;
+    case kX86:
+      return kX86InstructionAlignment;
+    case kX86_64:
+      return kX86_64InstructionAlignment;
+    case kMips:
+      return kMipsInstructionAlignment;
+    case kMips64:
+      return kMips64InstructionAlignment;
+
+    case kNone:
+      break;
+  }
+  InstructionSetAbort(isa);
 }
 
-static inline bool IsValidInstructionSet(InstructionSet isa) {
+constexpr bool IsValidInstructionSet(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -135,15 +149,16 @@
     case kMips:
     case kMips64:
       return true;
+
     case kNone:
-    default:
       return false;
   }
+  return false;
 }
 
 size_t GetInstructionSetAlignment(InstructionSet isa);
 
-static inline bool Is64BitInstructionSet(InstructionSet isa) {
+constexpr bool Is64BitInstructionSet(InstructionSet isa) {
   switch (isa) {
     case kArm:
     case kThumb2:
@@ -156,16 +171,17 @@
     case kMips64:
       return true;
 
-    default:
-      InstructionSetAbort(isa);
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
-static inline PointerSize InstructionSetPointerSize(InstructionSet isa) {
+constexpr PointerSize InstructionSetPointerSize(InstructionSet isa) {
   return Is64BitInstructionSet(isa) ? PointerSize::k64 : PointerSize::k32;
 }
 
-static inline size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
+constexpr size_t GetBytesPerGprSpillLocation(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -182,12 +198,13 @@
     case kMips64:
       return 8;
 
-    default:
-      InstructionSetAbort(isa);
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
-static inline size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
+constexpr size_t GetBytesPerFprSpillLocation(InstructionSet isa) {
   switch (isa) {
     case kArm:
       // Fall-through.
@@ -204,9 +221,10 @@
     case kMips64:
       return 8;
 
-    default:
-      InstructionSetAbort(isa);
+    case kNone:
+      break;
   }
+  InstructionSetAbort(isa);
 }
 
 size_t GetStackOverflowReservedBytes(InstructionSet isa);
@@ -243,7 +261,7 @@
 }
 
 // Use the lower 32b for the method pointer and the upper 32b for the code pointer.
-static inline TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
+static inline constexpr TwoWordReturn GetTwoWordSuccessValue(uintptr_t hi, uintptr_t lo) {
   static_assert(sizeof(uint32_t) == sizeof(uintptr_t), "Unexpected size difference");
   uint32_t lo32 = lo;
   uint64_t hi64 = static_cast<uint64_t>(hi);
@@ -251,6 +269,10 @@
 }
 
 #elif defined(__x86_64__) || defined(__aarch64__) || (defined(__mips__) && defined(__LP64__))
+
+// Note: TwoWordReturn can't be constexpr for 64-bit targets. We'd need a constexpr constructor,
+//       which would violate C-linkage in the entrypoint functions.
+
 struct TwoWordReturn {
   uintptr_t lo;
   uintptr_t hi;
diff --git a/runtime/arch/instruction_set_features.cc b/runtime/arch/instruction_set_features.cc
index 00d22c4..43c1711 100644
--- a/runtime/arch/instruction_set_features.cc
+++ b/runtime/arch/instruction_set_features.cc
@@ -33,33 +33,26 @@
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromVariant(
     InstructionSet isa, const std::string& variant, std::string* error_msg) {
-  std::unique_ptr<const InstructionSetFeatures> result;
   switch (isa) {
     case kArm:
     case kThumb2:
-      result.reset(ArmInstructionSetFeatures::FromVariant(variant, error_msg).release());
-      break;
+      return ArmInstructionSetFeatures::FromVariant(variant, error_msg);
     case kArm64:
-      result.reset(Arm64InstructionSetFeatures::FromVariant(variant, error_msg).release());
-      break;
+      return Arm64InstructionSetFeatures::FromVariant(variant, error_msg);
     case kMips:
-      result.reset(MipsInstructionSetFeatures::FromVariant(variant, error_msg).release());
-      break;
+      return MipsInstructionSetFeatures::FromVariant(variant, error_msg);
     case kMips64:
-      result = Mips64InstructionSetFeatures::FromVariant(variant, error_msg);
-      break;
+      return Mips64InstructionSetFeatures::FromVariant(variant, error_msg);
     case kX86:
-      result.reset(X86InstructionSetFeatures::FromVariant(variant, error_msg).release());
-      break;
+      return X86InstructionSetFeatures::FromVariant(variant, error_msg);
     case kX86_64:
-      result.reset(X86_64InstructionSetFeatures::FromVariant(variant, error_msg).release());
+      return X86_64InstructionSetFeatures::FromVariant(variant, error_msg);
+
+    case kNone:
       break;
-    default:
-      UNIMPLEMENTED(FATAL) << isa;
-      UNREACHABLE();
   }
-  CHECK_EQ(result == nullptr, error_msg->size() != 0);
-  return result;
+  UNIMPLEMENTED(FATAL) << isa;
+  UNREACHABLE();
 }
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromBitmap(InstructionSet isa,
@@ -68,23 +61,25 @@
   switch (isa) {
     case kArm:
     case kThumb2:
-      result.reset(ArmInstructionSetFeatures::FromBitmap(bitmap).release());
+      result = ArmInstructionSetFeatures::FromBitmap(bitmap);
       break;
     case kArm64:
-      result.reset(Arm64InstructionSetFeatures::FromBitmap(bitmap).release());
+      result = Arm64InstructionSetFeatures::FromBitmap(bitmap);
       break;
     case kMips:
-      result.reset(MipsInstructionSetFeatures::FromBitmap(bitmap).release());
+      result = MipsInstructionSetFeatures::FromBitmap(bitmap);
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromBitmap(bitmap);
       break;
     case kX86:
-      result.reset(X86InstructionSetFeatures::FromBitmap(bitmap).release());
+      result = X86InstructionSetFeatures::FromBitmap(bitmap);
       break;
     case kX86_64:
-      result.reset(X86_64InstructionSetFeatures::FromBitmap(bitmap).release());
+      result = X86_64InstructionSetFeatures::FromBitmap(bitmap);
       break;
+
+    case kNone:
     default:
       UNIMPLEMENTED(FATAL) << isa;
       UNREACHABLE();
@@ -94,120 +89,96 @@
 }
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromCppDefines() {
-  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result.reset(ArmInstructionSetFeatures::FromCppDefines().release());
-      break;
+      return ArmInstructionSetFeatures::FromCppDefines();
     case kArm64:
-      result.reset(Arm64InstructionSetFeatures::FromCppDefines().release());
-      break;
+      return Arm64InstructionSetFeatures::FromCppDefines();
     case kMips:
-      result.reset(MipsInstructionSetFeatures::FromCppDefines().release());
-      break;
+      return MipsInstructionSetFeatures::FromCppDefines();
     case kMips64:
-      result = Mips64InstructionSetFeatures::FromCppDefines();
-      break;
+      return Mips64InstructionSetFeatures::FromCppDefines();
     case kX86:
-      result.reset(X86InstructionSetFeatures::FromCppDefines().release());
-      break;
+      return X86InstructionSetFeatures::FromCppDefines();
     case kX86_64:
-      result.reset(X86_64InstructionSetFeatures::FromCppDefines().release());
+      return X86_64InstructionSetFeatures::FromCppDefines();
+
+    case kNone:
       break;
-    default:
-      UNIMPLEMENTED(FATAL) << kRuntimeISA;
-      UNREACHABLE();
   }
-  return result;
+  UNIMPLEMENTED(FATAL) << kRuntimeISA;
+  UNREACHABLE();
 }
 
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromCpuInfo() {
-  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result.reset(ArmInstructionSetFeatures::FromCpuInfo().release());
-      break;
+      return ArmInstructionSetFeatures::FromCpuInfo();
     case kArm64:
-      result.reset(Arm64InstructionSetFeatures::FromCpuInfo().release());
-      break;
+      return Arm64InstructionSetFeatures::FromCpuInfo();
     case kMips:
-      result.reset(MipsInstructionSetFeatures::FromCpuInfo().release());
-      break;
+      return MipsInstructionSetFeatures::FromCpuInfo();
     case kMips64:
-      result = Mips64InstructionSetFeatures::FromCpuInfo();
-      break;
+      return Mips64InstructionSetFeatures::FromCpuInfo();
     case kX86:
-      result.reset(X86InstructionSetFeatures::FromCpuInfo().release());
-      break;
+      return X86InstructionSetFeatures::FromCpuInfo();
     case kX86_64:
-      result.reset(X86_64InstructionSetFeatures::FromCpuInfo().release());
+      return X86_64InstructionSetFeatures::FromCpuInfo();
+
+    case kNone:
       break;
-    default:
-      UNIMPLEMENTED(FATAL) << kRuntimeISA;
-      UNREACHABLE();
   }
-  return result;
+  UNIMPLEMENTED(FATAL) << kRuntimeISA;
+  UNREACHABLE();
 }
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromHwcap() {
-  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result.reset(ArmInstructionSetFeatures::FromHwcap().release());
-      break;
+      return ArmInstructionSetFeatures::FromHwcap();
     case kArm64:
-      result.reset(Arm64InstructionSetFeatures::FromHwcap().release());
-      break;
+      return Arm64InstructionSetFeatures::FromHwcap();
     case kMips:
-      result.reset(MipsInstructionSetFeatures::FromHwcap().release());
-      break;
+      return MipsInstructionSetFeatures::FromHwcap();
     case kMips64:
-      result = Mips64InstructionSetFeatures::FromHwcap();
-      break;
+      return Mips64InstructionSetFeatures::FromHwcap();
     case kX86:
-      result.reset(X86InstructionSetFeatures::FromHwcap().release());
-      break;
+      return X86InstructionSetFeatures::FromHwcap();
     case kX86_64:
-      result.reset(X86_64InstructionSetFeatures::FromHwcap().release());
+      return X86_64InstructionSetFeatures::FromHwcap();
+
+    case kNone:
       break;
-    default:
-      UNIMPLEMENTED(FATAL) << kRuntimeISA;
-      UNREACHABLE();
   }
-  return result;
+  UNIMPLEMENTED(FATAL) << kRuntimeISA;
+  UNREACHABLE();
 }
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromAssembly() {
-  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result.reset(ArmInstructionSetFeatures::FromAssembly().release());
-      break;
+      return ArmInstructionSetFeatures::FromAssembly();
     case kArm64:
-      result.reset(Arm64InstructionSetFeatures::FromAssembly().release());
-      break;
+      return Arm64InstructionSetFeatures::FromAssembly();
     case kMips:
-      result.reset(MipsInstructionSetFeatures::FromAssembly().release());
-      break;
+      return MipsInstructionSetFeatures::FromAssembly();
     case kMips64:
-      result = Mips64InstructionSetFeatures::FromAssembly();
-      break;
+      return Mips64InstructionSetFeatures::FromAssembly();
     case kX86:
-      result.reset(X86InstructionSetFeatures::FromAssembly().release());
-      break;
+      return X86InstructionSetFeatures::FromAssembly();
     case kX86_64:
-      result.reset(X86_64InstructionSetFeatures::FromAssembly().release());
+      return X86_64InstructionSetFeatures::FromAssembly();
+
+    case kNone:
       break;
-    default:
-      UNIMPLEMENTED(FATAL) << kRuntimeISA;
-      UNREACHABLE();
   }
-  return result;
+  UNIMPLEMENTED(FATAL) << kRuntimeISA;
+  UNREACHABLE();
 }
 
 std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::AddFeaturesFromString(
diff --git a/runtime/arch/memcmp16.cc b/runtime/arch/memcmp16.cc
index 813df2f..e714cfc 100644
--- a/runtime/arch/memcmp16.cc
+++ b/runtime/arch/memcmp16.cc
@@ -37,7 +37,7 @@
   return MemCmp16(s0, s1, count);
 }
 
-}
+}  // namespace testing
 
 }  // namespace art
 
diff --git a/runtime/arch/memcmp16.h b/runtime/arch/memcmp16.h
index c449a14..b051a1c 100644
--- a/runtime/arch/memcmp16.h
+++ b/runtime/arch/memcmp16.h
@@ -59,7 +59,7 @@
 // implementation.
 int32_t MemCmp16Testing(const uint16_t* s0, const uint16_t* s1, size_t count);
 
-}
+}  // namespace testing
 
 }  // namespace art
 
diff --git a/runtime/arch/mips/context_mips.cc b/runtime/arch/mips/context_mips.cc
index 98ed5e6..ca1de0a 100644
--- a/runtime/arch/mips/context_mips.cc
+++ b/runtime/arch/mips/context_mips.cc
@@ -17,6 +17,7 @@
 #include "context_mips.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 7072a8a..52a3df5 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -18,13 +18,14 @@
 #include <sys/ucontext.h>
 
 #include "art_method.h"
+#include "base/callee_save_type.h"
 #include "base/hex_dump.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "quick_method_frame_info_mips.h"
 #include "registers_mips.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
@@ -80,7 +81,7 @@
 
   // Decrement $sp by the frame size of the kSaveEverything method and store
   // the fault address in the padding right after the ArtMethod*.
-  sc->sc_regs[mips::SP] -= mips::MipsCalleeSaveFrameSize(Runtime::kSaveEverything);
+  sc->sc_regs[mips::SP] -= mips::MipsCalleeSaveFrameSize(CalleeSaveType::kSaveEverything);
   uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips::SP]) + /* ArtMethod* */ 1;
   *padding = reinterpret_cast<uintptr_t>(info->si_addr);
 
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index 3c5afc2..6540b44 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -47,7 +47,7 @@
 static constexpr MipsLevel kRuntimeMipsLevel = MipsLevel::kBase;
 #endif
 
-static void GetFlagsFromCppDefined(bool* mips_isa_gte2, bool* r6, bool* fpu_32bit) {
+static void GetFlagsFromCppDefined(bool* mips_isa_gte2, bool* r6, bool* fpu_32bit, bool* msa) {
   // Override defaults based on compiler flags.
   if (kRuntimeMipsLevel >= MipsLevel::kR2) {
     *mips_isa_gte2 = true;
@@ -57,8 +57,10 @@
 
   if (kRuntimeMipsLevel >= MipsLevel::kR5) {
     *fpu_32bit = false;
+    *msa = true;
   } else {
     *fpu_32bit = true;
+    *msa = false;
   }
 
   if (kRuntimeMipsLevel >= MipsLevel::kR6) {
@@ -76,7 +78,8 @@
   bool fpu_32bit;
   bool mips_isa_gte2;
   bool r6;
-  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
+  bool msa;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit, &msa);
 
   // Override defaults based on variant string.
   // Only care if it is R1, R2, R5 or R6 and we assume all CPUs will have a FP unit.
@@ -87,6 +90,7 @@
     r6 = (variant[kPrefixLength] >= '6');
     fpu_32bit = (variant[kPrefixLength] < '5');
     mips_isa_gte2 = (variant[kPrefixLength] >= '2');
+    msa = (variant[kPrefixLength] >= '5');
   } else if (variant == "default") {
     // Default variant has FPU, is gte2. This is the traditional setting.
     //
@@ -100,32 +104,57 @@
     LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
   }
 
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
   bool fpu_32bit = (bitmap & kFpu32Bitfield) != 0;
   bool mips_isa_gte2 = (bitmap & kIsaRevGte2Bitfield) != 0;
   bool r6 = (bitmap & kR6) != 0;
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  bool msa = (bitmap & kMsaBitfield) != 0;
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromCppDefines() {
   bool fpu_32bit;
   bool mips_isa_gte2;
   bool r6;
-  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
+  bool msa;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit, &msa);
 
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromCpuInfo() {
   bool fpu_32bit;
   bool mips_isa_gte2;
   bool r6;
-  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
+  bool msa;
+  GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit, &msa);
 
-  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+  // Discard the compile-time MSA guess; MSA is reported only if /proc/cpuinfo lists it.
+  msa = false;
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    // Loop on the read result rather than eof(): a stream error then ends the loop instead of
+    // spinning forever, and a final line without a trailing newline is still examined.
+    std::string line;
+    while (std::getline(in, line)) {
+      LOG(INFO) << "cpuinfo line: " << line;
+      if (line.find("ASEs") != std::string::npos) {
+        LOG(INFO) << "found Application Specific Extensions";
+        if (line.find("msa") != std::string::npos) {
+          msa = true;
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
+
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromHwcap() {
@@ -145,13 +174,15 @@
   const MipsInstructionSetFeatures* other_as_mips = other->AsMipsInstructionSetFeatures();
   return (fpu_32bit_ == other_as_mips->fpu_32bit_) &&
       (mips_isa_gte2_ == other_as_mips->mips_isa_gte2_) &&
-      (r6_ == other_as_mips->r6_);
+      (r6_ == other_as_mips->r6_) &&
+      (msa_ == other_as_mips->msa_);
 }
 
 uint32_t MipsInstructionSetFeatures::AsBitmap() const {
   return (fpu_32bit_ ? kFpu32Bitfield : 0) |
       (mips_isa_gte2_ ? kIsaRevGte2Bitfield : 0) |
-      (r6_ ? kR6 : 0);
+      (r6_ ? kR6 : 0) |
+      (msa_ ? kMsaBitfield : 0);
 }
 
 std::string MipsInstructionSetFeatures::GetFeatureString() const {
@@ -169,6 +200,11 @@
   if (r6_) {
     result += ",r6";
   }  // Suppress non-r6.
+  if (msa_) {
+    result += ",msa";
+  } else {
+    result += ",-msa";
+  }
   return result;
 }
 
@@ -178,6 +214,7 @@
   bool fpu_32bit = fpu_32bit_;
   bool mips_isa_gte2 = mips_isa_gte2_;
   bool r6 = r6_;
+  bool msa = msa_;
   for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = android::base::Trim(*i);
     if (feature == "fpu32") {
@@ -192,13 +229,17 @@
       r6 = true;
     } else if (feature == "-r6") {
       r6 = false;
+    } else if (feature == "msa") {
+      msa = true;
+    } else if (feature == "-msa") {
+      msa = false;
     } else {
       *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
       return nullptr;
     }
   }
   return std::unique_ptr<const InstructionSetFeatures>(
-      new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6));
+      new MipsInstructionSetFeatures(fpu_32bit, mips_isa_gte2, r6, msa));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index 1aec99f..1cb852e 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -75,6 +75,11 @@
     return r6_;
   }
 
+  // Returns whether this feature set includes MSA (MIPS SIMD Architecture) support.
+  bool HasMsa() const {
+    return msa_;
+  }
+
   virtual ~MipsInstructionSetFeatures() {}
 
  protected:
@@ -84,11 +89,12 @@
                                  std::string* error_msg) const OVERRIDE;
 
  private:
-  MipsInstructionSetFeatures(bool fpu_32bit, bool mips_isa_gte2, bool r6)
+  MipsInstructionSetFeatures(bool fpu_32bit, bool mips_isa_gte2, bool r6, bool msa)
       : InstructionSetFeatures(),
         fpu_32bit_(fpu_32bit),
         mips_isa_gte2_(mips_isa_gte2),
-        r6_(r6) {
+        r6_(r6),
+        msa_(msa) {
     // Sanity checks.
     if (r6) {
       CHECK(mips_isa_gte2);
@@ -104,11 +110,13 @@
     kFpu32Bitfield = 1 << 0,
     kIsaRevGte2Bitfield = 1 << 1,
     kR6 = 1 << 2,
+    kMsaBitfield = 1 << 3,
   };
 
   const bool fpu_32bit_;
   const bool mips_isa_gte2_;
   const bool r6_;
+  const bool msa_;
 
   DISALLOW_COPY_AND_ASSIGN(MipsInstructionSetFeatures);
 };
diff --git a/runtime/arch/mips/instruction_set_features_mips_test.cc b/runtime/arch/mips/instruction_set_features_mips_test.cc
index 6613b84..54fd2c9 100644
--- a/runtime/arch/mips/instruction_set_features_mips_test.cc
+++ b/runtime/arch/mips/instruction_set_features_mips_test.cc
@@ -20,15 +20,109 @@
 
 namespace art {
 
-TEST(MipsInstructionSetFeaturesTest, MipsFeatures) {
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromDefaultVariant) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> mips_features(
       InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
   ASSERT_TRUE(mips_features.get() != nullptr) << error_msg;
   EXPECT_EQ(mips_features->GetInstructionSet(), kMips);
   EXPECT_TRUE(mips_features->Equals(mips_features.get()));
-  EXPECT_STREQ("fpu32,mips2", mips_features->GetFeatureString().c_str());
+  EXPECT_STREQ("fpu32,mips2,-msa", mips_features->GetFeatureString().c_str());
   EXPECT_EQ(mips_features->AsBitmap(), 3U);
 }
 
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR1Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r1_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r1_features->Equals(mips32r1_features.get()));
+  EXPECT_STREQ("fpu32,-mips2,-msa", mips32r1_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r1_features->AsBitmap(), 1U);  // kFpu32Bitfield
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r1_features->Equals(mips_default_features.get()));  // default has mips2
+}
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR2Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r2_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r2", &error_msg));
+  ASSERT_TRUE(mips32r2_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r2_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r2_features->Equals(mips32r2_features.get()));
+  EXPECT_STREQ("fpu32,mips2,-msa", mips32r2_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r2_features->AsBitmap(), 3U);  // kFpu32Bitfield | kIsaRevGte2Bitfield
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_TRUE(mips32r2_features->Equals(mips_default_features.get()));  // same flags as default
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r2_features->Equals(mips32r1_features.get()));
+}
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR5Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r5_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r5", &error_msg));
+  ASSERT_TRUE(mips32r5_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r5_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r5_features->Equals(mips32r5_features.get()));
+  EXPECT_STREQ("-fpu32,mips2,msa", mips32r5_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r5_features->AsBitmap(), 10U);  // kIsaRevGte2Bitfield | kMsaBitfield
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r5_features->Equals(mips_default_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r5_features->Equals(mips32r1_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r2_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r2", &error_msg));
+  ASSERT_TRUE(mips32r2_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r5_features->Equals(mips32r2_features.get()));
+}
+
+TEST(MipsInstructionSetFeaturesTest, MipsFeaturesFromR6Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips32r6_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r6", &error_msg));
+  ASSERT_TRUE(mips32r6_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips32r6_features->GetInstructionSet(), kMips);
+  EXPECT_TRUE(mips32r6_features->Equals(mips32r6_features.get()));
+  EXPECT_STREQ("-fpu32,mips2,r6,msa", mips32r6_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips32r6_features->AsBitmap(), 14U);  // kIsaRevGte2Bitfield | kR6 | kMsaBitfield
+
+  std::unique_ptr<const InstructionSetFeatures> mips_default_features(
+      InstructionSetFeatures::FromVariant(kMips, "default", &error_msg));
+  ASSERT_TRUE(mips_default_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips_default_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r1_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r1", &error_msg));
+  ASSERT_TRUE(mips32r1_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips32r1_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r2_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r2", &error_msg));
+  ASSERT_TRUE(mips32r2_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips32r2_features.get()));
+
+  std::unique_ptr<const InstructionSetFeatures> mips32r5_features(
+      InstructionSetFeatures::FromVariant(kMips, "mips32r5", &error_msg));
+  ASSERT_TRUE(mips32r5_features.get() != nullptr) << error_msg;
+  EXPECT_FALSE(mips32r6_features->Equals(mips32r5_features.get()));
+}
+
 }  // namespace art
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index e628a9f..d1da67f 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1252,7 +1252,39 @@
     .extern artLockObjectFromCode
 ENTRY art_quick_lock_object
     beqz    $a0, art_quick_throw_null_pointer_exception
+    li      $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
+    li      $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
+.Lretry_lock:
+    lw      $t0, THREAD_ID_OFFSET(rSELF)  # TODO: Can the thread ID really change during the loop?
+    ll      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    and     $t2, $t1, $t3                 # zero the gc bits
+    bnez    $t2, .Lnot_unlocked           # already thin locked
+    # Unlocked case - $t1: original lock word that's zero except for the read barrier bits.
+    or      $t2, $t1, $t0                 # $t2 holds thread id with count of 0 with preserved read barrier bits
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqz    $t2, .Lretry_lock             # store failed, retry
     nop
+    jalr    $zero, $ra
+    sync                                  # full (LoadLoad|LoadStore) memory barrier
+.Lnot_unlocked:
+    # $t1: original lock word, $t0: thread_id with count of 0 and zero read barrier bits
+    srl     $t2, $t1, LOCK_WORD_STATE_SHIFT
+    bnez    $t2, .Lslow_lock              # if either of the top two bits are set, go slow path
+    xor     $t2, $t1, $t0                 # lock_word.ThreadId() ^ self->ThreadId()
+    andi    $t2, $t2, 0xFFFF              # zero top 16 bits
+    bnez    $t2, .Lslow_lock              # lock word and self thread id's match -> recursive lock
+                                          # otherwise contention, go to slow path
+    and     $t2, $t1, $t3                 # zero the gc bits
+    addu    $t2, $t2, $t8                 # increment count in lock word
+    srl     $t2, $t2, LOCK_WORD_STATE_SHIFT  # if the first gc state bit is set, we overflowed.
+    bnez    $t2, .Lslow_lock              # if we overflow the count go slow path
+    addu    $t2, $t1, $t8                 # increment count for real
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqz    $t2, .Lretry_lock             # store failed, retry
+    nop
+    jalr    $zero, $ra
+    nop
+.Lslow_lock:
     SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     la      $t9, artLockObjectFromCode
     jalr    $t9                           # (Object* obj, Thread*)
@@ -1276,11 +1308,55 @@
     .extern artUnlockObjectFromCode
 ENTRY art_quick_unlock_object
     beqz    $a0, art_quick_throw_null_pointer_exception
+    li      $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE  # $t8: one unit of the thin lock count
+    li      $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  # $t3: mask that clears the gc bits
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
+    lw      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)  # $t1: original lock word
+#else
+    ll      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)  # Need to use atomic read-modify-write for read barrier
+#endif
+    srl     $t2, $t1, LOCK_WORD_STATE_SHIFT  # $t2: lock state bits
+    bnez    $t2, .Lslow_unlock         # if either of the top two bits is set, go slow path
+    lw      $t0, THREAD_ID_OFFSET(rSELF)  # $t0: self->ThreadId()
+    and     $t2, $t1, $t3              # zero the gc bits
+    xor     $t2, $t2, $t0              # lock_word.ThreadId() ^ self->ThreadId()
+    andi    $t2, $t2, 0xFFFF           # zero top 16 bits
+    bnez    $t2, .Lslow_unlock         # slow path unless lock word and self thread ids match
+    and     $t2, $t1, $t3              # zero the gc bits
+    bgeu    $t2, $t8, .Lrecursive_thin_unlock  # word (sans gc bits) >= COUNT_ONE -> recursive
+    # transition to unlocked
+    nor     $t2, $zero, $t3            # $t2 = LOCK_WORD_GC_STATE_MASK_SHIFTED
+    and     $t2, $t1, $t2              # $t2: zero except for the preserved gc bits
+    sync                               # full (LoadStore|StoreStore) memory barrier
+#ifndef USE_READ_BARRIER
+    jalr    $zero, $ra                 # return; the store below runs in the delay slot
+    sw      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)  # publish the unlocked word
+#else
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqz    $t2, .Lretry_unlock        # store failed, retry
     nop
-    SETUP_SAVE_REFS_ONLY_FRAME        # save callee saves in case exception allocation triggers GC
+    jalr    $zero, $ra
+    nop
+#endif
+.Lrecursive_thin_unlock:
+    # $t1: original lock word
+    subu    $t2, $t1, $t8              # decrement count
+#ifndef USE_READ_BARRIER
+    jalr    $zero, $ra                 # return; the store below runs in the delay slot
+    sw      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)  # store the decremented word
+#else
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqz    $t2, .Lretry_unlock        # store failed, retry
+    nop
+    jalr    $zero, $ra
+    nop
+#endif
+.Lslow_unlock:
+    SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     la      $t9, artUnlockObjectFromCode
-    jalr    $t9                       # (Object* obj, Thread*)
-    move    $a1, rSELF                # pass Thread::Current
+    jalr    $t9                        # (Object* obj, Thread*)
+    move    $a1, rSELF                 # pass Thread::Current
     RETURN_IF_ZERO
 END art_quick_unlock_object
 
diff --git a/runtime/arch/mips/quick_method_frame_info_mips.h b/runtime/arch/mips/quick_method_frame_info_mips.h
index 6f16352..01879a5 100644
--- a/runtime/arch/mips/quick_method_frame_info_mips.h
+++ b/runtime/arch/mips/quick_method_frame_info_mips.h
@@ -17,10 +17,12 @@
 #ifndef ART_RUNTIME_ARCH_MIPS_QUICK_METHOD_FRAME_INFO_MIPS_H_
 #define ART_RUNTIME_ARCH_MIPS_QUICK_METHOD_FRAME_INFO_MIPS_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
 #include "quick/quick_method_frame_info.h"
 #include "registers_mips.h"
-#include "runtime.h"  // for Runtime::CalleeSaveType.
 
 namespace art {
 namespace mips {
@@ -62,27 +64,27 @@
     (1 << art::mips::F24) | (1 << art::mips::F25) | (1 << art::mips::F26) | (1 << art::mips::F27) |
     (1 << art::mips::F28) | (1 << art::mips::F29) | (1 << art::mips::F30) | (1u << art::mips::F31);
 
-constexpr uint32_t MipsCalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t MipsCalleeSaveCoreSpills(CalleeSaveType type) {
   return kMipsCalleeSaveAlwaysSpills | kMipsCalleeSaveRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kMipsCalleeSaveEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kMipsCalleeSaveArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kMipsCalleeSaveAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kMipsCalleeSaveEverythingSpills : 0);
 }
 
-constexpr uint32_t MipsCalleeSaveFPSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t MipsCalleeSaveFPSpills(CalleeSaveType type) {
   return kMipsCalleeSaveFpAlwaysSpills | kMipsCalleeSaveFpRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kMipsCalleeSaveAllFPSpills : 0) |
-      (type == Runtime::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kMipsCalleeSaveFpArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kMipsCalleeSaveAllFPSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kMipsCalleeSaveFpEverythingSpills : 0);
 }
 
-constexpr uint32_t MipsCalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+constexpr uint32_t MipsCalleeSaveFrameSize(CalleeSaveType type) {
   return RoundUp((POPCOUNT(MipsCalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(MipsCalleeSaveFPSpills(type))   /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kMipsPointerSize), kStackAlignment);
 }
 
-constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+constexpr QuickMethodFrameInfo MipsCalleeSaveMethodFrameInfo(CalleeSaveType type) {
   return QuickMethodFrameInfo(MipsCalleeSaveFrameSize(type),
                               MipsCalleeSaveCoreSpills(type),
                               MipsCalleeSaveFPSpills(type));
diff --git a/runtime/arch/mips/registers_mips.cc b/runtime/arch/mips/registers_mips.cc
index 5d31f2f..92c2746 100644
--- a/runtime/arch/mips/registers_mips.cc
+++ b/runtime/arch/mips/registers_mips.cc
@@ -45,5 +45,14 @@
   return os;
 }
 
+// Streams "w<N>" for a valid vector register and "VectorRegister[<N>]" for anything else.
+std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs) {
+  const int index = static_cast<int>(rhs);
+  if (rhs < W0 || rhs >= kNumberOfVectorRegisters) {
+    return os << "VectorRegister[" << index << "]";
+  }
+  return os << "w" << index;
+}
+
 }  // namespace mips
 }  // namespace art
diff --git a/runtime/arch/mips/registers_mips.h b/runtime/arch/mips/registers_mips.h
index 555f3f0..57af150 100644
--- a/runtime/arch/mips/registers_mips.h
+++ b/runtime/arch/mips/registers_mips.h
@@ -106,6 +106,45 @@
 };
 std::ostream& operator<<(std::ostream& os, const FRegister& rhs);
 
+// Values for vector registers: W0-W31 carry the numeric values 0-31.
+enum VectorRegister {
+  W0  =  0,
+  W1  =  1,
+  W2  =  2,
+  W3  =  3,
+  W4  =  4,
+  W5  =  5,
+  W6  =  6,
+  W7  =  7,
+  W8  =  8,
+  W9  =  9,
+  W10 = 10,
+  W11 = 11,
+  W12 = 12,
+  W13 = 13,
+  W14 = 14,
+  W15 = 15,
+  W16 = 16,
+  W17 = 17,
+  W18 = 18,
+  W19 = 19,
+  W20 = 20,
+  W21 = 21,
+  W22 = 22,
+  W23 = 23,
+  W24 = 24,
+  W25 = 25,
+  W26 = 26,
+  W27 = 27,
+  W28 = 28,
+  W29 = 29,
+  W30 = 30,
+  W31 = 31,
+  kNumberOfVectorRegisters = 32,  // One past W31; exclusive upper bound of the valid range.
+  kNoVectorRegister = -1,  // Sentinel outside the valid range.
+};
+std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs);
+
 }  // namespace mips
 }  // namespace art
 
diff --git a/runtime/arch/mips64/context_mips64.cc b/runtime/arch/mips64/context_mips64.cc
index bd1ac3b..b14908f 100644
--- a/runtime/arch/mips64/context_mips64.cc
+++ b/runtime/arch/mips64/context_mips64.cc
@@ -17,6 +17,7 @@
 #include "context_mips64.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index 763d93e..007f7b3 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 
+#include <math.h>
 #include <string.h>
 
 #include "atomic.h"
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index f9a92c8..9d77ebc 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -19,13 +19,14 @@
 #include <sys/ucontext.h>
 
 #include "art_method.h"
+#include "base/callee_save_type.h"
 #include "base/hex_dump.h"
 #include "base/logging.h"
 #include "base/macros.h"
 #include "globals.h"
 #include "quick_method_frame_info_mips64.h"
 #include "registers_mips64.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_throw_null_pointer_exception_from_signal();
@@ -82,7 +83,7 @@
 
   // Decrement $sp by the frame size of the kSaveEverything method and store
   // the fault address in the padding right after the ArtMethod*.
-  sc->sc_regs[mips64::SP] -= mips64::Mips64CalleeSaveFrameSize(Runtime::kSaveEverything);
+  sc->sc_regs[mips64::SP] -= mips64::Mips64CalleeSaveFrameSize(CalleeSaveType::kSaveEverything);
   uintptr_t* padding = reinterpret_cast<uintptr_t*>(sc->sc_regs[mips64::SP]) + /* ArtMethod* */ 1;
   *padding = reinterpret_cast<uintptr_t>(info->si_addr);
 
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 40bad16..c9eeb7c 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1222,8 +1222,38 @@
      */
     .extern artLockObjectFromCode
 ENTRY_NO_GP art_quick_lock_object
-    beq     $a0, $zero, art_quick_throw_null_pointer_exception
+    beqzc   $a0, art_quick_throw_null_pointer_exception
+    li      $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
+    li      $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
+.Lretry_lock:
+    lw      $t0, THREAD_ID_OFFSET(rSELF)  # TODO: Can the thread ID really change during the loop?
+    ll      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    and     $t2, $t1, $t3                 # zero the gc bits
+    bnezc   $t2, .Lnot_unlocked           # already thin locked
+    # Unlocked case - $t1: original lock word that's zero except for the read barrier bits.
+    or      $t2, $t1, $t0                 # $t2 holds thread id with count of 0 with preserved read barrier bits
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqzc   $t2, .Lretry_lock             # store failed, retry
+    sync                                  # full (LoadLoad|LoadStore) memory barrier
+    jic     $ra, 0
+.Lnot_unlocked:
+    # $t1: original lock word, $t0: thread_id with count of 0 and zero read barrier bits
+    srl     $t2, $t1, LOCK_WORD_STATE_SHIFT
+    bnezc   $t2, .Lslow_lock              # if either of the top two bits is set, go slow path
+    xor     $t2, $t1, $t0                 # lock_word.ThreadId() ^ self->ThreadId()
+    andi    $t2, $t2, 0xFFFF              # zero top 16 bits
+    bnezc   $t2, .Lslow_lock              # thread ids differ -> contention, go to slow path
+                                          # thread ids match -> recursive lock, fall through
+    and     $t2, $t1, $t3                 # zero the gc bits
+    addu    $t2, $t2, $t8                 # increment count in lock word
+    srl     $t2, $t2, LOCK_WORD_STATE_SHIFT  # if the first gc state bit is set, we overflowed. NOTE(review): shift is STATE, not GC_STATE - confirm.
+    bnezc   $t2, .Lslow_lock              # if we overflow the count go slow path
+    addu    $t2, $t1, $t8                 # increment count for real
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqzc   $t2, .Lretry_lock             # store failed, retry
     nop
+    jic     $ra, 0
+.Lslow_lock:
     .cpsetup $t9, $t8, art_quick_lock_object
     SETUP_SAVE_REFS_ONLY_FRAME            # save callee saves in case we block
     jal     artLockObjectFromCode         # (Object* obj, Thread*)
@@ -1246,8 +1276,48 @@
      */
     .extern artUnlockObjectFromCode
 ENTRY_NO_GP art_quick_unlock_object
-    beq     $a0, $zero, art_quick_throw_null_pointer_exception
+    beqzc   $a0, art_quick_throw_null_pointer_exception
+    li      $t8, LOCK_WORD_THIN_LOCK_COUNT_ONE
+    li      $t3, LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED
+.Lretry_unlock:
+#ifndef USE_READ_BARRIER
+    lw      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+#else
+    ll      $t1, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)  # Need to use atomic read-modify-write for read barrier
+#endif
+    srl     $t2, $t1, LOCK_WORD_STATE_SHIFT
+    bnezc   $t2, .Lslow_unlock         # if either of the top two bits is set, go slow path
+    lw      $t0, THREAD_ID_OFFSET(rSELF)
+    and     $t2, $t1, $t3              # zero the gc bits
+    xor     $t2, $t2, $t0              # lock_word.ThreadId() ^ self->ThreadId()
+    andi    $t2, $t2, 0xFFFF           # zero top 16 bits
+    bnezc   $t2, .Lslow_unlock         # go slow path unless lock word and self thread ids match
+    and     $t2, $t1, $t3              # zero the gc bits
+    bgeuc   $t2, $t8, .Lrecursive_thin_unlock  # lock word (gc bits cleared) >= COUNT_ONE -> recursive unlock
+    # transition to unlocked
+    nor     $t2, $zero, $t3            # $t2 = LOCK_WORD_GC_STATE_MASK_SHIFTED
+    and     $t2, $t1, $t2              # $t2: zero except for the preserved gc bits
+    sync                               # full (LoadStore|StoreStore) memory barrier
+#ifndef USE_READ_BARRIER
+    sw      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+#else
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqzc   $t2, .Lretry_unlock        # store failed, retry
     nop
+#endif
+    jic     $ra, 0
+.Lrecursive_thin_unlock:
+    # t1: original lock word
+    subu    $t2, $t1, $t8              # decrement count
+#ifndef USE_READ_BARRIER
+    sw      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+#else
+    sc      $t2, MIRROR_OBJECT_LOCK_WORD_OFFSET($a0)
+    beqzc   $t2, .Lretry_unlock        # store failed, retry
+    nop
+#endif
+    jic     $ra, 0
+.Lslow_unlock:
     .cpsetup $t9, $t8, art_quick_unlock_object
     SETUP_SAVE_REFS_ONLY_FRAME         # save callee saves in case exception allocation triggers GC
     jal     artUnlockObjectFromCode    # (Object* obj, Thread*)
diff --git a/runtime/arch/mips64/quick_method_frame_info_mips64.h b/runtime/arch/mips64/quick_method_frame_info_mips64.h
index d774473..a55ab0e 100644
--- a/runtime/arch/mips64/quick_method_frame_info_mips64.h
+++ b/runtime/arch/mips64/quick_method_frame_info_mips64.h
@@ -17,10 +17,12 @@
 #ifndef ART_RUNTIME_ARCH_MIPS64_QUICK_METHOD_FRAME_INFO_MIPS64_H_
 #define ART_RUNTIME_ARCH_MIPS64_QUICK_METHOD_FRAME_INFO_MIPS64_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
 #include "quick/quick_method_frame_info.h"
 #include "registers_mips64.h"
-#include "runtime.h"  // for Runtime::CalleeSaveType.
 
 namespace art {
 namespace mips64 {
@@ -69,27 +71,27 @@
     (1 << art::mips64::F27) | (1 << art::mips64::F28) | (1 << art::mips64::F29) |
     (1 << art::mips64::F30) | (1 << art::mips64::F31);
 
-constexpr uint32_t Mips64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t Mips64CalleeSaveCoreSpills(CalleeSaveType type) {
   return kMips64CalleeSaveAlwaysSpills | kMips64CalleeSaveRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kMips64CalleeSaveEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kMips64CalleeSaveArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kMips64CalleeSaveAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kMips64CalleeSaveEverythingSpills : 0);
 }
 
-constexpr uint32_t Mips64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t Mips64CalleeSaveFpSpills(CalleeSaveType type) {
   return kMips64CalleeSaveFpRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kMips64CalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveAllCalleeSaves ? kMips64CalleeSaveFpAllSpills : 0) |
-      (type == Runtime::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kMips64CalleeSaveFpArgSpills : 0) |
+      (type == CalleeSaveType::kSaveAllCalleeSaves ? kMips64CalleeSaveFpAllSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kMips64CalleeSaveFpEverythingSpills : 0);
 }
 
-constexpr uint32_t Mips64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+constexpr uint32_t Mips64CalleeSaveFrameSize(CalleeSaveType type) {
   return RoundUp((POPCOUNT(Mips64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(Mips64CalleeSaveFpSpills(type))   /* fprs */ +
                   + 1 /* Method* */) * static_cast<size_t>(kMips64PointerSize), kStackAlignment);
 }
 
-constexpr QuickMethodFrameInfo Mips64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+constexpr QuickMethodFrameInfo Mips64CalleeSaveMethodFrameInfo(CalleeSaveType type) {
   return QuickMethodFrameInfo(Mips64CalleeSaveFrameSize(type),
                               Mips64CalleeSaveCoreSpills(type),
                               Mips64CalleeSaveFpSpills(type));
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 207bf9d..bd51809 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "class_linker-inl.h"
 #include "common_runtime_test.h"
@@ -43,8 +44,8 @@
       // Create callee-save methods
       ScopedObjectAccess soa(Thread::Current());
       runtime_->SetInstructionSet(kRuntimeISA);
-      for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-        Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
+      for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); ++i) {
+        CalleeSaveType type = CalleeSaveType(i);
         if (!runtime_->HasCalleeSaveMethod(type)) {
           runtime_->SetCalleeSaveMethod(runtime_->CreateCalleeSaveMethod(), type);
         }
diff --git a/runtime/arch/x86/context_x86.cc b/runtime/arch/x86/context_x86.cc
index cb3dfec..5c31712 100644
--- a/runtime/arch/x86/context_x86.cc
+++ b/runtime/arch/x86/context_x86.cc
@@ -17,6 +17,7 @@
 #include "context_x86.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "base/memory_tool.h"
 #include "quick/quick_method_frame_info.h"
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index 7d8abb8..798c500 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -26,7 +26,7 @@
 #include "base/macros.h"
 #include "base/safe_copy.h"
 #include "globals.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 #if defined(__APPLE__)
 #define ucontext __darwin_ucontext
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 5788122..cc0bdf2 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -33,23 +33,28 @@
 
 static constexpr const char* x86_known_variants[] = {
     "atom",
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_ssse3[] = {
     "atom",
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_sse4_1[] = {
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_sse4_2[] = {
+    "sandybridge",
     "silvermont",
 };
 
 static constexpr const char* x86_variants_with_popcnt[] = {
+    "sandybridge",
     "silvermont",
 };
 
diff --git a/runtime/arch/x86/instruction_set_features_x86_test.cc b/runtime/arch/x86/instruction_set_features_x86_test.cc
index 7e6ad3e..c67b4dd 100644
--- a/runtime/arch/x86/instruction_set_features_x86_test.cc
+++ b/runtime/arch/x86/instruction_set_features_x86_test.cc
@@ -69,6 +69,43 @@
   EXPECT_FALSE(x86_features->Equals(x86_default_features.get()));
 }
 
+TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSandybridgeVariant) {
+  // Build features for a 32-bit x86 sandybridge processor.
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> x86_features(
+      InstructionSetFeatures::FromVariant(kX86, "sandybridge", &error_msg));
+  ASSERT_TRUE(x86_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_features->Equals(x86_features.get()));
+  EXPECT_STREQ("ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
+               x86_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_features->AsBitmap(), 39U);  // 0b100111; presumably the 4 features enabled above.
+
+  // Build features for a 32-bit x86 default processor.
+  std::unique_ptr<const InstructionSetFeatures> x86_default_features(
+      InstructionSetFeatures::FromVariant(kX86, "default", &error_msg));
+  ASSERT_TRUE(x86_default_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_default_features->GetInstructionSet(), kX86);
+  EXPECT_TRUE(x86_default_features->Equals(x86_default_features.get()));
+  EXPECT_STREQ("-ssse3,-sse4.1,-sse4.2,-avx,-avx2,-popcnt",
+               x86_default_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_default_features->AsBitmap(), 0U);  // No feature bits set.
+
+  // Build features for a 64-bit x86-64 sandybridge processor.
+  std::unique_ptr<const InstructionSetFeatures> x86_64_features(
+      InstructionSetFeatures::FromVariant(kX86_64, "sandybridge", &error_msg));
+  ASSERT_TRUE(x86_64_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(x86_64_features->GetInstructionSet(), kX86_64);
+  EXPECT_TRUE(x86_64_features->Equals(x86_64_features.get()));
+  EXPECT_STREQ("ssse3,sse4.1,sse4.2,-avx,-avx2,popcnt",
+               x86_64_features->GetFeatureString().c_str());
+  EXPECT_EQ(x86_64_features->AsBitmap(), 39U);  // 0b100111, same bits as the 32-bit variant.
+
+  EXPECT_FALSE(x86_64_features->Equals(x86_features.get()));
+  EXPECT_FALSE(x86_64_features->Equals(x86_default_features.get()));
+  EXPECT_FALSE(x86_features->Equals(x86_default_features.get()));
+}
+
 TEST(X86InstructionSetFeaturesTest, X86FeaturesFromSilvermontVariant) {
   // Build features for a 32-bit x86 silvermont processor.
   std::string error_msg;
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2222f5c..031b36b 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1948,17 +1948,23 @@
 DEFINE_FUNCTION art_quick_instrumentation_entry
     SETUP_SAVE_REFS_AND_ARGS_FRAME ebx, edx
     PUSH eax                      // Save eax which will be clobbered by the callee-save method.
-    subl LITERAL(12), %esp        // Align stack.
-    CFI_ADJUST_CFA_OFFSET(12)
-    pushl FRAME_SIZE_SAVE_REFS_AND_ARGS-4+16(%esp)  // Pass LR.
-    CFI_ADJUST_CFA_OFFSET(4)
+    subl LITERAL(16), %esp        // Align stack (12 bytes) and reserve space for the SP argument
+    CFI_ADJUST_CFA_OFFSET(16)     // (4 bytes). We lack the scratch registers to calculate the SP
+                                  // right now, so we will just fill it in later.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current().
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                      // Pass receiver.
     PUSH eax                      // Pass Method*.
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
+    leal 32(%esp), %eax           // Put original SP into eax
+    movl %eax, 12(%esp)           // set SP
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
+
     addl LITERAL(28), %esp        // Pop arguments upto saved Method*.
     CFI_ADJUST_CFA_OFFSET(-28)
+
+    testl %eax, %eax
+    jz 1f                         // Test for null return (indicating exception) and handle it.
+
     movl 60(%esp), %edi           // Restore edi.
     movl %eax, 60(%esp)           // Place code* over edi, just under return pc.
     movl SYMBOL(art_quick_instrumentation_exit)@GOT(%ebx), %ebx
@@ -1980,9 +1986,15 @@
     addl LITERAL(60), %esp        // Wind stack back upto code*.
     CFI_ADJUST_CFA_OFFSET(-60)
     ret                           // Call method (and pop).
+1:
+    // Make caller handle exception
+    addl LITERAL(4), %esp
+    CFI_ADJUST_CFA_OFFSET(-4)
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_entry
 
-DEFINE_FUNCTION art_quick_instrumentation_exit
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
     pushl LITERAL(0)              // Push a fake return PC as there will be none on the stack.
     CFI_ADJUST_CFA_OFFSET(4)
     SETUP_SAVE_REFS_ONLY_FRAME ebx, ebx
@@ -1992,18 +2004,19 @@
     movq %xmm0, (%esp)
     PUSH edx                      // Save gpr return value.
     PUSH eax
-    subl LITERAL(16), %esp        // Align stack
-    CFI_ADJUST_CFA_OFFSET(16)
-    movq %xmm0, (%esp)            // Pass float return value.
-    PUSH edx                      // Pass gpr return value.
-    PUSH eax
+    leal 8(%esp), %eax            // Get pointer to fpr_result
+    movl %esp, %edx               // Get pointer to gpr_result
+    PUSH eax                      // Pass fpr_result
+    PUSH edx                      // Pass gpr_result
     PUSH ecx                      // Pass SP.
     pushl %fs:THREAD_SELF_OFFSET  // Pass Thread::Current.
     CFI_ADJUST_CFA_OFFSET(4)
-    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result, fpr_result)
+    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_result*, fpr_result*)
+    testl %eax, %eax              // Check if we returned error.
+    jz 1f
     mov   %eax, %ecx              // Move returned link register.
-    addl LITERAL(32), %esp        // Pop arguments.
-    CFI_ADJUST_CFA_OFFSET(-32)
+    addl LITERAL(16), %esp        // Pop arguments.
+    CFI_ADJUST_CFA_OFFSET(-16)
     movl %edx, %ebx               // Move returned link register for deopt
                                   // (ebx is pretending to be our LR).
     POP eax                       // Restore gpr return value.
@@ -2015,6 +2028,11 @@
     addl LITERAL(4), %esp         // Remove fake return pc.
     CFI_ADJUST_CFA_OFFSET(-4)
     jmp   *%ecx                   // Return.
+1:
+    addl LITERAL(32), %esp        // Error path: drop the arguments and, presumably, the saved results.
+    CFI_ADJUST_CFA_OFFSET(-32)
+    RESTORE_SAVE_REFS_ONLY_FRAME
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/x86/quick_method_frame_info_x86.h b/runtime/arch/x86/quick_method_frame_info_x86.h
index 9fcde35..8342c9f 100644
--- a/runtime/arch/x86/quick_method_frame_info_x86.h
+++ b/runtime/arch/x86/quick_method_frame_info_x86.h
@@ -17,10 +17,12 @@
 #ifndef ART_RUNTIME_ARCH_X86_QUICK_METHOD_FRAME_INFO_X86_H_
 #define ART_RUNTIME_ARCH_X86_QUICK_METHOD_FRAME_INFO_X86_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
 #include "quick/quick_method_frame_info.h"
 #include "registers_x86.h"
-#include "runtime.h"  // for Runtime::CalleeSaveType.
 
 namespace art {
 namespace x86 {
@@ -54,24 +56,24 @@
     (1 << art::x86::XMM4) | (1 << art::x86::XMM5) |
     (1 << art::x86::XMM6) | (1 << art::x86::XMM7);
 
-constexpr uint32_t X86CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t X86CalleeSaveCoreSpills(CalleeSaveType type) {
   return kX86CalleeSaveAlwaysSpills | kX86CalleeSaveRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kX86CalleeSaveArgSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kX86CalleeSaveEverythingSpills : 0);
 }
 
-constexpr uint32_t X86CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
-    return (type == Runtime::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
-        (type == Runtime::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
+constexpr uint32_t X86CalleeSaveFpSpills(CalleeSaveType type) {
+    return (type == CalleeSaveType::kSaveRefsAndArgs ? kX86CalleeSaveFpArgSpills : 0) |
+           (type == CalleeSaveType::kSaveEverything ? kX86CalleeSaveFpEverythingSpills : 0);
 }
 
-constexpr uint32_t X86CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+constexpr uint32_t X86CalleeSaveFrameSize(CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86CalleeSaveCoreSpills(type)) /* gprs */ +
                   2 * POPCOUNT(X86CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kX86PointerSize), kStackAlignment);
 }
 
-constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+constexpr QuickMethodFrameInfo X86CalleeSaveMethodFrameInfo(CalleeSaveType type) {
   return QuickMethodFrameInfo(X86CalleeSaveFrameSize(type),
                               X86CalleeSaveCoreSpills(type),
                               X86CalleeSaveFpSpills(type));
diff --git a/runtime/arch/x86/thread_x86.cc b/runtime/arch/x86/thread_x86.cc
index 241650e..cc8f1fa 100644
--- a/runtime/arch/x86/thread_x86.cc
+++ b/runtime/arch/x86/thread_x86.cc
@@ -22,7 +22,7 @@
 #include "asm_support_x86.h"
 #include "base/enums.h"
 #include "base/macros.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 #if defined(__APPLE__)
diff --git a/runtime/arch/x86_64/context_x86_64.cc b/runtime/arch/x86_64/context_x86_64.cc
index 7c49e9c..a4db223 100644
--- a/runtime/arch/x86_64/context_x86_64.cc
+++ b/runtime/arch/x86_64/context_x86_64.cc
@@ -17,6 +17,7 @@
 #include "context_x86_64.h"
 
 #include "base/bit_utils.h"
+#include "base/bit_utils_iterator.h"
 #include "quick/quick_method_frame_info.h"
 
 namespace art {
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 41651d8..ad06873 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1920,24 +1920,30 @@
     movq %rdi, %r12               // Preserve method pointer in a callee-save.
 
     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
-    movq FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp), %rcx   // Pass return PC.
+    movq %rsp, %rcx                     // Pass SP.
 
-    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)
+    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
 
                                   // %rax = result of call.
-    movq %r12, %rdi               // Reload method pointer.
+    testq %rax, %rax
+    jz 1f
 
+    movq %r12, %rdi               // Reload method pointer.
     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
     movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
 
     RESTORE_SAVE_REFS_AND_ARGS_FRAME
 
     jmp *%rax                     // Tail call to intended method.
+1:
+    RESTORE_SAVE_REFS_AND_ARGS_FRAME
+    DELIVER_PENDING_EXCEPTION
 #endif  // __APPLE__
 END_FUNCTION art_quick_instrumentation_entry
 
-DEFINE_FUNCTION art_quick_instrumentation_exit
+DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
+    CFI_ADJUST_CFA_OFFSET(8)
 
     SETUP_SAVE_REFS_ONLY_FRAME
 
@@ -1948,15 +1954,16 @@
     movq  %rsp, %rsi                          // Pass SP.
 
     PUSH rax                  // Save integer result.
+    movq %rsp, %rdx           // Pass integer result pointer.
+
     subq LITERAL(8), %rsp     // Save floating-point result.
     CFI_ADJUST_CFA_OFFSET(8)
     movq %xmm0, (%rsp)
+    movq %rsp, %rcx           // Pass floating-point result pointer.
 
     movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
-    movq  %rax, %rdx                          // Pass integer result.
-    movq  %xmm0, %rcx                         // Pass floating-point result.
 
-    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
+    call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)
 
     movq  %rax, %rdi          // Store return PC
     movq  %rdx, %rsi          // Store second return PC in hidden arg.
@@ -1968,9 +1975,15 @@
 
     RESTORE_SAVE_REFS_ONLY_FRAME
 
+    testq %rdi, %rdi          // Check if we have a return-pc to go to. If we don't then there was
+                              // an exception.
+    jz 1f
+
     addq LITERAL(8), %rsp     // Drop fake return pc.
 
     jmp   *%rdi               // Return.
+1:
+    DELIVER_PENDING_EXCEPTION
 END_FUNCTION art_quick_instrumentation_exit
 
     /*
diff --git a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
index 867522f..425d616 100644
--- a/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
+++ b/runtime/arch/x86_64/quick_method_frame_info_x86_64.h
@@ -17,10 +17,12 @@
 #ifndef ART_RUNTIME_ARCH_X86_64_QUICK_METHOD_FRAME_INFO_X86_64_H_
 #define ART_RUNTIME_ARCH_X86_64_QUICK_METHOD_FRAME_INFO_X86_64_H_
 
+#include "arch/instruction_set.h"
 #include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "base/enums.h"
 #include "quick/quick_method_frame_info.h"
 #include "registers_x86_64.h"
-#include "runtime.h"  // for Runtime::CalleeSaveType.
 
 namespace art {
 namespace x86_64 {
@@ -53,25 +55,25 @@
     (1 << art::x86_64::XMM8) | (1 << art::x86_64::XMM9) |
     (1 << art::x86_64::XMM10) | (1 << art::x86_64::XMM11);
 
-constexpr uint32_t X86_64CalleeSaveCoreSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t X86_64CalleeSaveCoreSpills(CalleeSaveType type) {
   return kX86_64CalleeSaveAlwaysSpills | kX86_64CalleeSaveRefSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
-      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kX86_64CalleeSaveArgSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kX86_64CalleeSaveEverythingSpills : 0);
 }
 
-constexpr uint32_t X86_64CalleeSaveFpSpills(Runtime::CalleeSaveType type) {
+constexpr uint32_t X86_64CalleeSaveFpSpills(CalleeSaveType type) {
   return kX86_64CalleeSaveFpSpills |
-      (type == Runtime::kSaveRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
-      (type == Runtime::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
+      (type == CalleeSaveType::kSaveRefsAndArgs ? kX86_64CalleeSaveFpArgSpills : 0) |
+      (type == CalleeSaveType::kSaveEverything ? kX86_64CalleeSaveFpEverythingSpills : 0);
 }
 
-constexpr uint32_t X86_64CalleeSaveFrameSize(Runtime::CalleeSaveType type) {
+constexpr uint32_t X86_64CalleeSaveFrameSize(CalleeSaveType type) {
   return RoundUp((POPCOUNT(X86_64CalleeSaveCoreSpills(type)) /* gprs */ +
                   POPCOUNT(X86_64CalleeSaveFpSpills(type)) /* fprs */ +
                   1 /* Method* */) * static_cast<size_t>(kX86_64PointerSize), kStackAlignment);
 }
 
-constexpr QuickMethodFrameInfo X86_64CalleeSaveMethodFrameInfo(Runtime::CalleeSaveType type) {
+constexpr QuickMethodFrameInfo X86_64CalleeSaveMethodFrameInfo(CalleeSaveType type) {
   return QuickMethodFrameInfo(X86_64CalleeSaveFrameSize(type),
                               X86_64CalleeSaveCoreSpills(type),
                               X86_64CalleeSaveFpSpills(type));
diff --git a/runtime/arch/x86_64/thread_x86_64.cc b/runtime/arch/x86_64/thread_x86_64.cc
index 553b656..19d25f6 100644
--- a/runtime/arch/x86_64/thread_x86_64.cc
+++ b/runtime/arch/x86_64/thread_x86_64.cc
@@ -18,7 +18,7 @@
 
 #include "asm_support_x86_64.h"
 #include "base/macros.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 #if defined(__linux__)
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 0de0f02..a8a58e1 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -28,7 +28,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "primitive.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "well_known_classes.h"
 
@@ -352,11 +352,6 @@
   return name;
 }
 
-template<typename RootVisitorType>
-inline void ArtField::VisitRoots(RootVisitorType& visitor) {
-  visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
-}
-
 template <typename Visitor>
 inline void ArtField::UpdateObjects(const Visitor& visitor) {
   ObjPtr<mirror::Class> old_class = DeclaringClassRoot().Read<kWithoutReadBarrier>();
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 3789b0c..5114578 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -171,7 +171,9 @@
 
   // NO_THREAD_SAFETY_ANALYSIS since we don't know what the callback requires.
   template<typename RootVisitorType>
-  void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS;
+  ALWAYS_INLINE inline void VisitRoots(RootVisitorType& visitor) NO_THREAD_SAFETY_ANALYSIS {
+    visitor.VisitRoot(declaring_class_.AddressWithoutBarrier());
+  }
 
   bool IsVolatile() REQUIRES_SHARED(Locks::mutator_lock_) {
     return (GetAccessFlags() & kAccVolatile) != 0;
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 59cd978..40d7e5c 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -20,6 +20,7 @@
 #include "art_method.h"
 
 #include "art_field.h"
+#include "base/callee_save_type.h"
 #include "base/logging.h"
 #include "class_linker-inl.h"
 #include "common_throws.h"
@@ -27,6 +28,7 @@
 #include "dex_file_annotations.h"
 #include "dex_file-inl.h"
 #include "gc_root-inl.h"
+#include "invoke_type.h"
 #include "jit/profiling_info.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
@@ -35,11 +37,12 @@
 #include "mirror/string.h"
 #include "oat.h"
 #include "obj_ptr-inl.h"
+#include "primitive.h"
 #include "quick/quick_method_frame_info.h"
 #include "read_barrier-inl.h"
 #include "runtime-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -199,8 +202,8 @@
   }
   Runtime* runtime = Runtime::Current();
   bool result = false;
-  for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-    if (this == runtime->GetCalleeSaveMethod(Runtime::CalleeSaveType(i))) {
+  for (uint32_t i = 0; i < static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType); i++) {
+    if (this == runtime->GetCalleeSaveMethod(CalleeSaveType(i))) {
       result = true;
       break;
     }
@@ -271,12 +274,14 @@
     return "<runtime internal resolution method>";
   } else if (this == runtime->GetImtConflictMethod()) {
     return "<runtime internal imt conflict method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
+  } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveAllCalleeSaves)) {
     return "<runtime internal callee-save all registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly)) {
+  } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsOnly)) {
     return "<runtime internal callee-save reference registers method>";
-  } else if (this == runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs)) {
+  } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs)) {
     return "<runtime internal callee-save reference and argument registers method>";
+  } else if (this == runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything)) {
+    return "<runtime internal save-every-register method>";
   } else {
     return "<unknown runtime internal method>";
   }
@@ -340,6 +345,10 @@
   return dex_file->GetTypeDescriptor(dex_file->GetTypeId(proto_id.return_type_idx_));
 }
 
+inline Primitive::Type ArtMethod::GetReturnTypePrimitive() {
+  return Primitive::GetType(GetReturnTypeDescriptor()[0]);  // First descriptor char only.
+}
+
 inline const char* ArtMethod::GetTypeDescriptorFromTypeIdx(dex::TypeIndex type_idx) {
   DCHECK(!IsProxyMethod());
   const DexFile* dex_file = GetDexFile();
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 7de8916..d591e09 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -664,7 +664,9 @@
     }
     if (existing_entry_point == GetQuickInstrumentationEntryPoint()) {
       // We are running the generic jni stub, but the method is being instrumented.
-      DCHECK_EQ(pc, 0u) << "Should be a downcall";
+      // NB We would normally expect the pc to be zero but we can have non-zero pc's if
+      // instrumentation is installed or removed during the call which is using the generic jni
+      // trampoline.
       DCHECK(IsNative());
       return nullptr;
     }
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 856bfd2..3a8d279 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -24,19 +24,17 @@
 #include "base/enums.h"
 #include "dex_file.h"
 #include "gc_root.h"
-#include "invoke_type.h"
-#include "method_reference.h"
 #include "modifiers.h"
-#include "mirror/dex_cache.h"
-#include "mirror/object.h"
 #include "obj_ptr.h"
+#include "offsets.h"
+#include "primitive.h"
 #include "read_barrier_option.h"
-#include "utils.h"
 
 namespace art {
 
 template<class T> class Handle;
 class ImtConflictTable;
+enum InvokeType : uint32_t;
 union JValue;
 class OatQuickMethodHeader;
 class ProfilingInfo;
@@ -47,8 +45,13 @@
 namespace mirror {
 class Array;
 class Class;
+class ClassLoader;
+class DexCache;
 class IfTable;
+class Object;
+template <typename MirrorType> class ObjectArray;
 class PointerArray;
+class String;
 }  // namespace mirror
 
 class ArtMethod FINAL {
@@ -318,11 +321,11 @@
   }
 
   static MemberOffset DexMethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, dex_method_index_);
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, dex_method_index_));
   }
 
   static MemberOffset MethodIndexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(ArtMethod, method_index_);
+    return MemberOffset(OFFSETOF_MEMBER(ArtMethod, method_index_));
   }
 
   uint32_t GetCodeItemOffset() {
@@ -524,10 +527,6 @@
 
   bool IsImtUnimplementedMethod() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  MethodReference ToMethodReference() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return MethodReference(GetDexFile(), GetDexMethodIndex());
-  }
-
   // Find the catch block for the given exception type and dex_pc. When a catch block is found,
   // indicates whether the found catch block is responsible for clearing the exception or whether
   // a move-exception instruction is present.
@@ -571,6 +570,8 @@
 
   const char* GetReturnTypeDescriptor() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  ALWAYS_INLINE Primitive::Type GetReturnTypePrimitive() REQUIRES_SHARED(Locks::mutator_lock_);
+
   const char* GetTypeDescriptorFromTypeIdx(dex::TypeIndex type_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 6d271ed..44c0661 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -17,21 +17,6 @@
 #ifndef ART_RUNTIME_ASM_SUPPORT_H_
 #define ART_RUNTIME_ASM_SUPPORT_H_
 
-#if defined(__cplusplus)
-#include "art_method.h"
-#include "base/bit_utils.h"
-#include "gc/allocator/rosalloc.h"
-#include "gc/heap.h"
-#include "jit/jit.h"
-#include "lock_word.h"
-#include "mirror/class.h"
-#include "mirror/dex_cache.h"
-#include "mirror/string.h"
-#include "utils/dex_cache_arrays_layout.h"
-#include "runtime.h"
-#include "thread.h"
-#endif
-
 #include "read_barrier_c.h"
 
 #if defined(__arm__) || defined(__mips__)
@@ -48,14 +33,10 @@
 #define SUSPEND_CHECK_INTERVAL 96
 #endif
 
-#if defined(__cplusplus)
-
+// To generate tests related to the constants in this header, either define ADD_TEST_EQ before
+// including, or use asm_support_check.h.
 #ifndef ADD_TEST_EQ  // Allow #include-r to replace with their own.
-#define ADD_TEST_EQ(x, y) CHECK_EQ(x, y);
-#endif
-
-static inline void CheckAsmSupportOffsetsAndSizes() {
-#else
+#define DEFINED_ADD_TEST_EQ 1
 #define ADD_TEST_EQ(x, y)
 #endif
 
@@ -73,6 +54,7 @@
 // Export new defines (for assembly use) by editing cpp-define-generator def files.
 #define DEFINE_CHECK_EQ ADD_TEST_EQ
 #include "asm_support_gen.h"
+#undef DEFINE_CHECK_EQ
 
 // Offset of field Thread::tlsPtr_.exception.
 #define THREAD_EXCEPTION_OFFSET (THREAD_CARD_TABLE_OFFSET + __SIZEOF_POINTER__)
@@ -249,8 +231,9 @@
 #define STRING_COMPRESSION_FEATURE 1
 ADD_TEST_EQ(STRING_COMPRESSION_FEATURE, art::mirror::kUseStringCompression);
 
-#if defined(__cplusplus)
-}  // End of CheckAsmSupportOffsets.
+#ifdef DEFINED_ADD_TEST_EQ
+#undef ADD_TEST_EQ
+#undef DEFINED_ADD_TEST_EQ
 #endif
 
 #endif  // ART_RUNTIME_ASM_SUPPORT_H_
diff --git a/runtime/asm_support_check.h b/runtime/asm_support_check.h
new file mode 100644
index 0000000..cc6a578
--- /dev/null
+++ b/runtime/asm_support_check.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_ASM_SUPPORT_CHECK_H_
+#define ART_RUNTIME_ASM_SUPPORT_CHECK_H_
+
+#include "art_method.h"
+#include "base/bit_utils.h"
+#include "base/callee_save_type.h"
+#include "gc/accounting/card_table.h"
+#include "gc/allocator/rosalloc.h"
+#include "gc/heap.h"
+#include "jit/jit.h"
+#include "lock_word.h"
+#include "mirror/class.h"
+#include "mirror/dex_cache.h"
+#include "mirror/string.h"
+#include "utils/dex_cache_arrays_layout.h"
+#include "runtime.h"
+#include "stack.h"
+#include "thread.h"
+
+#ifndef ADD_TEST_EQ
+#define ADD_TEST_EQ(x, y) CHECK_EQ(x, y);
+#endif
+
+#ifndef ASM_SUPPORT_CHECK_RETURN_TYPE
+#define ASM_SUPPORT_CHECK_RETURN_TYPE void
+#endif
+
+// Prepare for re-include of asm_support.h.
+#ifdef ART_RUNTIME_ASM_SUPPORT_H_
+#undef ART_RUNTIME_ASM_SUPPORT_H_
+#endif
+
+namespace art {
+
+static inline ASM_SUPPORT_CHECK_RETURN_TYPE CheckAsmSupportOffsetsAndSizes() {
+#ifdef ASM_SUPPORT_CHECK_HEADER
+  ASM_SUPPORT_CHECK_HEADER
+#endif
+
+#include "asm_support.h"
+
+#ifdef ASM_SUPPORT_CHECK_FOOTER
+  ASM_SUPPORT_CHECK_FOOTER
+#endif
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_ASM_SUPPORT_CHECK_H_
diff --git a/runtime/atomic.cc b/runtime/atomic.cc
index d5ae570..07aceb7 100644
--- a/runtime/atomic.cc
+++ b/runtime/atomic.cc
@@ -17,7 +17,7 @@
 #include "atomic.h"
 #include "base/mutex.h"
 #include "base/stl_util.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/atomic.h b/runtime/atomic.h
index 45c3165..25dd1a3 100644
--- a/runtime/atomic.h
+++ b/runtime/atomic.h
@@ -257,6 +257,13 @@
     return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_relaxed);
   }
 
+  // Atomically replace the value with desired value if it matches the expected value. Prior writes
+  // to other memory locations become visible to the threads that do a consume or an acquire on the
+  // same location.
+  bool CompareExchangeStrongRelease(T expected_value, T desired_value) {
+    return this->compare_exchange_strong(expected_value, desired_value, std::memory_order_release);
+  }
+
   // The same, except it may fail spuriously.
   bool CompareExchangeWeakRelaxed(T expected_value, T desired_value) {
     return this->compare_exchange_weak(expected_value, desired_value, std::memory_order_relaxed);
diff --git a/runtime/barrier_test.cc b/runtime/barrier_test.cc
index f68a5d4..25b6925 100644
--- a/runtime/barrier_test.cc
+++ b/runtime/barrier_test.cc
@@ -22,7 +22,7 @@
 #include "common_runtime_test.h"
 #include "mirror/object_array-inl.h"
 #include "thread_pool.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 class CheckWaitTask : public Task {
diff --git a/runtime/base/allocator.h b/runtime/base/allocator.h
index 8d1c982..fba9308 100644
--- a/runtime/base/allocator.h
+++ b/runtime/base/allocator.h
@@ -17,12 +17,7 @@
 #ifndef ART_RUNTIME_BASE_ALLOCATOR_H_
 #define ART_RUNTIME_BASE_ALLOCATOR_H_
 
-#include <map>
-#include <set>
-#include <unordered_map>
-
 #include "atomic.h"
-#include "base/hash_map.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/type_static_if.h"
@@ -156,29 +151,6 @@
                                                 TrackingAllocatorImpl<T, kTag>,
                                                 std::allocator<T>>::type;
 
-template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
-using AllocationTrackingMultiMap = std::multimap<
-    Key, T, Compare, TrackingAllocator<std::pair<const Key, T>, kTag>>;
-
-template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
-using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
-
-template<class Key,
-         class T,
-         AllocatorTag kTag,
-         class Hash = std::hash<Key>,
-         class Pred = std::equal_to<Key>>
-using AllocationTrackingUnorderedMap = std::unordered_map<
-    Key, T, Hash, Pred, TrackingAllocator<std::pair<const Key, T>, kTag>>;
-
-template<class Key,
-         class T,
-         class EmptyFn,
-         AllocatorTag kTag,
-         class Hash = std::hash<Key>,
-         class Pred = std::equal_to<Key>>
-using AllocationTrackingHashMap = HashMap<
-    Key, T, EmptyFn, Hash, Pred, TrackingAllocator<std::pair<Key, T>, kTag>>;
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ALLOCATOR_H_
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index fc5b5b1..54b40f2 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -14,16 +14,19 @@
  * limitations under the License.
  */
 
+#include "arena_allocator-inl.h"
+
+#include <sys/mman.h>
+
 #include <algorithm>
 #include <cstddef>
 #include <iomanip>
 #include <numeric>
 
-#include "arena_allocator-inl.h"
 #include "logging.h"
 #include "mem_map.h"
 #include "mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "systrace.h"
 
 namespace art {
@@ -88,6 +91,7 @@
   "CallingConv  ",
   "CHA          ",
   "Scheduler    ",
+  "Profile      ",
 };
 
 template <bool kCount>
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 5430458..ebde82d 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -98,6 +98,7 @@
   kArenaAllocCallingConvention,
   kArenaAllocCHA,
   kArenaAllocScheduler,
+  kArenaAllocProfile,
   kNumArenaAllocKinds
 };
 
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index f536c72..0844678 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -17,13 +17,11 @@
 #ifndef ART_RUNTIME_BASE_BIT_UTILS_H_
 #define ART_RUNTIME_BASE_BIT_UTILS_H_
 
-#include <iterator>
 #include <limits>
 #include <type_traits>
 
-#include "base/iteration_range.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
+#include "base/stl_util_identity.h"
 
 namespace art {
 
@@ -312,85 +310,6 @@
           : static_cast<T>(0);
 }
 
-// Using the Curiously Recurring Template Pattern to implement everything shared
-// by LowToHighBitIterator and HighToLowBitIterator, i.e. everything but operator*().
-template <typename T, typename Iter>
-class BitIteratorBase
-    : public std::iterator<std::forward_iterator_tag, uint32_t, ptrdiff_t, void, void> {
-  static_assert(std::is_integral<T>::value, "T must be integral");
-  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
-
-  static_assert(sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t), "Unsupported size");
-
- public:
-  BitIteratorBase() : bits_(0u) { }
-  explicit BitIteratorBase(T bits) : bits_(bits) { }
-
-  Iter& operator++() {
-    DCHECK_NE(bits_, 0u);
-    uint32_t bit = *static_cast<Iter&>(*this);
-    bits_ &= ~(static_cast<T>(1u) << bit);
-    return static_cast<Iter&>(*this);
-  }
-
-  Iter& operator++(int) {
-    Iter tmp(static_cast<Iter&>(*this));
-    ++*this;
-    return tmp;
-  }
-
- protected:
-  T bits_;
-
-  template <typename U, typename I>
-  friend bool operator==(const BitIteratorBase<U, I>& lhs, const BitIteratorBase<U, I>& rhs);
-};
-
-template <typename T, typename Iter>
-bool operator==(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
-  return lhs.bits_ == rhs.bits_;
-}
-
-template <typename T, typename Iter>
-bool operator!=(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
-  return !(lhs == rhs);
-}
-
-template <typename T>
-class LowToHighBitIterator : public BitIteratorBase<T, LowToHighBitIterator<T>> {
- public:
-  using BitIteratorBase<T, LowToHighBitIterator<T>>::BitIteratorBase;
-
-  uint32_t operator*() const {
-    DCHECK_NE(this->bits_, 0u);
-    return CTZ(this->bits_);
-  }
-};
-
-template <typename T>
-class HighToLowBitIterator : public BitIteratorBase<T, HighToLowBitIterator<T>> {
- public:
-  using BitIteratorBase<T, HighToLowBitIterator<T>>::BitIteratorBase;
-
-  uint32_t operator*() const {
-    DCHECK_NE(this->bits_, 0u);
-    static_assert(std::numeric_limits<T>::radix == 2, "Unexpected radix!");
-    return std::numeric_limits<T>::digits - 1u - CLZ(this->bits_);
-  }
-};
-
-template <typename T>
-IterationRange<LowToHighBitIterator<T>> LowToHighBits(T bits) {
-  return IterationRange<LowToHighBitIterator<T>>(
-      LowToHighBitIterator<T>(bits), LowToHighBitIterator<T>());
-}
-
-template <typename T>
-IterationRange<HighToLowBitIterator<T>> HighToLowBits(T bits) {
-  return IterationRange<HighToLowBitIterator<T>>(
-      HighToLowBitIterator<T>(bits), HighToLowBitIterator<T>());
-}
-
 // Returns value with bit set in lowest one-bit position or 0 if 0.  (java.lang.X.lowestOneBit).
 template <typename kind>
 inline static kind LowestOneBitValue(kind opnd) {
diff --git a/runtime/base/bit_utils_iterator.h b/runtime/base/bit_utils_iterator.h
new file mode 100644
index 0000000..8514de6
--- /dev/null
+++ b/runtime/base/bit_utils_iterator.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_BIT_UTILS_ITERATOR_H_
+#define ART_RUNTIME_BASE_BIT_UTILS_ITERATOR_H_
+
+#include <iterator>
+#include <limits>
+#include <type_traits>
+
+#include "base/bit_utils.h"
+#include "base/iteration_range.h"
+#include "base/logging.h"
+#include "base/stl_util.h"
+
+namespace art {
+
+// Using the Curiously Recurring Template Pattern to implement everything shared
+// by LowToHighBitIterator and HighToLowBitIterator, i.e. everything but operator*().
+template <typename T, typename Iter>
+class BitIteratorBase
+    : public std::iterator<std::forward_iterator_tag, uint32_t, ptrdiff_t, void, void> {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  static_assert(std::is_unsigned<T>::value, "T must be unsigned");
+
+  static_assert(sizeof(T) == sizeof(uint32_t) || sizeof(T) == sizeof(uint64_t), "Unsupported size");
+
+ public:
+  BitIteratorBase() : bits_(0u) { }  // Empty bit set; equals any exhausted iterator.
+  explicit BitIteratorBase(T bits) : bits_(bits) { }
+
+  Iter& operator++() {  // Pre-increment: clears the bit that operator*() currently yields.
+    DCHECK_NE(bits_, 0u);
+    uint32_t bit = *static_cast<Iter&>(*this);
+    bits_ &= ~(static_cast<T>(1u) << bit);
+    return static_cast<Iter&>(*this);
+  }
+
+  Iter operator++(int) {  // Post-increment: returns the previous state by value.
+    Iter tmp(static_cast<Iter&>(*this));
+    ++*this;
+    return tmp;  // `tmp` is a local; returning a reference to it would dangle.
+  }
+
+ protected:
+  T bits_;  // Bits that have not been visited yet.
+
+  template <typename U, typename I>
+  friend bool operator==(const BitIteratorBase<U, I>& lhs, const BitIteratorBase<U, I>& rhs);
+};
+
+template <typename T, typename Iter>
+bool operator==(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
+  return lhs.bits_ == rhs.bits_;
+}
+
+template <typename T, typename Iter>
+bool operator!=(const BitIteratorBase<T, Iter>& lhs, const BitIteratorBase<T, Iter>& rhs) {
+  return !(lhs == rhs);
+}
+
+template <typename T>
+class LowToHighBitIterator : public BitIteratorBase<T, LowToHighBitIterator<T>> {
+ public:
+  using BitIteratorBase<T, LowToHighBitIterator<T>>::BitIteratorBase;
+
+  uint32_t operator*() const {
+    DCHECK_NE(this->bits_, 0u);
+    return CTZ(this->bits_);
+  }
+};
+
+template <typename T>
+class HighToLowBitIterator : public BitIteratorBase<T, HighToLowBitIterator<T>> {
+ public:
+  using BitIteratorBase<T, HighToLowBitIterator<T>>::BitIteratorBase;
+
+  uint32_t operator*() const {
+    DCHECK_NE(this->bits_, 0u);
+    static_assert(std::numeric_limits<T>::radix == 2, "Unexpected radix!");
+    return std::numeric_limits<T>::digits - 1u - CLZ(this->bits_);
+  }
+};
+
+template <typename T>
+IterationRange<LowToHighBitIterator<T>> LowToHighBits(T bits) {
+  return IterationRange<LowToHighBitIterator<T>>(
+      LowToHighBitIterator<T>(bits), LowToHighBitIterator<T>());
+}
+
+template <typename T>
+IterationRange<HighToLowBitIterator<T>> HighToLowBits(T bits) {
+  return IterationRange<HighToLowBitIterator<T>>(
+      HighToLowBitIterator<T>(bits), HighToLowBitIterator<T>());
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_BIT_UTILS_ITERATOR_H_
diff --git a/runtime/base/bit_utils_test.cc b/runtime/base/bit_utils_test.cc
index 77bd0b8..9f22fb4 100644
--- a/runtime/base/bit_utils_test.cc
+++ b/runtime/base/bit_utils_test.cc
@@ -17,6 +17,7 @@
 #include <vector>
 
 #include "bit_utils.h"
+#include "bit_utils_iterator.h"
 
 #include "gtest/gtest.h"
 
diff --git a/runtime/base/callee_save_type.h b/runtime/base/callee_save_type.h
new file mode 100644
index 0000000..501b296
--- /dev/null
+++ b/runtime/base/callee_save_type.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_CALLEE_SAVE_TYPE_H_
+#define ART_RUNTIME_BASE_CALLEE_SAVE_TYPE_H_
+
+#include <cstddef>
+#include <ostream>
+
+namespace art {
+
+// The kinds of callee-save methods, distinguished by which registers get spilled to the stack.
+enum class CalleeSaveType : uint32_t {
+  kSaveAllCalleeSaves,  // All callee-save registers.
+  kSaveRefsOnly,        // Only those callee-save registers that can hold references.
+  kSaveRefsAndArgs,     // References (see above) and arguments (usually caller-save registers).
+  kSaveEverything,      // All registers, including both callee-save and caller-save.
+  kLastCalleeSaveType   // Value used for iteration.
+};
+std::ostream& operator<<(std::ostream& os, const CalleeSaveType& rhs);
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_CALLEE_SAVE_TYPE_H_
diff --git a/runtime/base/dumpable-inl.h b/runtime/base/dumpable-inl.h
index 2cdf083..9d7fc39 100644
--- a/runtime/base/dumpable-inl.h
+++ b/runtime/base/dumpable-inl.h
@@ -19,7 +19,7 @@
 
 #include "base/dumpable.h"
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index b28eb72..be20920 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -198,7 +198,7 @@
                                                         kFractionalDigits)
      << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " "
      << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: "
-     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << "\n";
+     << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << std::endl;
 }
 
 template <class Value>
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 55b4306..adfd7d3 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -21,7 +21,7 @@
 #include <sstream>
 
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
@@ -112,7 +112,7 @@
   if (priority == ANDROID_LOG_FATAL) {
     // Allocate buffer for snprintf(buf, buf_size, "%s:%u] %s", file, line, message) below.
     // If allocation fails, fall back to printing only the message.
-    buf_size = strlen(file) + 1 /* ':' */ + std::numeric_limits<typeof(line)>::max_digits10 +
+    buf_size = strlen(file) + 1 /* ':' */ + std::numeric_limits<decltype(line)>::digits10 + 1 +
         2 /* "] " */ + strlen(message) + 1 /* terminating 0 */;
     buf = reinterpret_cast<char*>(malloc(buf_size));
   }
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 08b370e..0ac2399 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -194,6 +194,16 @@
   return exclusive_owner_;
 }
 
+inline void Mutex::AssertExclusiveHeld(const Thread* self) const {
+  if (kDebugLocking && (gAborting == 0)) {
+    CHECK(IsExclusiveHeld(self)) << *this;
+  }
+}
+
+inline void Mutex::AssertHeld(const Thread* self) const {
+  AssertExclusiveHeld(self);
+}
+
 inline bool ReaderWriterMutex::IsExclusiveHeld(const Thread* self) const {
   DCHECK(self == nullptr || self == Thread::Current());
   bool result = (GetExclusiveOwnerTid() == SafeGetTid(self));
@@ -221,6 +231,16 @@
 #endif
 }
 
+inline void ReaderWriterMutex::AssertExclusiveHeld(const Thread* self) const {
+  if (kDebugLocking && (gAborting == 0)) {
+    CHECK(IsExclusiveHeld(self)) << *this;
+  }
+}
+
+inline void ReaderWriterMutex::AssertWriterHeld(const Thread* self) const {
+  AssertExclusiveHeld(self);
+}
+
 inline void MutatorMutex::TransitionFromRunnableToSuspended(Thread* self) {
   AssertSharedHeld(self);
   RegisterAsUnlocked(self);
@@ -231,6 +251,19 @@
   AssertSharedHeld(self);
 }
 
+inline ReaderMutexLock::ReaderMutexLock(Thread* self, ReaderWriterMutex& mu)
+    : self_(self), mu_(mu) {
+  mu_.SharedLock(self_);
+}
+
+inline ReaderMutexLock::~ReaderMutexLock() {
+  mu_.SharedUnlock(self_);
+}
+
+// Catch bug where variable name is omitted. "ReaderMutexLock (lock);" instead of
+// "ReaderMutexLock mu(lock)".
+#define ReaderMutexLock(x) static_assert(0, "ReaderMutexLock declaration missing variable name")
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_MUTEX_INL_H_
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index 03ae63a..e77d8d7 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -244,15 +244,11 @@
   void Unlock(Thread* self) RELEASE() {  ExclusiveUnlock(self); }
 
   // Is the current thread the exclusive holder of the Mutex.
-  bool IsExclusiveHeld(const Thread* self) const;
+  ALWAYS_INLINE bool IsExclusiveHeld(const Thread* self) const;
 
   // Assert that the Mutex is exclusively held by the current thread.
-  void AssertExclusiveHeld(const Thread* self) ASSERT_CAPABILITY(this) {
-    if (kDebugLocking && (gAborting == 0)) {
-      CHECK(IsExclusiveHeld(self)) << *this;
-    }
-  }
-  void AssertHeld(const Thread* self) ASSERT_CAPABILITY(this) { AssertExclusiveHeld(self); }
+  ALWAYS_INLINE void AssertExclusiveHeld(const Thread* self) const ASSERT_CAPABILITY(this);
+  ALWAYS_INLINE void AssertHeld(const Thread* self) const ASSERT_CAPABILITY(this);
 
   // Assert that the Mutex is not held by the current thread.
   void AssertNotHeldExclusive(const Thread* self) ASSERT_CAPABILITY(!*this) {
@@ -349,15 +345,11 @@
   void ReaderUnlock(Thread* self) RELEASE_SHARED() { SharedUnlock(self); }
 
   // Is the current thread the exclusive holder of the ReaderWriterMutex.
-  bool IsExclusiveHeld(const Thread* self) const;
+  ALWAYS_INLINE bool IsExclusiveHeld(const Thread* self) const;
 
   // Assert the current thread has exclusive access to the ReaderWriterMutex.
-  void AssertExclusiveHeld(const Thread* self) ASSERT_CAPABILITY(this) {
-    if (kDebugLocking && (gAborting == 0)) {
-      CHECK(IsExclusiveHeld(self)) << *this;
-    }
-  }
-  void AssertWriterHeld(const Thread* self) ASSERT_CAPABILITY(this) { AssertExclusiveHeld(self); }
+  ALWAYS_INLINE void AssertExclusiveHeld(const Thread* self) const ASSERT_CAPABILITY(this);
+  ALWAYS_INLINE void AssertWriterHeld(const Thread* self) const ASSERT_CAPABILITY(this);
 
   // Assert the current thread doesn't have exclusive access to the ReaderWriterMutex.
   void AssertNotExclusiveHeld(const Thread* self) ASSERT_CAPABILITY(!this) {
@@ -517,23 +509,15 @@
 // construction and releases it upon destruction.
 class SCOPED_CAPABILITY ReaderMutexLock {
  public:
-  ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) ALWAYS_INLINE :
-      self_(self), mu_(mu) {
-    mu_.SharedLock(self_);
-  }
+  ALWAYS_INLINE ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu);
 
-  ~ReaderMutexLock() RELEASE() ALWAYS_INLINE {
-    mu_.SharedUnlock(self_);
-  }
+  ALWAYS_INLINE ~ReaderMutexLock() RELEASE();
 
  private:
   Thread* const self_;
   ReaderWriterMutex& mu_;
   DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock);
 };
-// Catch bug where variable name is omitted. "ReaderMutexLock (lock);" instead of
-// "ReaderMutexLock mu(lock)".
-#define ReaderMutexLock(x) static_assert(0, "ReaderMutexLock declaration missing variable name")
 
 // Scoped locker/unlocker for a ReaderWriterMutex that acquires write access to mu upon
 // construction and releases it upon destruction.
diff --git a/runtime/base/mutex_test.cc b/runtime/base/mutex_test.cc
index 340550f..752e77a 100644
--- a/runtime/base/mutex_test.cc
+++ b/runtime/base/mutex_test.cc
@@ -14,10 +14,10 @@
  * limitations under the License.
  */
 
-#include "mutex.h"
+#include "mutex-inl.h"
 
 #include "common_runtime_test.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/base/safe_copy_test.cc b/runtime/base/safe_copy_test.cc
index 987895e..a9ec952 100644
--- a/runtime/base/safe_copy_test.cc
+++ b/runtime/base/safe_copy_test.cc
@@ -23,80 +23,86 @@
 #include <sys/mman.h>
 #include <sys/user.h>
 
+#include "globals.h"
+
 namespace art {
 
 #if defined(__linux__)
 
 TEST(SafeCopyTest, smoke) {
+  DCHECK_EQ(kPageSize, static_cast<decltype(kPageSize)>(PAGE_SIZE));
+
   // Map four pages, mark the second one as PROT_NONE, unmap the last one.
-  void* map = mmap(nullptr, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+  void* map = mmap(nullptr, kPageSize * 4, PROT_READ | PROT_WRITE,
                    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(MAP_FAILED, map);
   char* page1 = static_cast<char*>(map);
-  char* page2 = page1 + PAGE_SIZE;
-  char* page3 = page2 + PAGE_SIZE;
-  char* page4 = page3 + PAGE_SIZE;
-  ASSERT_EQ(0, mprotect(page1 + PAGE_SIZE, PAGE_SIZE, PROT_NONE));
-  ASSERT_EQ(0, munmap(page4, PAGE_SIZE));
+  char* page2 = page1 + kPageSize;
+  char* page3 = page2 + kPageSize;
+  char* page4 = page3 + kPageSize;
+  ASSERT_EQ(0, mprotect(page1 + kPageSize, kPageSize, PROT_NONE));
+  ASSERT_EQ(0, munmap(page4, kPageSize));
 
   page1[0] = 'a';
-  page1[PAGE_SIZE - 1] = 'z';
+  page1[kPageSize - 1] = 'z';
 
   page3[0] = 'b';
-  page3[PAGE_SIZE - 1] = 'y';
+  page3[kPageSize - 1] = 'y';
 
-  char buf[PAGE_SIZE];
+  char buf[kPageSize];
 
   // Completely valid read.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE), SafeCopy(buf, page1, PAGE_SIZE)) << strerror(errno);
-  EXPECT_EQ(0, memcmp(buf, page1, PAGE_SIZE));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize), SafeCopy(buf, page1, kPageSize)) << strerror(errno);
+  EXPECT_EQ(0, memcmp(buf, page1, kPageSize));
 
   // Reading into a guard page.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE - 1), SafeCopy(buf, page1 + 1, PAGE_SIZE));
-  EXPECT_EQ(0, memcmp(buf, page1 + 1, PAGE_SIZE - 1));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize - 1), SafeCopy(buf, page1 + 1, kPageSize));
+  EXPECT_EQ(0, memcmp(buf, page1 + 1, kPageSize - 1));
 
   // Reading from a guard page into a real page.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(0, SafeCopy(buf, page2 + PAGE_SIZE - 1, PAGE_SIZE));
+  EXPECT_EQ(0, SafeCopy(buf, page2 + kPageSize - 1, kPageSize));
 
   // Reading off of the end of a mapping.
   memset(buf, 0xCC, sizeof(buf));
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE), SafeCopy(buf, page3, PAGE_SIZE * 2));
-  EXPECT_EQ(0, memcmp(buf, page3, PAGE_SIZE));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize), SafeCopy(buf, page3, kPageSize * 2));
+  EXPECT_EQ(0, memcmp(buf, page3, kPageSize));
 
   // Completely invalid.
-  EXPECT_EQ(0, SafeCopy(buf, page1 + PAGE_SIZE, PAGE_SIZE));
+  EXPECT_EQ(0, SafeCopy(buf, page1 + kPageSize, kPageSize));
 
   // Clean up.
-  ASSERT_EQ(0, munmap(map, PAGE_SIZE * 3));
+  ASSERT_EQ(0, munmap(map, kPageSize * 3));
 }
 
 TEST(SafeCopyTest, alignment) {
+  DCHECK_EQ(kPageSize, static_cast<decltype(kPageSize)>(PAGE_SIZE));
+
   // Copy the middle of a mapping to the end of another one.
-  void* src_map = mmap(nullptr, PAGE_SIZE * 3, PROT_READ | PROT_WRITE,
+  void* src_map = mmap(nullptr, kPageSize * 3, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(MAP_FAILED, src_map);
 
   // Add a guard page to make sure we don't write past the end of the mapping.
-  void* dst_map = mmap(nullptr, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+  void* dst_map = mmap(nullptr, kPageSize * 4, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
   ASSERT_NE(MAP_FAILED, dst_map);
 
   char* src = static_cast<char*>(src_map);
   char* dst = static_cast<char*>(dst_map);
-  ASSERT_EQ(0, mprotect(dst + 3 * PAGE_SIZE, PAGE_SIZE, PROT_NONE));
+  ASSERT_EQ(0, mprotect(dst + 3 * kPageSize, kPageSize, PROT_NONE));
 
   src[512] = 'a';
-  src[PAGE_SIZE * 3 - 512 - 1] = 'z';
+  src[kPageSize * 3 - 512 - 1] = 'z';
 
-  EXPECT_EQ(static_cast<ssize_t>(PAGE_SIZE * 3 - 1024),
-            SafeCopy(dst + 1024, src + 512, PAGE_SIZE * 3 - 1024));
-  EXPECT_EQ(0, memcmp(dst + 1024, src + 512, PAGE_SIZE * 3 - 1024));
+  EXPECT_EQ(static_cast<ssize_t>(kPageSize * 3 - 1024),
+            SafeCopy(dst + 1024, src + 512, kPageSize * 3 - 1024));
+  EXPECT_EQ(0, memcmp(dst + 1024, src + 512, kPageSize * 3 - 1024));
 
-  ASSERT_EQ(0, munmap(src_map, PAGE_SIZE * 3));
-  ASSERT_EQ(0, munmap(dst_map, PAGE_SIZE * 4));
+  ASSERT_EQ(0, munmap(src_map, kPageSize * 3));
+  ASSERT_EQ(0, munmap(dst_map, kPageSize * 4));
 }
 
 #endif  // defined(__linux__)
diff --git a/runtime/base/stl_util.h b/runtime/base/stl_util.h
index cfe27f3..b272972 100644
--- a/runtime/base/stl_util.h
+++ b/runtime/base/stl_util.h
@@ -25,13 +25,6 @@
 
 namespace art {
 
-// Sort and remove duplicates of an STL vector or deque.
-template<class T>
-void STLSortAndRemoveDuplicates(T* v) {
-  std::sort(v->begin(), v->end());
-  v->erase(std::unique(v->begin(), v->end()), v->end());
-}
-
 // STLDeleteContainerPointers()
 //  For a range within a container of pointers, calls delete
 //  (non-array version) on these pointers.
@@ -83,20 +76,6 @@
   }
 }
 
-template <class T>
-std::string ToString(const T& v) {
-  std::ostringstream os;
-  os << "[";
-  for (size_t i = 0; i < v.size(); ++i) {
-    os << v[i];
-    if (i < v.size() - 1) {
-      os << ", ";
-    }
-  }
-  os << "]";
-  return os.str();
-}
-
 // Deleter using free() for use with std::unique_ptr<>. See also UniqueCPtr<> below.
 struct FreeDelete {
   // NOTE: Deleting a const object is valid but free() takes a non-const pointer.
@@ -109,13 +88,6 @@
 template <typename T>
 using UniqueCPtr = std::unique_ptr<T, FreeDelete>;
 
-// C++14 from-the-future import (std::make_unique)
-// Invoke the constructor of 'T' with the provided args, and wrap the result in a unique ptr.
-template <typename T, typename ... Args>
-std::unique_ptr<T> MakeUnique(Args&& ... args) {
-  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
-}
-
 // Find index of the first element with the specified value known to be in the container.
 template <typename Container, typename T>
 size_t IndexOfElement(const Container& container, const T& value) {
@@ -150,13 +122,6 @@
   return it != container.end();
 }
 
-// const char* compare function suitable for std::map or std::set.
-struct CStringLess {
-  bool operator()(const char* lhs, const char* rhs) const {
-    return strcmp(lhs, rhs) < 0;
-  }
-};
-
 // 32-bit FNV-1a hash function suitable for std::unordered_map.
 // It can be used with any container which works with range-based for loop.
 // See http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
@@ -171,23 +136,6 @@
   }
 };
 
-// Use to suppress type deduction for a function argument.
-// See std::identity<> for more background:
-// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
-//
-// e.g. "template <typename X> void bar(identity<X>::type foo);
-//     bar(5); // compilation error
-//     bar<int>(5); // ok
-// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);
-//     Base b;
-//     Derived d;
-//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
-// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
-template <typename T>
-struct Identity {
-  using type = T;
-};
-
 // Merge `other` entries into `to_update`.
 template <typename T>
 static inline void MergeSets(std::set<T>& to_update, const std::set<T>& other) {
diff --git a/runtime/base/stl_util_identity.h b/runtime/base/stl_util_identity.h
new file mode 100644
index 0000000..40a93f7
--- /dev/null
+++ b/runtime/base/stl_util_identity.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_STL_UTIL_IDENTITY_H_
+#define ART_RUNTIME_BASE_STL_UTIL_IDENTITY_H_
+
+namespace art {
+
+// Use to suppress type deduction for a function argument.
+// See std::identity<> for more background:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2005/n1856.html#20.2.2 - move/forward helpers
+//
+// e.g. "template <typename X> void bar(identity<X>::type foo);
+//     bar(5); // compilation error
+//     bar<int>(5); // ok
+// or "template <typename T> void foo(T* x, typename Identity<T*>::type y);
+//     Base b;
+//     Derived d;
+//     foo(&b, &d);  // Use implicit Derived* -> Base* conversion.
+// If T was deduced from both &b and &d, there would be a mismatch, i.e. deduction failure.
+template <typename T>
+struct Identity {
+  using type = T;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BASE_STL_UTIL_IDENTITY_H_
diff --git a/runtime/base/strlcpy.h b/runtime/base/strlcpy.h
new file mode 100644
index 0000000..de135ea
--- /dev/null
+++ b/runtime/base/strlcpy.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BASE_STRLCPY_H_
+#define ART_RUNTIME_BASE_STRLCPY_H_
+
+#include <cstdio>
+#include <cstring>
+
+// Expose a simple implementation of strlcpy when we're not compiling against bionic. This keeps
+// static analyzers happy by letting callers avoid strcpy.
+//
+// Bionic exposes this function, but the host glibc does not. Remove this shim once we also
+// compile against bionic on the host.
+
+#if !defined(__BIONIC__) && !defined(__APPLE__)
+
+static inline size_t strlcpy(char* dst, const char* src, size_t size) {
+  // Extra-lazy implementation: this is only a host shim, and we don't have to call this often.
+  return snprintf(dst, size, "%s", src);
+}
+
+#endif
+
+#endif  // ART_RUNTIME_BASE_STRLCPY_H_
diff --git a/runtime/base/timing_logger.cc b/runtime/base/timing_logger.cc
index 9a0e0d0..aaa2431 100644
--- a/runtime/base/timing_logger.cc
+++ b/runtime/base/timing_logger.cc
@@ -24,7 +24,9 @@
 #include "base/histogram-inl.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
-#include "thread-inl.h"
+#include "gc/heap.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 
 #include <cmath>
 #include <iomanip>
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 03fc959..00b5567 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -91,6 +91,7 @@
   fd_ = other.fd_;
   file_path_ = std::move(other.file_path_);
   auto_close_ = other.auto_close_;
+  read_only_mode_ = other.read_only_mode_;
   other.Release();  // Release other.
 
   return *this;
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 7657a38..6aef348 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -186,6 +186,20 @@
   ASSERT_EQ(file2.Close(), 0);
 }
 
+TEST_F(FdFileTest, OperatorMoveEquals) {
+  // Make sure the read_only_mode_ flag is correctly carried over
+  // by move assignment.
+  art::ScratchFile tmp;
+  FdFile file(tmp.GetFilename(), O_RDONLY, false);
+  ASSERT_TRUE(file.ReadOnlyMode());
+
+  FdFile file2(tmp.GetFilename(), O_RDWR, false);
+  ASSERT_FALSE(file2.ReadOnlyMode());
+
+  file2 = std::move(file);
+  ASSERT_TRUE(file2.ReadOnlyMode());
+}
+
 TEST_F(FdFileTest, EraseWithPathUnlinks) {
   // New scratch file, zero-length.
   art::ScratchFile tmp;
diff --git a/runtime/base/variant_map.h b/runtime/base/variant_map.h
index 531cb37..fdb60c4 100644
--- a/runtime/base/variant_map.h
+++ b/runtime/base/variant_map.h
@@ -22,7 +22,7 @@
 #include <type_traits>
 #include <utility>
 
-#include "base/stl_util.h"
+#include "base/stl_util_identity.h"
 
 namespace art {
 
diff --git a/runtime/cha.cc b/runtime/cha.cc
index 7948c29..e6bdb84 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -31,34 +31,24 @@
 void ClassHierarchyAnalysis::AddDependency(ArtMethod* method,
                                            ArtMethod* dependent_method,
                                            OatQuickMethodHeader* dependent_header) {
-  auto it = cha_dependency_map_.find(method);
-  if (it == cha_dependency_map_.end()) {
-    cha_dependency_map_[method] =
-        new std::vector<std::pair<art::ArtMethod*, art::OatQuickMethodHeader*>>();
-    it = cha_dependency_map_.find(method);
-  } else {
-    DCHECK(it->second != nullptr);
-  }
-  it->second->push_back(std::make_pair(dependent_method, dependent_header));
+  const auto it = cha_dependency_map_.insert(
+      decltype(cha_dependency_map_)::value_type(method, ListOfDependentPairs())).first;
+  it->second.push_back({dependent_method, dependent_header});
 }
 
-std::vector<std::pair<ArtMethod*, OatQuickMethodHeader*>>*
-    ClassHierarchyAnalysis::GetDependents(ArtMethod* method) {
+static const ClassHierarchyAnalysis::ListOfDependentPairs s_empty_vector;
+
+const ClassHierarchyAnalysis::ListOfDependentPairs& ClassHierarchyAnalysis::GetDependents(
+    ArtMethod* method) {
   auto it = cha_dependency_map_.find(method);
   if (it != cha_dependency_map_.end()) {
-    DCHECK(it->second != nullptr);
     return it->second;
   }
-  return nullptr;
+  return s_empty_vector;
 }
 
-void ClassHierarchyAnalysis::RemoveDependencyFor(ArtMethod* method) {
-  auto it = cha_dependency_map_.find(method);
-  if (it != cha_dependency_map_.end()) {
-    auto dependents = it->second;
-    cha_dependency_map_.erase(it);
-    delete dependents;
-  }
+void ClassHierarchyAnalysis::RemoveAllDependenciesFor(ArtMethod* method) {
+  cha_dependency_map_.erase(method);
 }
 
 void ClassHierarchyAnalysis::RemoveDependentsWithMethodHeaders(
@@ -66,20 +56,19 @@
   // Iterate through all entries in the dependency map and remove any entry that
   // contains one of those in method_headers.
   for (auto map_it = cha_dependency_map_.begin(); map_it != cha_dependency_map_.end(); ) {
-    auto dependents = map_it->second;
-    for (auto vec_it = dependents->begin(); vec_it != dependents->end(); ) {
-      OatQuickMethodHeader* method_header = vec_it->second;
-      auto it = std::find(method_headers.begin(), method_headers.end(), method_header);
-      if (it != method_headers.end()) {
-        vec_it = dependents->erase(vec_it);
-      } else {
-        vec_it++;
-      }
-    }
+    ListOfDependentPairs& dependents = map_it->second;
+    dependents.erase(
+        std::remove_if(
+            dependents.begin(),
+            dependents.end(),
+            [&method_headers](MethodAndMethodHeaderPair& dependent) {
+              return method_headers.find(dependent.second) != method_headers.end();
+            }),
+        dependents.end());
+
     // Remove the map entry if there are no more dependents.
-    if (dependents->empty()) {
+    if (dependents.empty()) {
       map_it = cha_dependency_map_.erase(map_it);
-      delete dependents;
     } else {
       map_it++;
     }
@@ -554,11 +543,7 @@
         }
 
         // Invalidate all dependents.
-        auto dependents = GetDependents(invalidated);
-        if (dependents == nullptr) {
-          continue;
-        }
-        for (const auto& dependent : *dependents) {
+        for (const auto& dependent : GetDependents(invalidated)) {
           ArtMethod* method = dependent.first;;
           OatQuickMethodHeader* method_header = dependent.second;
           VLOG(class_linker) << "CHA invalidated compiled code for " << method->PrettyMethod();
@@ -567,7 +552,7 @@
               method, method_header);
           dependent_method_headers.insert(method_header);
         }
-        RemoveDependencyFor(invalidated);
+        RemoveAllDependenciesFor(invalidated);
       }
     }
 
diff --git a/runtime/cha.h b/runtime/cha.h
index 99c49d2..81458db 100644
--- a/runtime/cha.h
+++ b/runtime/cha.h
@@ -17,7 +17,6 @@
 #ifndef ART_RUNTIME_CHA_H_
 #define ART_RUNTIME_CHA_H_
 
-#include "art_method.h"
 #include "base/enums.h"
 #include "base/mutex.h"
 #include "handle.h"
@@ -28,6 +27,8 @@
 
 namespace art {
 
+class ArtMethod;
+
 /**
  * Class Hierarchy Analysis (CHA) tries to devirtualize virtual calls into
  * direct calls based on the info generated by analyzing class hierarchies.
@@ -94,12 +95,11 @@
                      OatQuickMethodHeader* dependent_header) REQUIRES(Locks::cha_lock_);
 
   // Return compiled code that assumes that `method` has single-implementation.
-  std::vector<MethodAndMethodHeaderPair>* GetDependents(ArtMethod* method)
-      REQUIRES(Locks::cha_lock_);
+  const ListOfDependentPairs& GetDependents(ArtMethod* method) REQUIRES(Locks::cha_lock_);
 
   // Remove dependency tracking for compiled code that assumes that
   // `method` has single-implementation.
-  void RemoveDependencyFor(ArtMethod* method) REQUIRES(Locks::cha_lock_);
+  void RemoveAllDependenciesFor(ArtMethod* method) REQUIRES(Locks::cha_lock_);
 
   // Remove from cha_dependency_map_ all entries that contain OatQuickMethodHeader from
   // the given `method_headers` set.
@@ -158,7 +158,7 @@
 
   // A map that maps a method to a set of compiled code that assumes that method has a
   // single implementation, which is used to do CHA-based devirtualization.
-  std::unordered_map<ArtMethod*, ListOfDependentPairs*> cha_dependency_map_
+  std::unordered_map<ArtMethod*, ListOfDependentPairs> cha_dependency_map_
     GUARDED_BY(Locks::cha_lock_);
 
   DISALLOW_COPY_AND_ASSIGN(ClassHierarchyAnalysis);
diff --git a/runtime/cha_test.cc b/runtime/cha_test.cc
index d2f335e..c60720f 100644
--- a/runtime/cha_test.cc
+++ b/runtime/cha_test.cc
@@ -36,58 +36,58 @@
   ClassHierarchyAnalysis cha;
   MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
 
-  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  ASSERT_TRUE(cha.GetDependents(METHOD1).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD2).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
 
   cha.AddDependency(METHOD1, METHOD2, METHOD_HEADER2);
-  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  ASSERT_TRUE(cha.GetDependents(METHOD2).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
   auto dependents = cha.GetDependents(METHOD1);
-  ASSERT_EQ(dependents->size(), 1u);
-  ASSERT_EQ(dependents->at(0).first, METHOD2);
-  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
+  ASSERT_EQ(dependents.size(), 1u);
+  ASSERT_EQ(dependents[0].first, METHOD2);
+  ASSERT_EQ(dependents[0].second, METHOD_HEADER2);
 
   cha.AddDependency(METHOD1, METHOD3, METHOD_HEADER3);
-  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  ASSERT_TRUE(cha.GetDependents(METHOD2).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
   dependents = cha.GetDependents(METHOD1);
-  ASSERT_EQ(dependents->size(), 2u);
-  ASSERT_EQ(dependents->at(0).first, METHOD2);
-  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER2);
-  ASSERT_EQ(dependents->at(1).first, METHOD3);
-  ASSERT_EQ(dependents->at(1).second, METHOD_HEADER3);
+  ASSERT_EQ(dependents.size(), 2u);
+  ASSERT_EQ(dependents[0].first, METHOD2);
+  ASSERT_EQ(dependents[0].second, METHOD_HEADER2);
+  ASSERT_EQ(dependents[1].first, METHOD3);
+  ASSERT_EQ(dependents[1].second, METHOD_HEADER3);
 
   std::unordered_set<OatQuickMethodHeader*> headers;
   headers.insert(METHOD_HEADER2);
   cha.RemoveDependentsWithMethodHeaders(headers);
-  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  ASSERT_TRUE(cha.GetDependents(METHOD2).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
   dependents = cha.GetDependents(METHOD1);
-  ASSERT_EQ(dependents->size(), 1u);
-  ASSERT_EQ(dependents->at(0).first, METHOD3);
-  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER3);
+  ASSERT_EQ(dependents.size(), 1u);
+  ASSERT_EQ(dependents[0].first, METHOD3);
+  ASSERT_EQ(dependents[0].second, METHOD_HEADER3);
 
   cha.AddDependency(METHOD2, METHOD1, METHOD_HEADER1);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
   dependents = cha.GetDependents(METHOD1);
-  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents.size(), 1u);
   dependents = cha.GetDependents(METHOD2);
-  ASSERT_EQ(dependents->size(), 1u);
+  ASSERT_EQ(dependents.size(), 1u);
 
   headers.insert(METHOD_HEADER3);
   cha.RemoveDependentsWithMethodHeaders(headers);
-  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  ASSERT_TRUE(cha.GetDependents(METHOD1).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
   dependents = cha.GetDependents(METHOD2);
-  ASSERT_EQ(dependents->size(), 1u);
-  ASSERT_EQ(dependents->at(0).first, METHOD1);
-  ASSERT_EQ(dependents->at(0).second, METHOD_HEADER1);
+  ASSERT_EQ(dependents.size(), 1u);
+  ASSERT_EQ(dependents[0].first, METHOD1);
+  ASSERT_EQ(dependents[0].second, METHOD_HEADER1);
 
-  cha.RemoveDependencyFor(METHOD2);
-  ASSERT_EQ(cha.GetDependents(METHOD1), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD2), nullptr);
-  ASSERT_EQ(cha.GetDependents(METHOD3), nullptr);
+  cha.RemoveAllDependenciesFor(METHOD2);
+  ASSERT_TRUE(cha.GetDependents(METHOD1).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD2).empty());
+  ASSERT_TRUE(cha.GetDependents(METHOD3).empty());
 }
 
 }  // namespace art
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index a955cb5..f6c8fa9 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -20,6 +20,7 @@
 #include "art_method-inl.h"
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "stack_map.h"
 
 namespace art {
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 694c113..0fa25d1 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -107,6 +107,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "trace.h"
+#include "utf.h"
 #include "utils.h"
 #include "utils/dex_cache_arrays_layout-inl.h"
 #include "verifier/method_verifier.h"
@@ -352,7 +353,8 @@
 }
 
 ClassLinker::ClassLinker(InternTable* intern_table)
-    : failed_dex_cache_class_lookups_(0),
+    : boot_class_table_(new ClassTable()),
+      failed_dex_cache_class_lookups_(0),
       class_roots_(nullptr),
       array_iftable_(nullptr),
       find_array_class_cache_next_victim_(0),
@@ -1222,15 +1224,36 @@
   }
 }
 
-bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
+// new_class_set is the set of classes that were read from the class table section in the image.
+// If there was no class table section, it is null.
+// Note: using a class here to avoid having to make ClassLinker internals public.
+class AppImageClassLoadersAndDexCachesHelper {
+ public:
+  static bool Update(
+      ClassLinker* class_linker,
+      gc::space::ImageSpace* space,
+      Handle<mirror::ClassLoader> class_loader,
+      Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
+      ClassTable::ClassSet* new_class_set,
+      bool* out_forward_dex_cache_array,
+      std::string* out_error_msg)
+      REQUIRES(!Locks::dex_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+};
+
+bool AppImageClassLoadersAndDexCachesHelper::Update(
+    ClassLinker* class_linker,
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
     Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
     ClassTable::ClassSet* new_class_set,
     bool* out_forward_dex_cache_array,
-    std::string* out_error_msg) {
+    std::string* out_error_msg)
+    REQUIRES(!Locks::dex_lock_)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(out_forward_dex_cache_array != nullptr);
   DCHECK(out_error_msg != nullptr);
+  PointerSize image_pointer_size = class_linker->GetImagePointerSize();
   Thread* const self = Thread::Current();
   gc::Heap* const heap = Runtime::Current()->GetHeap();
   const ImageHeader& header = space->GetImageHeader();
@@ -1295,7 +1318,7 @@
         CHECK_EQ(num_fields, dex_cache->NumResolvedFields());
         CHECK_EQ(num_method_types, dex_cache->NumResolvedMethodTypes());
         CHECK_EQ(num_call_sites, dex_cache->NumResolvedCallSites());
-        DexCacheArraysLayout layout(image_pointer_size_, dex_file);
+        DexCacheArraysLayout layout(image_pointer_size, dex_file);
         uint8_t* const raw_arrays = oat_dex_file->GetDexCacheArrays();
         if (num_strings != 0u) {
           mirror::StringDexCacheType* const image_resolved_strings = dex_cache->GetStrings();
@@ -1331,17 +1354,17 @@
           mirror::FieldDexCacheType* const fields =
               reinterpret_cast<mirror::FieldDexCacheType*>(raw_arrays + layout.FieldsOffset());
           for (size_t j = 0; j < num_fields; ++j) {
-            DCHECK_EQ(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size_).index,
+            DCHECK_EQ(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size).index,
                       0u);
-            DCHECK(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size_).object ==
+            DCHECK(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size).object ==
                    nullptr);
             mirror::DexCache::SetNativePairPtrSize(
                 fields,
                 j,
                 mirror::DexCache::GetNativePairPtrSize(image_resolved_fields,
                                                        j,
-                                                       image_pointer_size_),
-                image_pointer_size_);
+                                                       image_pointer_size),
+                image_pointer_size);
           }
           dex_cache->SetResolvedFields(fields);
         }
@@ -1379,8 +1402,8 @@
         // Make sure to do this after we update the arrays since we store the resolved types array
         // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
         // BSS.
-        CHECK(!FindDexCacheDataLocked(*dex_file).IsValid());
-        RegisterDexFileLocked(*dex_file, dex_cache, class_loader.Get());
+        CHECK(!class_linker->FindDexCacheDataLocked(*dex_file).IsValid());
+        class_linker->RegisterDexFileLocked(*dex_file, dex_cache, class_loader.Get());
       }
       if (kIsDebugBuild) {
         CHECK(new_class_set != nullptr);
@@ -1402,20 +1425,20 @@
             }
             for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
               const void* code = m.GetEntryPointFromQuickCompiledCode();
-              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
-              if (!IsQuickResolutionStub(code) &&
-                  !IsQuickGenericJniStub(code) &&
-                  !IsQuickToInterpreterBridge(code) &&
+              const void* oat_code = m.IsInvokable() ? class_linker->GetQuickOatCodeFor(&m) : code;
+              if (!class_linker->IsQuickResolutionStub(code) &&
+                  !class_linker->IsQuickGenericJniStub(code) &&
+                  !class_linker->IsQuickToInterpreterBridge(code) &&
                   !m.IsNative()) {
                 DCHECK_EQ(code, oat_code) << m.PrettyMethod();
               }
             }
             for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
               const void* code = m.GetEntryPointFromQuickCompiledCode();
-              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
-              if (!IsQuickResolutionStub(code) &&
-                  !IsQuickGenericJniStub(code) &&
-                  !IsQuickToInterpreterBridge(code) &&
+              const void* oat_code = m.IsInvokable() ? class_linker->GetQuickOatCodeFor(&m) : code;
+              if (!class_linker->IsQuickResolutionStub(code) &&
+                  !class_linker->IsQuickGenericJniStub(code) &&
+                  !class_linker->IsQuickToInterpreterBridge(code) &&
                   !m.IsNative()) {
                 DCHECK_EQ(code, oat_code) << m.PrettyMethod();
               }
@@ -1875,12 +1898,13 @@
   }
   if (app_image) {
     bool forward_dex_cache_arrays = false;
-    if (!UpdateAppImageClassLoadersAndDexCaches(space,
-                                                class_loader,
-                                                dex_caches,
-                                                &temp_set,
-                                                /*out*/&forward_dex_cache_arrays,
-                                                /*out*/error_msg)) {
+    if (!AppImageClassLoadersAndDexCachesHelper::Update(this,
+                                                        space,
+                                                        class_loader,
+                                                        dex_caches,
+                                                        &temp_set,
+                                                        /*out*/&forward_dex_cache_arrays,
+                                                        /*out*/error_msg)) {
       return false;
     }
     // Update class loader and resolved strings. If added_class_table is false, the resolved
@@ -1979,7 +2003,7 @@
     // ClassTable::TableSlot. The buffered root visiting would access a stale stack location for
     // these objects.
     UnbufferedRootVisitor root_visitor(visitor, RootInfo(kRootStickyClass));
-    boot_class_table_.VisitRoots(root_visitor);
+    boot_class_table_->VisitRoots(root_visitor);
     // If tracing is enabled, then mark all the class loaders to prevent unloading.
     if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) {
       for (const ClassLoaderData& data : class_loaders_) {
@@ -2078,7 +2102,7 @@
 };
 
 void ClassLinker::VisitClassesInternal(ClassVisitor* visitor) {
-  if (boot_class_table_.Visit(*visitor)) {
+  if (boot_class_table_->Visit(*visitor)) {
     VisitClassLoaderClassesVisitor loader_visitor(visitor);
     VisitClassLoaders(&loader_visitor);
   }
@@ -3393,8 +3417,11 @@
   // Example dex_cache location is SettingsProvider.apk and
   // dex file location is /system/priv-app/SettingsProvider/SettingsProvider.apk
   CHECK_EQ(dex_cache_location, dex_file_suffix);
-  // Clean up pass to remove null dex caches.
+  const OatFile* oat_file =
+      (dex_file.GetOatDexFile() != nullptr) ? dex_file.GetOatDexFile()->GetOatFile() : nullptr;
+  // Clean up pass to remove null dex caches. Also check if we need to initialize OatFile .bss.
   // Null dex caches can occur due to class unloading and we are lazily removing null entries.
+  bool initialize_oat_file_bss = (oat_file != nullptr);
   JavaVMExt* const vm = self->GetJniEnv()->vm;
   for (auto it = dex_caches_.begin(); it != dex_caches_.end(); ) {
     DexCacheData data = *it;
@@ -3402,9 +3429,21 @@
       vm->DeleteWeakGlobalRef(self, data.weak_root);
       it = dex_caches_.erase(it);
     } else {
+      if (initialize_oat_file_bss &&
+          it->dex_file->GetOatDexFile() != nullptr &&
+          it->dex_file->GetOatDexFile()->GetOatFile() == oat_file) {
+        initialize_oat_file_bss = false;  // Already initialized.
+      }
       ++it;
     }
   }
+  if (initialize_oat_file_bss) {
+    // TODO: Pre-initialize from boot/app image?
+    ArtMethod* resolution_method = Runtime::Current()->GetResolutionMethod();
+    for (ArtMethod*& entry : oat_file->GetBssMethods()) {
+      entry = resolution_method;
+    }
+  }
   jweak dex_cache_jweak = vm->AddWeakGlobalRef(self, dex_cache);
   dex_cache->SetDexFile(&dex_file);
   DexCacheData data;
@@ -3437,6 +3476,39 @@
   return dex_cache;
 }
 
+void ClassLinker::RegisterExistingDexCache(ObjPtr<mirror::DexCache> dex_cache,
+                                           ObjPtr<mirror::ClassLoader> class_loader) {
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(dex_cache));
+  Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
+  const DexFile* dex_file = dex_cache->GetDexFile();
+  DCHECK(dex_file != nullptr) << "Attempt to register uninitialized dex_cache object!";
+  if (kIsDebugBuild) {
+    DexCacheData old_data;
+    {
+      ReaderMutexLock mu(self, *Locks::dex_lock_);
+      old_data = FindDexCacheDataLocked(*dex_file);
+    }
+    ObjPtr<mirror::DexCache> old_dex_cache = DecodeDexCache(self, old_data);
+    DCHECK(old_dex_cache.IsNull()) << "Attempt to manually register a dex cache thats already "
+                                   << "been registered on dex file " << dex_file->GetLocation();
+  }
+  ClassTable* table;
+  {
+    WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
+    table = InsertClassTableForClassLoader(h_class_loader.Get());
+  }
+  WriterMutexLock mu(self, *Locks::dex_lock_);
+  RegisterDexFileLocked(*dex_file, h_dex_cache.Get(), h_class_loader.Get());
+  table->InsertStrongRoot(h_dex_cache.Get());
+  if (h_class_loader.Get() != nullptr) {
+    // Since we added a strong root to the class table, do the write barrier as required for
+    // remembered sets and generational GCs.
+    Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(h_class_loader.Get());
+  }
+}
+
 ObjPtr<mirror::DexCache> ClassLinker::RegisterDexFile(const DexFile& dex_file,
                                                       ObjPtr<mirror::ClassLoader> class_loader) {
   Thread* self = Thread::Current();
@@ -3851,6 +3923,12 @@
 }
 
 mirror::Class* ClassLinker::LookupClass(Thread* self,
+                           const char* descriptor,
+                           ObjPtr<mirror::ClassLoader> class_loader) {
+  return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
+}
+
+mirror::Class* ClassLinker::LookupClass(Thread* self,
                                         const char* descriptor,
                                         size_t hash,
                                         ObjPtr<mirror::ClassLoader> class_loader) {
@@ -3881,7 +3959,7 @@
 
 void ClassLinker::MoveClassTableToPreZygote() {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-  boot_class_table_.FreezeSnapshot();
+  boot_class_table_->FreezeSnapshot();
   MoveClassTableToPreZygoteVisitor visitor;
   VisitClassLoaders(&visitor);
 }
@@ -3918,7 +3996,7 @@
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
   const size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  ObjPtr<mirror::Class> klass = boot_class_table_.Lookup(descriptor, hash);
+  ObjPtr<mirror::Class> klass = boot_class_table_->Lookup(descriptor, hash);
   if (klass != nullptr) {
     DCHECK(klass->GetClassLoader() == nullptr);
     result.push_back(klass);
@@ -4473,7 +4551,10 @@
   DCHECK(out != nullptr);
   out->CopyFrom(proxy_constructor, image_pointer_size_);
   // Make this constructor public and fix the class to be our Proxy version
-  out->SetAccessFlags((out->GetAccessFlags() & ~kAccProtected) | kAccPublic);
+  // Mark kAccCompileDontBother so that we don't take JIT samples for the method. b/62349349
+  out->SetAccessFlags((out->GetAccessFlags() & ~kAccProtected) |
+                      kAccPublic |
+                      kAccCompileDontBother);
   out->SetDeclaringClass(klass.Get());
 }
 
@@ -4507,7 +4588,8 @@
   // preference to the invocation handler.
   const uint32_t kRemoveFlags = kAccAbstract | kAccDefault | kAccDefaultConflict;
   // Make the method final.
-  const uint32_t kAddFlags = kAccFinal;
+  // Mark kAccCompileDontBother so that we don't take JIT samples for the method. b/62349349
+  const uint32_t kAddFlags = kAccFinal | kAccCompileDontBother;
   out->SetAccessFlags((out->GetAccessFlags() & ~kRemoveFlags) | kAddFlags);
 
   // Clear the dex_code_item_offset_. It needs to be 0 since proxy methods have no CodeItems but the
@@ -5202,7 +5284,7 @@
 
 ClassTable* ClassLinker::InsertClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
   if (class_loader == nullptr) {
-    return &boot_class_table_;
+    return boot_class_table_.get();
   }
   ClassTable* class_table = class_loader->GetClassTable();
   if (class_table == nullptr) {
@@ -5214,7 +5296,7 @@
 }
 
 ClassTable* ClassLinker::ClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
-  return class_loader == nullptr ? &boot_class_table_ : class_loader->GetClassTable();
+  return class_loader == nullptr ? boot_class_table_.get() : class_loader->GetClassTable();
 }
 
 static ImTable* FindSuperImt(ObjPtr<mirror::Class> klass, PointerSize pointer_size)
@@ -8571,13 +8653,13 @@
 size_t ClassLinker::NumZygoteClasses() const {
   CountClassesVisitor visitor;
   VisitClassLoaders(&visitor);
-  return visitor.num_zygote_classes + boot_class_table_.NumZygoteClasses(nullptr);
+  return visitor.num_zygote_classes + boot_class_table_->NumZygoteClasses(nullptr);
 }
 
 size_t ClassLinker::NumNonZygoteClasses() const {
   CountClassesVisitor visitor;
   VisitClassLoaders(&visitor);
-  return visitor.num_non_zygote_classes + boot_class_table_.NumNonZygoteClasses(nullptr);
+  return visitor.num_non_zygote_classes + boot_class_table_->NumNonZygoteClasses(nullptr);
 }
 
 size_t ClassLinker::NumLoadedClasses() {
@@ -8851,7 +8933,8 @@
         last_dex_file_ = &dex_file;
         DexCacheResolvedClasses resolved_classes(dex_file.GetLocation(),
                                                  dex_file.GetBaseLocation(),
-                                                 dex_file.GetLocationChecksum());
+                                                 dex_file.GetLocationChecksum(),
+                                                 dex_file.NumMethodIds());
         last_resolved_classes_ = result_->find(resolved_classes);
         if (last_resolved_classes_ == result_->end()) {
           last_resolved_classes_ = result_->insert(resolved_classes).first;
@@ -8948,6 +9031,12 @@
                      << info.GetClasses().size() << " classes";
       DCHECK_EQ(dex_file->GetLocationChecksum(), info.GetLocationChecksum());
       for (dex::TypeIndex type_idx : info.GetClasses()) {
+        if (!dex_file->IsTypeIndexValid(type_idx)) {
+          // Something went bad. The profile is probably corrupted. Abort and return an empty set.
+          LOG(WARNING) << "Corrupted profile: invalid type index "
+              << type_idx.index_ << " in dex " << location;
+          return std::unordered_set<std::string>();
+        }
         const DexFile::TypeId& type_id = dex_file->GetTypeId(type_idx);
         const char* descriptor = dex_file->GetTypeDescriptor(type_id);
         ret.insert(descriptor);
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 7f652ec..1e8125e 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -27,7 +27,6 @@
 #include "base/enums.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "class_table.h"
 #include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -35,7 +34,6 @@
 #include "handle.h"
 #include "jni.h"
 #include "mirror/class.h"
-#include "object_callbacks.h"
 #include "verifier/verifier_enums.h"
 
 namespace art {
@@ -59,11 +57,13 @@
   class StackTraceElement;
 }  // namespace mirror
 
+class ClassTable;
 template<class T> class Handle;
 class ImtConflictTable;
 template<typename T> class LengthPrefixedArray;
 template<class T> class MutableHandle;
 class InternTable;
+class LinearAlloc;
 class OatFile;
 template<class T> class ObjectLock;
 class Runtime;
@@ -212,9 +212,7 @@
                              const char* descriptor,
                              ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Finds all the classes with the given descriptor, regardless of ClassLoader.
   void LookupClasses(const char* descriptor, std::vector<ObjPtr<mirror::Class>>& classes)
@@ -385,6 +383,13 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
+  // Directly register an already existing dex cache. RegisterDexFile should be preferred since that
+  // deduplicates DexCaches when possible. The DexCache given to this function must already be fully
+  // initialized and not already registered.
+  void RegisterExistingDexCache(ObjPtr<mirror::DexCache> cache,
+                                ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES(!Locks::dex_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
   ObjPtr<mirror::DexCache> RegisterDexFile(const DexFile& dex_file,
                                            ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::dex_lock_)
@@ -640,8 +645,11 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns null if not found.
+  // This returns a pointer to the class-table, without requiring any locking - including the
+  // boot class-table. It is the caller's responsibility to access this under lock, if required.
   ClassTable* ClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      NO_THREAD_SAFETY_ANALYSIS;
 
   void AppendToBootClassPath(Thread* self, const DexFile& dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -1110,18 +1118,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
-  // new_class_set is the set of classes that were read from the class table section in the image.
-  // If there was no class table section, it is null.
-  bool UpdateAppImageClassLoadersAndDexCaches(
-      gc::space::ImageSpace* space,
-      Handle<mirror::ClassLoader> class_loader,
-      Handle<mirror::ObjectArray<mirror::DexCache>> dex_caches,
-      ClassTable::ClassSet* new_class_set,
-      bool* out_forward_dex_cache_array,
-      std::string* out_error_msg)
-      REQUIRES(!Locks::dex_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Check that c1 == FindSystemClass(self, descriptor). Abort with class dumps otherwise.
   void CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor)
       REQUIRES(!Locks::dex_lock_)
@@ -1171,7 +1167,7 @@
       GUARDED_BY(Locks::classlinker_classes_lock_);
 
   // Boot class path table. Since the class loader for this is null.
-  ClassTable boot_class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
+  std::unique_ptr<ClassTable> boot_class_table_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
   // New class roots, only used by CMS since the GC needs to mark these in the pause.
   std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
@@ -1212,12 +1208,14 @@
   PointerSize image_pointer_size_;
 
   class FindVirtualMethodHolderVisitor;
+
+  friend class AppImageClassLoadersAndDexCachesHelper;
   friend struct CompilationHelper;  // For Compile in ImageTest.
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
-  friend class VMClassLoader;  // for LookupClass and FindClassInBaseDexClassLoader.
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
+  friend class VMClassLoader;  // for LookupClass and FindClassInBaseDexClassLoader.
   ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for DexLock, and RegisterDexFileLocked
   ART_FRIEND_TEST(mirror::DexCacheMethodHandlesTest, Open);  // for AllocDexCache
   ART_FRIEND_TEST(mirror::DexCacheTest, Open);  // for AllocDexCache
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index b421810..684a261 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -50,7 +50,7 @@
 #include "mirror/string-inl.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/class_table-inl.h b/runtime/class_table-inl.h
index dfe8949..b15d82f 100644
--- a/runtime/class_table-inl.h
+++ b/runtime/class_table-inl.h
@@ -18,6 +18,8 @@
 #define ART_RUNTIME_CLASS_TABLE_INL_H_
 
 #include "class_table.h"
+
+#include "gc_root-inl.h"
 #include "oat_file.h"
 
 namespace art {
@@ -93,7 +95,7 @@
   if (kReadBarrierOption != kWithoutReadBarrier && before_ptr != after_ptr) {
     // If another thread raced and updated the reference, do not store the read barrier updated
     // one.
-    data_.CompareExchangeStrongRelaxed(before, Encode(after_ptr, MaskHash(before)));
+    data_.CompareExchangeStrongRelease(before, Encode(after_ptr, MaskHash(before)));
   }
   return after_ptr.Ptr();
 }
@@ -108,7 +110,7 @@
   if (before_ptr != after_ptr) {
     // If another thread raced and updated the reference, do not store the read barrier updated
     // one.
-    data_.CompareExchangeStrongRelaxed(before, Encode(after_ptr, MaskHash(before)));
+    data_.CompareExchangeStrongRelease(before, Encode(after_ptr, MaskHash(before)));
   }
 }
 
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 0891d3f..c45bbe5 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -14,8 +14,9 @@
  * limitations under the License.
  */
 
-#include "class_table.h"
+#include "class_table-inl.h"
 
+#include "base/stl_util.h"
 #include "mirror/class-inl.h"
 #include "oat_file.h"
 
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 430edbb..8616dfb 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -25,18 +25,17 @@
 #include "base/hash_set.h"
 #include "base/macros.h"
 #include "base/mutex.h"
-#include "dex_file.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
-#include "runtime.h"
 
 namespace art {
 
 class OatFile;
 
 namespace mirror {
+  class Class;
   class ClassLoader;
+  class Object;
 }  // namespace mirror
 
 // Each loader has a ClassTable
diff --git a/runtime/common_dex_operations.h b/runtime/common_dex_operations.h
index 6693eef..133ddb0 100644
--- a/runtime/common_dex_operations.h
+++ b/runtime/common_dex_operations.h
@@ -36,8 +36,8 @@
 
   void ArtInterpreterToCompiledCodeBridge(Thread* self,
                                           ArtMethod* caller,
-                                          const DexFile::CodeItem* code_item,
                                           ShadowFrame* shadow_frame,
+                                          uint16_t arg_offset,
                                           JValue* result);
 }  // namespace interpreter
 
@@ -46,17 +46,15 @@
                         ArtMethod* caller_method,
                         const size_t first_dest_reg,
                         ShadowFrame* callee_frame,
-                        JValue* result)
+                        JValue* result,
+                        bool use_interpreter_entrypoint)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (LIKELY(Runtime::Current()->IsStarted())) {
-    ArtMethod* target = callee_frame->GetMethod();
-    if (ClassLinker::ShouldUseInterpreterEntrypoint(
-        target,
-        target->GetEntryPointFromQuickCompiledCode())) {
+    if (use_interpreter_entrypoint) {
       interpreter::ArtInterpreterToInterpreterBridge(self, code_item, callee_frame, result);
     } else {
       interpreter::ArtInterpreterToCompiledCodeBridge(
-          self, caller_method, code_item, callee_frame, result);
+          self, caller_method, callee_frame, first_dest_reg, result);
     }
   } else {
     interpreter::UnstartedRuntime::Invoke(self, code_item, callee_frame, result, first_dest_reg);
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 01c6641..f925994 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -728,13 +728,3 @@
 }
 
 }  // namespace art
-
-namespace std {
-
-template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs) {
-os << ::art::ToString(rhs);
-return os;
-}
-
-}  // namespace std
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index a29cc6c..1274a36 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -98,9 +98,12 @@
   // Returns bin directory which contains host's prebuild tools.
   static std::string GetAndroidHostToolsDir();
 
-  // Returns bin directory wahich contains target's prebuild tools.
+  // Returns bin directory which contains target's prebuild tools.
   static std::string GetAndroidTargetToolsDir(InstructionSet isa);
 
+  // Returns the filename for a test dex (i.e. XandY or ManyMethods).
+  std::string GetTestDexFileName(const char* name) const;
+
  protected:
   // Allow subclases such as CommonCompilerTest to add extra options.
   virtual void SetUpRuntimeOptions(RuntimeOptions* options ATTRIBUTE_UNUSED) {}
@@ -127,8 +130,6 @@
 
   std::string GetTestAndroidRoot();
 
-  std::string GetTestDexFileName(const char* name) const;
-
   std::vector<std::unique_ptr<const DexFile>> OpenTestDexFiles(const char* name);
 
   std::unique_ptr<const DexFile> OpenTestDexFile(const char* name)
@@ -257,12 +258,4 @@
 
 }  // namespace art
 
-namespace std {
-
-// TODO: isn't gtest supposed to be able to print STL types for itself?
-template <typename T>
-std::ostream& operator<<(std::ostream& os, const std::vector<T>& rhs);
-
-}  // namespace std
-
 #endif  // ART_RUNTIME_COMMON_RUNTIME_TEST_H_
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index d0b50fe..7e70b75 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -38,7 +38,7 @@
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "handle_scope.h"
+#include "handle_scope-inl.h"
 #include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
 #include "jni_internal.h"
@@ -56,7 +56,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
-#include "handle_scope-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 #include "utf.h"
 #include "well_known_classes.h"
@@ -149,7 +149,9 @@
   DebugInstrumentationListener() {}
   virtual ~DebugInstrumentationListener() {}
 
-  void MethodEntered(Thread* thread, mirror::Object* this_object, ArtMethod* method,
+  void MethodEntered(Thread* thread,
+                     Handle<mirror::Object> this_object,
+                     ArtMethod* method,
                      uint32_t dex_pc)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     if (method->IsNative()) {
@@ -171,12 +173,15 @@
       // also group it with other events for this location like BREAKPOINT or SINGLE_STEP.
       thread->SetDebugMethodEntry();
     } else {
-      Dbg::UpdateDebugger(thread, this_object, method, 0, Dbg::kMethodEntry, nullptr);
+      Dbg::UpdateDebugger(thread, this_object.Get(), method, 0, Dbg::kMethodEntry, nullptr);
     }
   }
 
-  void MethodExited(Thread* thread, mirror::Object* this_object, ArtMethod* method,
-                    uint32_t dex_pc, const JValue& return_value)
+  void MethodExited(Thread* thread,
+                    Handle<mirror::Object> this_object,
+                    ArtMethod* method,
+                    uint32_t dex_pc,
+                    const JValue& return_value)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     if (method->IsNative()) {
       // TODO: post location events is a suspension point and native method entry stubs aren't.
@@ -189,18 +194,22 @@
       events |= Dbg::kMethodEntry;
       thread->ClearDebugMethodEntry();
     }
-    Dbg::UpdateDebugger(thread, this_object, method, dex_pc, events, &return_value);
+    Dbg::UpdateDebugger(thread, this_object.Get(), method, dex_pc, events, &return_value);
   }
 
-  void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object ATTRIBUTE_UNUSED,
-                    ArtMethod* method, uint32_t dex_pc)
+  void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    ArtMethod* method,
+                    uint32_t dex_pc)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
     LOG(ERROR) << "Unexpected method unwind event in debugger " << ArtMethod::PrettyMethod(method)
                << " " << dex_pc;
   }
 
-  void DexPcMoved(Thread* thread, mirror::Object* this_object, ArtMethod* method,
+  void DexPcMoved(Thread* thread,
+                  Handle<mirror::Object> this_object,
+                  ArtMethod* method,
                   uint32_t new_dex_pc)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     if (IsListeningToMethodExit() && IsReturn(method, new_dex_pc)) {
@@ -217,26 +226,33 @@
         events = Dbg::kMethodEntry;
         thread->ClearDebugMethodEntry();
       }
-      Dbg::UpdateDebugger(thread, this_object, method, new_dex_pc, events, nullptr);
+      Dbg::UpdateDebugger(thread, this_object.Get(), method, new_dex_pc, events, nullptr);
     }
   }
 
-  void FieldRead(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object,
-                 ArtMethod* method, uint32_t dex_pc, ArtField* field)
+  void FieldRead(Thread* thread ATTRIBUTE_UNUSED,
+                 Handle<mirror::Object> this_object,
+                 ArtMethod* method,
+                 uint32_t dex_pc,
+                 ArtField* field)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    Dbg::PostFieldAccessEvent(method, dex_pc, this_object, field);
+    Dbg::PostFieldAccessEvent(method, dex_pc, this_object.Get(), field);
   }
 
-  void FieldWritten(Thread* thread ATTRIBUTE_UNUSED, mirror::Object* this_object,
-                    ArtMethod* method, uint32_t dex_pc, ArtField* field,
+  void FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object,
+                    ArtMethod* method,
+                    uint32_t dex_pc,
+                    ArtField* field,
                     const JValue& field_value)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    Dbg::PostFieldModificationEvent(method, dex_pc, this_object, field, &field_value);
+    Dbg::PostFieldModificationEvent(method, dex_pc, this_object.Get(), field, &field_value);
   }
 
-  void ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED, mirror::Throwable* exception_object)
+  void ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED,
+                       Handle<mirror::Throwable> exception_object)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    Dbg::PostException(exception_object);
+    Dbg::PostException(exception_object.Get());
   }
 
   // We only care about branches in the Jit.
@@ -248,10 +264,10 @@
 
   // We only care about invokes in the Jit.
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
-                                mirror::Object*,
+                                Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                                 ArtMethod* method,
                                 uint32_t dex_pc,
-                                ArtMethod*)
+                                ArtMethod* target ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     LOG(ERROR) << "Unexpected invoke event in debugger " << ArtMethod::PrettyMethod(method)
                << " " << dex_pc;
diff --git a/runtime/dex2oat_environment_test.h b/runtime/dex2oat_environment_test.h
index 6765407..93daa45 100644
--- a/runtime/dex2oat_environment_test.h
+++ b/runtime/dex2oat_environment_test.h
@@ -23,6 +23,7 @@
 
 #include <gtest/gtest.h>
 
+#include "base/stl_util.h"
 #include "common_runtime_test.h"
 #include "compiler_callbacks.h"
 #include "exec_utils.h"
diff --git a/runtime/dex_cache_resolved_classes.h b/runtime/dex_cache_resolved_classes.h
index bebdf0d..2278b05 100644
--- a/runtime/dex_cache_resolved_classes.h
+++ b/runtime/dex_cache_resolved_classes.h
@@ -30,10 +30,12 @@
  public:
   DexCacheResolvedClasses(const std::string& dex_location,
                           const std::string& base_location,
-                          uint32_t location_checksum)
+                          uint32_t location_checksum,
+                          uint32_t num_method_ids)
       : dex_location_(dex_location),
         base_location_(base_location),
-        location_checksum_(location_checksum) {}
+        location_checksum_(location_checksum),
+        num_method_ids_(num_method_ids) {}
 
   // Only compare the key elements, ignore the resolved classes.
   int Compare(const DexCacheResolvedClasses& other) const {
@@ -69,10 +71,15 @@
     return classes_;
   }
 
+  size_t NumMethodIds() const {
+    return num_method_ids_;
+  }
+
  private:
   const std::string dex_location_;
   const std::string base_location_;
   const uint32_t location_checksum_;
+  const uint32_t num_method_ids_;
   // Array of resolved class def indexes.
   mutable std::unordered_set<dex::TypeIndex> classes_;
 };
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 3d68af1..b267e5f 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -35,6 +35,7 @@
 #include "base/enums.h"
 #include "base/file_magic.h"
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
@@ -204,7 +205,7 @@
                                                  verify_checksum,
                                                  error_msg);
   if (dex_file != nullptr) {
-    dex_file->mem_map_.reset(map.release());
+    dex_file->mem_map_ = std::move(map);
   }
   return dex_file;
 }
@@ -323,7 +324,7 @@
                                                  verify_checksum,
                                                  error_msg);
   if (dex_file != nullptr) {
-    dex_file->mem_map_.reset(map.release());
+    dex_file->mem_map_ = std::move(map);
   }
 
   return dex_file;
@@ -397,7 +398,7 @@
     }
     return nullptr;
   }
-  dex_file->mem_map_.reset(map.release());
+  dex_file->mem_map_ = std::move(map);
   if (!dex_file->DisableWrite()) {
     *error_msg = StringPrintf("Failed to make dex file '%s' read only", location.c_str());
     *error_code = ZipOpenErrorCode::kMakeReadOnlyError;
@@ -678,6 +679,32 @@
   UNREACHABLE();
 }
 
+uint32_t DexFile::GetCodeItemSize(const DexFile::CodeItem& code_item) {
+  // The size is the distance from the start of the code item to the end of its last part:
+  // the instruction array when there is no handler data, else the final catch handler.
+  const uintptr_t item_begin = reinterpret_cast<uintptr_t>(&code_item);
+  const uint8_t* cursor = GetCatchHandlerData(code_item, 0);
+  if (code_item.tries_size_ == 0 || cursor == nullptr) {
+    const uint32_t insn_units = code_item.insns_size_in_code_units_;
+    return reinterpret_cast<uintptr_t>(&code_item.insns_[insn_units]) - item_begin;
+  }
+  // The handler data starts with the number of handler entries.
+  const uint32_t handler_count = DecodeUnsignedLeb128(&cursor);
+  for (uint32_t handler = 0; handler != handler_count; ++handler) {
+    // Each entry leads with a signed count. Doubled (and, when not positive, negated plus
+    // one) it is the number of ULEB128 values that must be stepped over for this entry.
+    int32_t pending = DecodeSignedLeb128(&cursor) * 2;
+    if (pending <= 0) {
+      pending = -pending + 1;
+    }
+    while (pending > 0) {
+      DecodeUnsignedLeb128(&cursor);
+      --pending;
+    }
+  }
+  return reinterpret_cast<uintptr_t>(cursor) - item_begin;
+}
+
 const DexFile::FieldId* DexFile::FindFieldId(const DexFile::TypeId& declaring_klass,
                                              const DexFile::StringId& name,
                                              const DexFile::TypeId& type) const {
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 36c7341..3249edb 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -28,7 +28,6 @@
 #include "invoke_type.h"
 #include "jni.h"
 #include "modifiers.h"
-#include "utf.h"
 
 namespace art {
 
@@ -582,6 +581,10 @@
     return header_->type_ids_size_;
   }
 
+  bool IsTypeIndexValid(dex::TypeIndex idx) const {
+    return idx.IsValid() && idx.index_ < NumTypeIds();
+  }
+
   // Returns the TypeId at the specified index.
   const TypeId& GetTypeId(dex::TypeIndex idx) const {
     DCHECK_LT(idx.index_, NumTypeIds()) << GetLocation();
@@ -633,6 +636,8 @@
   uint32_t FindCodeItemOffset(const DexFile::ClassDef& class_def,
                               uint32_t dex_method_idx) const;
 
+  static uint32_t GetCodeItemSize(const DexFile::CodeItem& disk_code_item);
+
   // Returns the declaring class descriptor string of a field id.
   const char* GetFieldDeclaringClassDescriptor(const FieldId& field_id) const {
     const DexFile::TypeId& type_id = GetTypeId(field_id.class_idx_);
diff --git a/runtime/dex_file_test.cc b/runtime/dex_file_test.cc
index 6627550..78d5c5f 100644
--- a/runtime/dex_file_test.cc
+++ b/runtime/dex_file_test.cc
@@ -16,6 +16,8 @@
 
 #include "dex_file.h"
 
+#include <sys/mman.h>
+
 #include <memory>
 
 #include "base/stl_util.h"
@@ -25,7 +27,7 @@
 #include "mem_map.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/dex_file_tracking_registrar.cc b/runtime/dex_file_tracking_registrar.cc
new file mode 100644
index 0000000..f41a50b
--- /dev/null
+++ b/runtime/dex_file_tracking_registrar.cc
@@ -0,0 +1,206 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dex_file_tracking_registrar.h"
+
+#include <deque>
+#include <tuple>
+
+// For dex tracking through poisoning. Note: Requires forcing sanitization. This is the reason for
+// the ifdefs and early include.
+#ifdef ART_DEX_FILE_ACCESS_TRACKING
+#ifndef ART_ENABLE_ADDRESS_SANITIZER
+#define ART_ENABLE_ADDRESS_SANITIZER
+#endif
+#endif
+#include "base/memory_tool.h"
+
+#include "base/logging.h"
+#include "dex_file-inl.h"
+
+namespace art {
+namespace dex {
+namespace tracking {
+
+// If true, poison dex files to track accesses.
+static constexpr bool kDexFileAccessTracking =
+#ifdef ART_DEX_FILE_ACCESS_TRACKING
+    true;
+#else
+    false;
+#endif
+
+// The following are configurations of poisoning certain sections of a Dex File.
+// More will be added
+enum DexTrackingType {
+  // Poisons all of a Dex File when set.
+  kWholeDexTracking,
+  // Poisons all Code Items of a Dex File when set.
+  kCodeItemTracking,
+  // Poisons all subsections of a Code Item, except the Insns bytecode array
+  // section, when set for all Code Items in a Dex File.
+  kCodeItemNonInsnsTracking,
+  // Poisons all subsections of a Code Item, except the Insns bytecode array
+  // section, when set for all Code Items in a Dex File.
+  // Additionally unpoisons the entire Code Item when method is a class
+  // initializer.
+  kCodeItemNonInsnsNoClinitTracking,
+  // Poisons based on a custom tracking system which can be specified in
+  // SetDexSections
+  kCustomTracking,
+};
+
+// Intended for local changes only.
+// Represents the current configuration being run.
+static constexpr DexTrackingType kCurrentTrackingSystem = kWholeDexTracking;
+
+// Queues the ranges selected by kCurrentTrackingSystem. Intended for local changes only.
+void DexFileTrackingRegistrar::SetDexSections() {
+  // Both must hold: tracking is compiled in and there is a dex file for the helpers to read.
+  if (kDexFileAccessTracking && dex_file_ != nullptr) {
+    switch (kCurrentTrackingSystem) {
+      case kWholeDexTracking:
+        SetDexFileRegistration(true);
+        break;
+      case kCodeItemTracking:
+        SetAllCodeItemRegistration(true);
+        break;
+      case kCodeItemNonInsnsTracking:
+        SetAllCodeItemRegistration(true);
+        SetAllInsnsRegistration(false);
+        break;
+      case kCodeItemNonInsnsNoClinitTracking:
+        SetAllCodeItemRegistration(true);
+        SetAllInsnsRegistration(false);
+        SetCodeItemRegistration("<clinit>", false);
+        break;
+      case kCustomTracking:
+        // TODO: Add/remove additional calls here to (un)poison sections of dex_file_
+        break;
+    }
+  }
+}
+
+void RegisterDexFile(const DexFile* dex_file) {
+  DexFileTrackingRegistrar registrar(dex_file);
+  registrar.SetDexSections();    // Queue the ranges chosen by the tracking configuration.
+  registrar.SetCurrentRanges();  // Hand every queued range to the memory tool.
+}
+
+inline void SetRegistrationRange(const void* begin, size_t size, bool should_poison) {
+  if (!should_poison) {
+    // Unpoison the range. Note: MEMORY_TOOL_MAKE_UNDEFINED has the same functionality with
+    // Address Sanitizer. The difference has not been tested with Valgrind.
+    MEMORY_TOOL_MAKE_DEFINED(begin, size);
+    return;
+  }
+  MEMORY_TOOL_MAKE_NOACCESS(begin, size);
+}
+
+void DexFileTrackingRegistrar::SetCurrentRanges() {
+  // Drain range_values_ from the front, applying each queued request exactly once so that
+  // a subsequent call does not repeat the same (un)poisoning.
+  for (; !range_values_.empty(); range_values_.pop_front()) {
+    const std::tuple<const void*, size_t, bool>& pending = range_values_.front();
+    const void* range_begin = std::get<0>(pending);
+    const size_t range_size = std::get<1>(pending);
+    const bool poison = std::get<2>(pending);
+    SetRegistrationRange(range_begin, range_size, poison);
+  }
+}
+
+void DexFileTrackingRegistrar::SetDexFileRegistration(bool should_poison) {
+  // A single range covering the entire file: Size() bytes starting at Begin().
+  const void* whole_file = reinterpret_cast<const void*>(dex_file_->Begin());
+  range_values_.push_back(std::make_tuple(whole_file, dex_file_->Size(), should_poison));
+}
+
+// Queues the code item of every method, direct and virtual, for (un)poisoning.
+void DexFileTrackingRegistrar::SetAllCodeItemRegistration(bool should_poison) {
+  for (size_t classdef_ctr = 0; classdef_ctr < dex_file_->NumClassDefs(); ++classdef_ctr) {
+    const DexFile::ClassDef& cd = dex_file_->GetClassDef(classdef_ctr);
+    const uint8_t* class_data = dex_file_->GetClassData(cd);
+    if (class_data != nullptr) {
+      ClassDataItemIterator cdit(*dex_file_, class_data);
+      while (cdit.HasNextStaticField() || cdit.HasNextInstanceField()) {  // Skip fields.
+        cdit.Next();
+      }
+      // Direct methods alone would leave the code items of virtual methods untouched.
+      while (cdit.HasNextDirectMethod() || cdit.HasNextVirtualMethod()) {
+        const DexFile::CodeItem* code_item = cdit.GetMethodCodeItem();
+        if (code_item != nullptr) {
+          size_t code_item_size = DexFile::GetCodeItemSize(*code_item);
+          range_values_.emplace_back(code_item, code_item_size, should_poison);
+        }
+        cdit.Next();
+      }
+    }
+  }
+}
+
+// Queues the insns_ array of every method's code item, direct and virtual, for (un)poisoning.
+void DexFileTrackingRegistrar::SetAllInsnsRegistration(bool should_poison) {
+  for (size_t classdef_ctr = 0; classdef_ctr < dex_file_->NumClassDefs(); ++classdef_ctr) {
+    const DexFile::ClassDef& cd = dex_file_->GetClassDef(classdef_ctr);
+    const uint8_t* class_data = dex_file_->GetClassData(cd);
+    if (class_data != nullptr) {
+      ClassDataItemIterator cdit(*dex_file_, class_data);
+      while (cdit.HasNextStaticField() || cdit.HasNextInstanceField()) {  // Skip fields.
+        cdit.Next();
+      }
+      // Must visit the same methods as SetAllCodeItemRegistration, so include virtual ones.
+      while (cdit.HasNextDirectMethod() || cdit.HasNextVirtualMethod()) {
+        const DexFile::CodeItem* code_item = cdit.GetMethodCodeItem();
+        if (code_item != nullptr) {
+          // Member insns_size_in_code_units_ is in 2-byte units
+          size_t insns_size = code_item->insns_size_in_code_units_ * 2;
+          range_values_.emplace_back(&code_item->insns_, insns_size, should_poison);
+        }
+        cdit.Next();
+      }
+    }
+  }
+}
+
+void DexFileTrackingRegistrar::SetCodeItemRegistration(const char* class_name, bool should_poison) {
+  for (size_t classdef_ctr = 0; classdef_ctr < dex_file_->NumClassDefs(); ++classdef_ctr) {
+    const DexFile::ClassDef& cd = dex_file_->GetClassDef(classdef_ctr);
+    const uint8_t* class_data = dex_file_->GetClassData(cd);
+    if (class_data != nullptr) {
+      ClassDataItemIterator cdit(*dex_file_, class_data);
+      // Skip the field entries; class_name is matched against method names further down.
+      while (cdit.HasNextStaticField() || cdit.HasNextInstanceField()) {
+        cdit.Next();
+      }
+      while (cdit.HasNextDirectMethod()) {  // Only direct methods are examined.
+        const DexFile::MethodId& methodid_item = dex_file_->GetMethodId(cdit.GetMemberIndex());
+        const char * methodid_name = dex_file_->GetMethodName(methodid_item);
+        const DexFile::CodeItem* code_item = cdit.GetMethodCodeItem();
+        if (code_item != nullptr && strcmp(methodid_name, class_name) == 0) {  // Exact match.
+          const void* code_item_begin = reinterpret_cast<const void*>(code_item);
+          size_t code_item_size = DexFile::GetCodeItemSize(*code_item);
+          range_values_.push_back(
+              std::make_tuple(code_item_begin, code_item_size, should_poison));
+        }
+        cdit.Next();
+      }
+    }
+  }
+}
+
+}  // namespace tracking
+}  // namespace dex
+}  // namespace art
diff --git a/runtime/dex_file_tracking_registrar.h b/runtime/dex_file_tracking_registrar.h
new file mode 100644
index 0000000..b0fa275
--- /dev/null
+++ b/runtime/dex_file_tracking_registrar.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_FILE_TRACKING_REGISTRAR_H_
+#define ART_RUNTIME_DEX_FILE_TRACKING_REGISTRAR_H_
+
+#include <deque>
+#include <tuple>
+
+#include "dex_file.h"
+
+namespace art {
+namespace dex {
+namespace tracking {
+
+// Class for (un)poisoning various sections of a dex file.
+//
+// This class provides the means to log accesses to only those sections whose
+// accesses are of interest. All accesses are displayed as stack traces in
+// logcat.
+class DexFileTrackingRegistrar {
+ public:
+  explicit DexFileTrackingRegistrar(const DexFile* const dex_file)
+      : dex_file_(dex_file) {
+  }
+
+  // This is where the private registration functions declared below are called
+  // to actually poison sections.
+  void SetDexSections();
+
+  // Uses data contained inside range_values_ to poison memory through the
+  // memory tool.
+  void SetCurrentRanges();
+
+ private:
+  void SetDexFileRegistration(bool should_poison);
+
+  // Set of functions concerning Code Items of dex_file_
+  void SetAllCodeItemRegistration(bool should_poison);
+  // Sets the insns_ section of all code items.
+  void SetAllInsnsRegistration(bool should_poison);
+  // Adds to range_values_ the code items of direct methods whose name equals class_name.
+  void SetCodeItemRegistration(const char* class_name, bool should_poison);
+
+  // Contains (begin, size, should_poison) tuples for all ranges of memory that
+  // need to be explicitly (un)poisoned by the memory tool.
+  std::deque<std::tuple<const void *, size_t, bool>> range_values_;
+
+  const DexFile* const dex_file_;
+};
+
+// This function is meant to be called externally to use DexFileTrackingRegistrar.
+void RegisterDexFile(const DexFile* dex_file);
+
+}  // namespace tracking
+}  // namespace dex
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_FILE_TRACKING_REGISTRAR_H_
diff --git a/runtime/dex_file_verifier.h b/runtime/dex_file_verifier.h
index d1043c6..74f8225 100644
--- a/runtime/dex_file_verifier.h
+++ b/runtime/dex_file_verifier.h
@@ -19,6 +19,8 @@
 
 #include <unordered_set>
 
+#include "base/allocator.h"
+#include "base/hash_map.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "safe_map.h"
@@ -226,6 +228,15 @@
     }
   };
   // Map from offset to dex file type, HashMap for performance reasons.
+  template<class Key,
+           class T,
+           class EmptyFn,
+           AllocatorTag kTag,
+           class Hash = std::hash<Key>,
+           class Pred = std::equal_to<Key>>
+  using AllocationTrackingHashMap = HashMap<
+      Key, T, EmptyFn, Hash, Pred, TrackingAllocator<std::pair<Key, T>, kTag>>;
+
   AllocationTrackingHashMap<uint32_t,
                             uint16_t,
                             OffsetTypeMapEmptyFn,
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index 068e122..0e58e6d 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -29,7 +29,7 @@
 #include "dex_file_types.h"
 #include "leb128.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/dex_method_iterator_test.cc b/runtime/dex_method_iterator_test.cc
index cd8c390..e83829b 100644
--- a/runtime/dex_method_iterator_test.cc
+++ b/runtime/dex_method_iterator_test.cc
@@ -20,7 +20,7 @@
 #include "common_runtime_test.h"
 #include "oat_file.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/dex_reference_collection.h b/runtime/dex_reference_collection.h
new file mode 100644
index 0000000..76355d6
--- /dev/null
+++ b/runtime/dex_reference_collection.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_DEX_REFERENCE_COLLECTION_H_
+#define ART_RUNTIME_DEX_REFERENCE_COLLECTION_H_
+
+#include "base/macros.h"
+
+#include <vector>
+#include <map>
+
+namespace art {
+
+class DexFile;
+
+// Collection of dex references that is more memory efficient than a vector of <dex, index> pairs.
+// Also allows quick lookups of all of the references for a single dex.
+template <class IndexType, template<typename Type> class Allocator>
+class DexReferenceCollection {
+ public:
+  using VectorAllocator = Allocator<IndexType>;
+  using IndexVector = std::vector<IndexType, VectorAllocator>;
+  using MapAllocator = Allocator<std::pair<const DexFile*, IndexVector>>;
+  using DexFileMap = std::map<
+      const DexFile*,
+      IndexVector,
+      std::less<const DexFile*>,
+      Allocator<std::pair<const DexFile* const, IndexVector>>>;
+
+  DexReferenceCollection(const MapAllocator& map_allocator = MapAllocator(),
+                         const VectorAllocator& vector_allocator = VectorAllocator())
+      : map_(map_allocator),
+        vector_allocator_(vector_allocator) {}
+
+  void AddReference(const DexFile* dex, IndexType index) {
+    GetOrInsertVector(dex)->push_back(index);
+  }
+
+  DexFileMap& GetMap() {
+    return map_;
+  }
+
+  size_t NumReferences() const {
+    size_t ret = 0;
+    for (auto&& pair : map_) {
+      ret += pair.second.size();
+    }
+    return ret;
+  }
+
+ private:
+  DexFileMap map_;
+  // Optimize for adding to the same vector in succession.
+  const DexFile* current_dex_file_ = nullptr;
+  IndexVector* current_vector_ = nullptr;
+  VectorAllocator vector_allocator_;
+
+  ALWAYS_INLINE IndexVector* GetOrInsertVector(const DexFile* dex) {
+    if (UNLIKELY(current_dex_file_ != dex)) {
+      // There is an assumption that constructing an empty vector won't do any allocations. If this
+      // is incorrect, this might leak for the arena case.
+      current_vector_ = &map_.emplace(dex, IndexVector(vector_allocator_)).first->second;
+      current_dex_file_ = dex;
+    }
+    return current_vector_;
+  }
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_DEX_REFERENCE_COLLECTION_H_
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 5fbdc46..afe4eeb 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -1698,7 +1698,7 @@
                                               low_4gb,
                                               file->GetPath().c_str(),
                                               error_msg));
-  if (map == nullptr && map->Size() != EI_NIDENT) {
+  if (map == nullptr || map->Size() != EI_NIDENT) {
     return nullptr;
   }
   uint8_t* header = map->Begin();
@@ -1749,7 +1749,7 @@
                                               low_4gb,
                                               file->GetPath().c_str(),
                                               error_msg));
-  if (map == nullptr && map->Size() != EI_NIDENT) {
+  if (map == nullptr || map->Size() != EI_NIDENT) {
     return nullptr;
   }
   uint8_t* header = map->Begin();
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index c340a88..88a5a13 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -177,7 +177,7 @@
 }
 
 static inline std::pair<ArtMethod*, uintptr_t> DoGetCalleeSaveMethodOuterCallerAndPc(
-    ArtMethod** sp, Runtime::CalleeSaveType type) REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod** sp, CalleeSaveType type) REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(type));
 
   const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, type);
@@ -232,9 +232,7 @@
   return caller;
 }
 
-ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
-                                     Runtime::CalleeSaveType type,
-                                     bool do_caller_check)
+ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, CalleeSaveType type, bool do_caller_check)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
   auto outer_caller_and_pc = DoGetCalleeSaveMethodOuterCallerAndPc(sp, type);
@@ -244,8 +242,7 @@
   return caller;
 }
 
-CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self,
-                                                             Runtime::CalleeSaveType type) {
+CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self, CalleeSaveType type) {
   CallerAndOuterMethod result;
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
   ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
@@ -257,7 +254,7 @@
   return result;
 }
 
-ArtMethod* GetCalleeSaveOuterMethod(Thread* self, Runtime::CalleeSaveType type) {
+ArtMethod* GetCalleeSaveOuterMethod(Thread* self, CalleeSaveType type) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
   ArtMethod** sp = self->GetManagedStack()->GetTopQuickFrame();
   return DoGetCalleeSaveMethodOuterCallerAndPc(sp, type).first;
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 69ee3eb..eed08aa 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -20,6 +20,7 @@
 #include <jni.h>
 #include <stdint.h>
 
+#include "base/callee_save_type.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "dex_instruction.h"
@@ -28,7 +29,6 @@
 #include "handle.h"
 #include "invoke_type.h"
 #include "jvalue.h"
-#include "runtime.h"
 
 namespace art {
 
@@ -178,7 +178,7 @@
 inline INT_TYPE art_float_to_integral(FLOAT_TYPE f);
 
 ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp,
-                                     Runtime::CalleeSaveType type,
+                                     CalleeSaveType type,
                                      bool do_caller_check = false)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -187,11 +187,10 @@
   ArtMethod* outer_method;
 };
 
-CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self,
-                                                             Runtime::CalleeSaveType type)
+CallerAndOuterMethod GetCalleeSaveMethodCallerAndOuterMethod(Thread* self, CalleeSaveType type)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-ArtMethod* GetCalleeSaveOuterMethod(Thread* self, Runtime::CalleeSaveType type)
+ArtMethod* GetCalleeSaveOuterMethod(Thread* self, CalleeSaveType type)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
 }  // namespace art
diff --git a/runtime/entrypoints/jni/jni_entrypoints.cc b/runtime/entrypoints/jni/jni_entrypoints.cc
index eeb138b..dd0819e 100644
--- a/runtime/entrypoints/jni/jni_entrypoints.cc
+++ b/runtime/entrypoints/jni/jni_entrypoints.cc
@@ -42,12 +42,11 @@
   // otherwise we return the address of the method we found.
   void* native_code = soa.Vm()->FindCodeForNativeMethod(method);
   if (native_code == nullptr) {
-    DCHECK(self->IsExceptionPending());
+    self->AssertPendingException();
     return nullptr;
-  } else {
-    // Register so that future calls don't come here
-    return method->RegisterNative(native_code, false);
   }
+  // Register so that future calls don't come here
+  return method->RegisterNative(native_code, false);
 }
 
 }  // namespace art
diff --git a/runtime/entrypoints/quick/callee_save_frame.h b/runtime/entrypoints/quick/callee_save_frame.h
index df37f95..69e3fc1 100644
--- a/runtime/entrypoints/quick/callee_save_frame.h
+++ b/runtime/entrypoints/quick/callee_save_frame.h
@@ -18,9 +18,9 @@
 #define ART_RUNTIME_ENTRYPOINTS_QUICK_CALLEE_SAVE_FRAME_H_
 
 #include "arch/instruction_set.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "base/mutex.h"
-#include "runtime.h"
 #include "thread-inl.h"
 
 // Specific frame size code is in architecture-specific files. We include this to compile-time
@@ -46,13 +46,6 @@
     }
   }
 
-  ScopedQuickEntrypointChecks() REQUIRES_SHARED(Locks::mutator_lock_)
-      : self_(kIsDebugBuild ? Thread::Current() : nullptr), exit_check_(kIsDebugBuild) {
-    if (kIsDebugBuild) {
-      TestsOnEntry();
-    }
-  }
-
   ~ScopedQuickEntrypointChecks() REQUIRES_SHARED(Locks::mutator_lock_) {
     if (exit_check_) {
       TestsOnExit();
@@ -74,7 +67,7 @@
   bool exit_check_;
 };
 
-static constexpr size_t GetCalleeSaveFrameSize(InstructionSet isa, Runtime::CalleeSaveType type) {
+static constexpr size_t GetCalleeSaveFrameSize(InstructionSet isa, CalleeSaveType type) {
   // constexpr must be a return statement.
   return (isa == kArm || isa == kThumb2) ? arm::ArmCalleeSaveFrameSize(type) :
          isa == kArm64 ? arm64::Arm64CalleeSaveFrameSize(type) :
@@ -100,8 +93,7 @@
 }
 
 // Note: this specialized statement is sanity-checked in the quick-trampoline gtest.
-static constexpr size_t GetCalleeSaveReturnPcOffset(InstructionSet isa,
-                                                    Runtime::CalleeSaveType type) {
+static constexpr size_t GetCalleeSaveReturnPcOffset(InstructionSet isa, CalleeSaveType type) {
   return GetCalleeSaveFrameSize(isa, type) - static_cast<size_t>(GetConstExprPointerSize(isa));
 }
 
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 6481b97..267f384 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -26,7 +26,7 @@
 
 namespace art {
 
-void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
+static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // JNI
   jpoints->pDlsymLookup = art_jni_dlsym_lookup_stub;
 
diff --git a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
index 5762e4f..53f0727 100644
--- a/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_deoptimization_entrypoints.cc
@@ -21,6 +21,7 @@
 #include "interpreter/interpreter.h"
 #include "obj_ptr-inl.h"  // TODO: Find the other include that isn't complete, and clean this up.
 #include "quick_exception_handler.h"
+#include "runtime.h"
 #include "thread.h"
 
 namespace art {
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 6b96567..fe56543 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -15,6 +15,7 @@
  */
 
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "callee_save_frame.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "class_linker-inl.h"
@@ -64,7 +65,8 @@
   // A class may be accessing another class' fields when it doesn't have access, as access has been
   // given by inheritance.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
+                                                                  CalleeSaveType::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, true, false);
@@ -78,7 +80,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Called when method->dex_cache_resolved_types_[] misses.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
+                                                                  CalleeSaveType::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, false);
@@ -93,7 +96,8 @@
   // Called when caller isn't guaranteed to have access to a type and the dex cache may be
   // unpopulated.
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
+                                                                  CalleeSaveType::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::Class* result =
       ResolveVerifyAndClinit(dex::TypeIndex(type_idx), caller, self, false, true);
@@ -106,7 +110,8 @@
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveEverything);
+  auto caller_and_outer = GetCalleeSaveMethodCallerAndOuterMethod(self,
+                                                                  CalleeSaveType::kSaveEverything);
   ArtMethod* caller = caller_and_outer.caller;
   mirror::String* result = ResolveStringFromCode(caller, dex::StringIndex(string_idx));
   if (LIKELY(result != nullptr)) {
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e2d45ac..74e7c18 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -145,22 +145,22 @@
   V(A64Load, int64_t, volatile const int64_t *) \
   V(A64Store, void, volatile int64_t *, int64_t) \
 \
-  V(NewEmptyString, void) \
-  V(NewStringFromBytes_B, void) \
-  V(NewStringFromBytes_BI, void) \
-  V(NewStringFromBytes_BII, void) \
-  V(NewStringFromBytes_BIII, void) \
-  V(NewStringFromBytes_BIIString, void) \
-  V(NewStringFromBytes_BString, void) \
-  V(NewStringFromBytes_BIICharset, void) \
-  V(NewStringFromBytes_BCharset, void) \
-  V(NewStringFromChars_C, void) \
-  V(NewStringFromChars_CII, void) \
-  V(NewStringFromChars_IIC, void) \
-  V(NewStringFromCodePoints, void) \
-  V(NewStringFromString, void) \
-  V(NewStringFromStringBuffer, void) \
-  V(NewStringFromStringBuilder, void) \
+  V(NewEmptyString, void, void) \
+  V(NewStringFromBytes_B, void, void) \
+  V(NewStringFromBytes_BI, void, void) \
+  V(NewStringFromBytes_BII, void, void) \
+  V(NewStringFromBytes_BIII, void, void) \
+  V(NewStringFromBytes_BIIString, void, void) \
+  V(NewStringFromBytes_BString, void, void) \
+  V(NewStringFromBytes_BIICharset, void, void) \
+  V(NewStringFromBytes_BCharset, void, void) \
+  V(NewStringFromChars_C, void, void) \
+  V(NewStringFromChars_CII, void, void) \
+  V(NewStringFromChars_IIC, void, void) \
+  V(NewStringFromCodePoints, void, void) \
+  V(NewStringFromString, void, void) \
+  V(NewStringFromStringBuffer, void, void) \
+  V(NewStringFromStringBuilder, void, void) \
 \
   V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
   V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
diff --git a/runtime/entrypoints/quick/quick_field_entrypoints.cc b/runtime/entrypoints/quick/quick_field_entrypoints.cc
index 822c5a8..726bddd 100644
--- a/runtime/entrypoints/quick/quick_field_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_field_entrypoints.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "callee_save_frame.h"
 #include "dex_file-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
@@ -59,12 +60,12 @@
   if (kIsDebugBuild) {
     // stub_test doesn't call this code with a proper frame, so get the outer, and if
     // it does not have compiled code return it.
-    ArtMethod* outer = GetCalleeSaveOuterMethod(self, Runtime::kSaveRefsOnly);
+    ArtMethod* outer = GetCalleeSaveOuterMethod(self, CalleeSaveType::kSaveRefsOnly);
     if (outer->GetEntryPointFromQuickCompiledCode() == nullptr) {
       return outer;
     }
   }
-  return GetCalleeSaveMethodCallerAndOuterMethod(self, Runtime::kSaveRefsOnly).caller;
+  return GetCalleeSaveMethodCallerAndOuterMethod(self, CalleeSaveType::kSaveRefsOnly).caller;
 }
 
 #define ART_GET_FIELD_FROM_CODE(Kind, PrimitiveType, RetType, SetType,         \
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
deleted file mode 100644
index 81560cc..0000000
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2012 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "art_method.h"
-#include "base/enums.h"
-#include "callee_save_frame.h"
-#include "entrypoints/runtime_asm_entrypoints.h"
-#include "instrumentation.h"
-#include "mirror/object-inl.h"
-#include "runtime.h"
-#include "thread-inl.h"
-
-namespace art {
-
-extern "C" const void* artInstrumentationMethodEntryFromCode(ArtMethod* method,
-                                                             mirror::Object* this_object,
-                                                             Thread* self,
-                                                             uintptr_t lr)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Instrumentation changes the stack. Thus, when exiting, the stack cannot be verified, so skip
-  // that part.
-  ScopedQuickEntrypointChecks sqec(self, kIsDebugBuild, false);
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  const void* result;
-  if (instrumentation->IsDeoptimized(method)) {
-    result = GetQuickToInterpreterBridge();
-  } else {
-    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
-    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
-  }
-  bool interpreter_entry = (result == GetQuickToInterpreterBridge());
-  instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? nullptr : this_object,
-                                                 method, lr, interpreter_entry);
-  CHECK(result != nullptr) << method->PrettyMethod();
-  return result;
-}
-
-extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self, ArtMethod** sp,
-                                                              uint64_t gpr_result,
-                                                              uint64_t fpr_result)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Instrumentation exit stub must not be entered with a pending exception.
-  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
-                                     << self->GetException()->Dump();
-  // Compute address of return PC and sanity check that it currently holds 0.
-  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly);
-  uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
-                                                      return_pc_offset);
-  CHECK_EQ(*return_pc, 0U);
-
-  // Pop the frame filling in the return pc. The low half of the return value is 0 when
-  // deoptimization shouldn't be performed with the high-half having the return address. When
-  // deoptimization should be performed the low half is zero and the high-half the address of the
-  // deoptimization entry point.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
-      self, return_pc, gpr_result, fpr_result);
-  return return_or_deoptimize_pc;
-}
-
-}  // namespace art
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 2b349e3..b7cd39f 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -15,9 +15,11 @@
  */
 
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "callee_save_frame.h"
 #include "common_throws.h"
+#include "debugger.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
 #include "entrypoints/entrypoint_utils-inl.h"
@@ -26,7 +28,9 @@
 #include "imt_conflict_table.h"
 #include "imtable-inl.h"
 #include "interpreter/interpreter.h"
+#include "instrumentation.h"
 #include "linear_alloc.h"
+#include "method_bss_mapping.h"
 #include "method_handles.h"
 #include "method_reference.h"
 #include "mirror/class-inl.h"
@@ -35,23 +39,24 @@
 #include "mirror/method_handle_impl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "oat_file.h"
 #include "oat_quick_method_header.h"
 #include "quick_exception_handler.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
-#include "debugger.h"
+#include "thread-inl.h"
 #include "well_known_classes.h"
 
 namespace art {
 
-// Visits the arguments as saved to the stack by a Runtime::kRefAndArgs callee save frame.
+// Visits the arguments as saved to the stack by a CalleeSaveType::kRefAndArgs callee save frame.
 class QuickArgumentVisitor {
   // Number of bytes for each out register in the caller method's frame.
   static constexpr size_t kBytesStackArgLocation = 4;
   // Frame size in bytes of a callee-save frame for RefsAndArgs.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_FrameSize =
-      GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
+      GetCalleeSaveFrameSize(kRuntimeISA, CalleeSaveType::kSaveRefsAndArgs);
 #if defined(__arm__)
   // The callee save frame is pointed to by SP.
   // | argN       |  |
@@ -80,11 +85,11 @@
   static constexpr size_t kNumQuickFprArgs = kArm32QuickCodeUseSoftFloat ? 0 : 16;
   static constexpr bool kGprFprLockstep = false;
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm::ArmCalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
+      arm::ArmCalleeSaveFpr1Offset(CalleeSaveType::kSaveRefsAndArgs);  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm::ArmCalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
+      arm::ArmCalleeSaveGpr1Offset(CalleeSaveType::kSaveRefsAndArgs);  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm::ArmCalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
+      arm::ArmCalleeSaveLrOffset(CalleeSaveType::kSaveRefsAndArgs);  // Offset of return address.
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -117,12 +122,15 @@
   static constexpr size_t kNumQuickGprArgs = 7;  // 7 arguments passed in GPRs.
   static constexpr size_t kNumQuickFprArgs = 8;  // 8 arguments passed in FPRs.
   static constexpr bool kGprFprLockstep = false;
+  // Offset of first FPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Fpr1Offset =
-      arm64::Arm64CalleeSaveFpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first FPR arg.
+      arm64::Arm64CalleeSaveFpr1Offset(CalleeSaveType::kSaveRefsAndArgs);
+  // Offset of first GPR arg.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_Gpr1Offset =
-      arm64::Arm64CalleeSaveGpr1Offset(Runtime::kSaveRefsAndArgs);  // Offset of first GPR arg.
+      arm64::Arm64CalleeSaveGpr1Offset(CalleeSaveType::kSaveRefsAndArgs);
+  // Offset of return address.
   static constexpr size_t kQuickCalleeSaveFrame_RefAndArgs_LrOffset =
-      arm64::Arm64CalleeSaveLrOffset(Runtime::kSaveRefsAndArgs);  // Offset of return address.
+      arm64::Arm64CalleeSaveLrOffset(CalleeSaveType::kSaveRefsAndArgs);
   static size_t GprIndexToGprOffset(uint32_t gpr_index) {
     return gpr_index * GetBytesPerGprSpillLocation(kRuntimeISA);
   }
@@ -322,7 +330,7 @@
 
   static ArtMethod* GetCallingMethod(ArtMethod** sp) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    return GetCalleeSaveMethodCaller(sp, Runtime::kSaveRefsAndArgs);
+    return GetCalleeSaveMethodCaller(sp, CalleeSaveType::kSaveRefsAndArgs);
   }
 
   static ArtMethod* GetOuterMethod(ArtMethod** sp) REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -334,7 +342,8 @@
 
   static uint32_t GetCallingDexPc(ArtMethod** sp) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
+    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA,
+                                                            CalleeSaveType::kSaveRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
@@ -361,7 +370,8 @@
   static bool GetInvokeType(ArtMethod** sp, InvokeType* invoke_type, uint32_t* dex_method_index)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK((*sp)->IsCalleeSaveMethod());
-    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA, Runtime::kSaveRefsAndArgs);
+    const size_t callee_frame_size = GetCalleeSaveFrameSize(kRuntimeISA,
+                                                            CalleeSaveType::kSaveRefsAndArgs);
     ArtMethod** caller_sp = reinterpret_cast<ArtMethod**>(
         reinterpret_cast<uintptr_t>(sp) + callee_frame_size);
     uintptr_t outer_pc = QuickArgumentVisitor::GetCallingPc(sp);
@@ -886,7 +896,6 @@
     soa_->Env()->DeleteLocalRef(pair.first);
   }
 }
-
 // Handler for invocation on proxy methods. On entry a frame will exist for the proxy object method
 // which is responsible for recording callee save registers. We explicitly place into jobjects the
 // incoming reference arguments (so they survive GC). We invoke the invocation handler, which is a
@@ -979,6 +988,77 @@
   }
 }
 
+extern "C" const void* artInstrumentationMethodEntryFromCode(ArtMethod* method,
+                                                             mirror::Object* this_object,
+                                                             Thread* self,
+                                                             ArtMethod** sp)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const void* result;  // Quick code for |method|, returned unless an exception is pending.
+  // Instrumentation changes the stack. Thus, when exiting, the stack cannot be verified, so skip
+  // that part.
+  ScopedQuickEntrypointChecks sqec(self, kIsDebugBuild, false);
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (instrumentation->IsDeoptimized(method)) {  // Deoptimized: use the interpreter bridge.
+    result = GetQuickToInterpreterBridge();
+  } else {
+    result = instrumentation->GetQuickCodeFor(method, kRuntimePointerSize);
+    DCHECK(!Runtime::Current()->GetClassLinker()->IsQuickToInterpreterBridge(result));
+  }
+
+  bool interpreter_entry = (result == GetQuickToInterpreterBridge());
+  bool is_static = method->IsStatic();
+  uint32_t shorty_len;
+  const char* shorty =
+      method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetShorty(&shorty_len);
+
+  ScopedObjectAccessUnchecked soa(self);
+  RememberForGcArgumentVisitor visitor(sp, is_static, shorty, shorty_len, &soa);
+  visitor.VisitArguments();  // References are fixed up after the frame push below.
+
+  instrumentation->PushInstrumentationStackFrame(self,
+                                                 is_static ? nullptr : this_object,
+                                                 method,
+                                                 QuickArgumentVisitor::GetCallingPc(sp),
+                                                 interpreter_entry);
+
+  visitor.FixupReferences();
+  if (UNLIKELY(self->IsExceptionPending())) {
+    return nullptr;  // Exception pending: hand back no code pointer.
+  }
+  CHECK(result != nullptr) << method->PrettyMethod();
+  return result;
+}
+
+extern "C" TwoWordReturn artInstrumentationMethodExitFromCode(Thread* self,
+                                                              ArtMethod** sp,
+                                                              uint64_t* gpr_result,
+                                                              uint64_t* fpr_result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK_EQ(reinterpret_cast<uintptr_t>(self), reinterpret_cast<uintptr_t>(Thread::Current()));
+  CHECK(gpr_result != nullptr);  // Both result pointers are forwarded to the pop below.
+  CHECK(fpr_result != nullptr);
+  // Instrumentation exit stub must not be entered with a pending exception.
+  CHECK(!self->IsExceptionPending()) << "Enter instrumentation exit stub with pending exception "
+                                     << self->GetException()->Dump();
+  // Compute address of return PC and sanity check that it currently holds 0.
+  size_t return_pc_offset = GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly);
+  uintptr_t* return_pc = reinterpret_cast<uintptr_t*>(reinterpret_cast<uint8_t*>(sp) +
+                                                      return_pc_offset);
+  CHECK_EQ(*return_pc, 0U);
+
+  // Pop the frame filling in the return pc. The low half of the return value is 0 when
+  // deoptimization shouldn't be performed with the high-half having the return address. When
+  // deoptimization should be performed the low half is zero and the high-half the address of the
+  // deoptimization entry point.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  TwoWordReturn return_or_deoptimize_pc = instrumentation->PopInstrumentationStackFrame(
+      self, return_pc, gpr_result, fpr_result);
+  if (self->IsExceptionPending()) {
+    return GetTwoWordFailureValue();  // An exception became pending during the pop.
+  }
+  return return_or_deoptimize_pc;
+}
+
 // Lazily resolve a method for quick. Called by stub code.
 extern "C" const void* artQuickResolutionTrampoline(
     ArtMethod* called, mirror::Object* receiver, Thread* self, ArtMethod** sp)
@@ -1104,6 +1184,32 @@
     DCHECK_EQ(caller->GetDexFile(), called_method.dex_file);
     called = linker->ResolveMethod<ClassLinker::kForceICCECheck>(
         self, called_method.dex_method_index, caller, invoke_type);
+
+    // Update .bss entry in oat file if any.
+    if (called != nullptr && called_method.dex_file->GetOatDexFile() != nullptr) {
+      const MethodBssMapping* mapping =
+          called_method.dex_file->GetOatDexFile()->GetMethodBssMapping();
+      if (mapping != nullptr) {
+        auto pp = std::partition_point(
+            mapping->begin(),
+            mapping->end(),
+            [called_method](const MethodBssMappingEntry& entry) {
+              return entry.method_index < called_method.dex_method_index;
+            });
+        if (pp != mapping->end() && pp->CoversIndex(called_method.dex_method_index)) {
+          size_t bss_offset = pp->GetBssOffset(called_method.dex_method_index,
+                                               static_cast<size_t>(kRuntimePointerSize));
+          DCHECK_ALIGNED(bss_offset, static_cast<size_t>(kRuntimePointerSize));
+          const OatFile* oat_file = called_method.dex_file->GetOatDexFile()->GetOatFile();
+          ArtMethod** method_entry = reinterpret_cast<ArtMethod**>(const_cast<uint8_t*>(
+              oat_file->BssBegin() + bss_offset));
+          DCHECK_GE(method_entry, oat_file->GetBssMethods().data());
+          DCHECK_LT(method_entry,
+                    oat_file->GetBssMethods().data() + oat_file->GetBssMethods().size());
+          *method_entry = called;
+        }
+      }
+    }
   }
   const void* code = nullptr;
   if (LIKELY(!self->IsExceptionPending())) {
@@ -2235,7 +2341,7 @@
                                      Thread* self,
                                      ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
-  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs));
   ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
   ArtMethod* method = FindMethodFast(method_idx, this_object, caller_method, access_check, type);
   if (UNLIKELY(method == nullptr)) {
@@ -2456,7 +2562,7 @@
     ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
+  DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs));
 
   // Start new JNI local reference state
   JNIEnvExt* env = self->GetJniEnv();
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 1cd641b..7e08b7a 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -17,6 +17,7 @@
 #include <stdint.h>
 
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "callee_save_frame.h"
 #include "common_runtime_test.h"
 #include "quick/quick_method_frame_info.h"
@@ -38,7 +39,7 @@
     ASSERT_EQ(InstructionSet::kX86_64, Runtime::Current()->GetInstructionSet());
   }
 
-  static ArtMethod* CreateCalleeSaveMethod(InstructionSet isa, Runtime::CalleeSaveType type)
+  static ArtMethod* CreateCalleeSaveMethod(InstructionSet isa, CalleeSaveType type)
       NO_THREAD_SAFETY_ANALYSIS {
     Runtime* r = Runtime::Current();
 
@@ -53,7 +54,7 @@
     return save_method;
   }
 
-  static void CheckFrameSize(InstructionSet isa, Runtime::CalleeSaveType type, uint32_t save_size)
+  static void CheckFrameSize(InstructionSet isa, CalleeSaveType type, uint32_t save_size)
       NO_THREAD_SAFETY_ANALYSIS {
     ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
     QuickMethodFrameInfo frame_info = Runtime::Current()->GetRuntimeMethodFrameInfo(save_method);
@@ -62,7 +63,7 @@
         << frame_info.FpSpillMask() << std::dec << " ISA " << isa;
   }
 
-  static void CheckPCOffset(InstructionSet isa, Runtime::CalleeSaveType type, size_t pc_offset)
+  static void CheckPCOffset(InstructionSet isa, CalleeSaveType type, size_t pc_offset)
       NO_THREAD_SAFETY_ANALYSIS {
     ArtMethod* save_method = CreateCalleeSaveMethod(isa, type);
     QuickMethodFrameInfo frame_info = Runtime::Current()->GetRuntimeMethodFrameInfo(save_method);
@@ -80,16 +81,16 @@
 // This test ensures that kQuickCalleeSaveFrame_RefAndArgs_FrameSize is correct.
 TEST_F(QuickTrampolineEntrypointsTest, FrameSize) {
   // We have to use a define here as the callee_save_frame.h functions are constexpr.
-#define CHECK_FRAME_SIZE(isa)                                                 \
-  CheckFrameSize(isa,                                                         \
-                 Runtime::kSaveRefsAndArgs,                                   \
-                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsAndArgs));     \
-  CheckFrameSize(isa,                                                         \
-                 Runtime::kSaveRefsOnly,                                      \
-                 GetCalleeSaveFrameSize(isa, Runtime::kSaveRefsOnly));        \
-  CheckFrameSize(isa,                                                         \
-                 Runtime::kSaveAllCalleeSaves,                                \
-                 GetCalleeSaveFrameSize(isa, Runtime::kSaveAllCalleeSaves))
+#define CHECK_FRAME_SIZE(isa)                                                        \
+  CheckFrameSize(isa,                                                                \
+                 CalleeSaveType::kSaveRefsAndArgs,                                   \
+                 GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveRefsAndArgs));     \
+  CheckFrameSize(isa,                                                                \
+                 CalleeSaveType::kSaveRefsOnly,                                      \
+                 GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveRefsOnly));        \
+  CheckFrameSize(isa,                                                                \
+                 CalleeSaveType::kSaveAllCalleeSaves,                                \
+                 GetCalleeSaveFrameSize(isa, CalleeSaveType::kSaveAllCalleeSaves))
 
   CHECK_FRAME_SIZE(kArm);
   CHECK_FRAME_SIZE(kArm64);
@@ -116,12 +117,12 @@
   // Ensure that the computation in callee_save_frame.h correct.
   // Note: we can only check against the kRuntimeISA, because the ArtMethod computation uses
   // sizeof(void*), which is wrong when the target bitwidth is not the same as the host's.
-  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsAndArgs));
-  CheckPCOffset(kRuntimeISA, Runtime::kSaveRefsOnly,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveRefsOnly));
-  CheckPCOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves,
-                GetCalleeSaveReturnPcOffset(kRuntimeISA, Runtime::kSaveAllCalleeSaves));
+  CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveRefsAndArgs,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsAndArgs));
+  CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveRefsOnly));
+  CheckPCOffset(kRuntimeISA, CalleeSaveType::kSaveAllCalleeSaves,
+                GetCalleeSaveReturnPcOffset(kRuntimeISA, CalleeSaveType::kSaveAllCalleeSaves));
 }
 
 }  // namespace art
diff --git a/runtime/exec_utils.cc b/runtime/exec_utils.cc
index 9efb1a3..db1baa7 100644
--- a/runtime/exec_utils.cc
+++ b/runtime/exec_utils.cc
@@ -28,7 +28,6 @@
 
 namespace art {
 
-using android::base::StringAppendF;
 using android::base::StringPrintf;
 
 int ExecAndReturnCode(std::vector<std::string>& arg_vector, std::string* error_msg) {
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index 5594f4d..fd0cd5f 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -27,7 +27,7 @@
 #include "mirror/object_reference.h"
 #include "oat_quick_method_header.h"
 #include "sigchain.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "verify_object-inl.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/atomic_stack.h b/runtime/gc/accounting/atomic_stack.h
index 351798e..3d0e817 100644
--- a/runtime/gc/accounting/atomic_stack.h
+++ b/runtime/gc/accounting/atomic_stack.h
@@ -29,6 +29,13 @@
 #include "mem_map.h"
 #include "stack_reference.h"
 
+// This implements a double-ended queue (deque) with various flavors of PushBack operations,
+// as well as PopBack and PopFront operations. We expect that all calls are performed
+// by a single thread (normally the GC). There is one exception, which accounts for the
+// name:
+// - Multiple calls to AtomicPushBack*() and AtomicBumpBack() may be made concurrently,
+// provided no other calls are made at the same time.
+
 namespace art {
 namespace gc {
 namespace accounting {
@@ -150,7 +157,7 @@
   // Pop a number of elements.
   void PopBackCount(int32_t n) {
     DCHECK_GE(Size(), static_cast<size_t>(n));
-    back_index_.FetchAndSubSequentiallyConsistent(n);
+    back_index_.StoreRelaxed(back_index_.LoadRelaxed() - n);
   }
 
   bool IsEmpty() const {
diff --git a/runtime/gc/accounting/bitmap.h b/runtime/gc/accounting/bitmap.h
index eb00472..d039d88 100644
--- a/runtime/gc/accounting/bitmap.h
+++ b/runtime/gc/accounting/bitmap.h
@@ -25,7 +25,6 @@
 
 #include "base/mutex.h"
 #include "globals.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 4506597..01b5896 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -16,6 +16,8 @@
 
 #include "card_table.h"
 
+#include <sys/mman.h>
+
 #include "base/logging.h"
 #include "base/systrace.h"
 #include "card_table-inl.h"
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index c3dd21f..17acc76 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -155,6 +155,14 @@
 };
 
 }  // namespace accounting
+
+class AgeCardVisitor {  // Functor mapping a card value to its replacement value.
+ public:
+  uint8_t operator()(uint8_t card) const {  // Dirty cards become kCardDirty - 1; all others 0.
+    return (card == accounting::CardTable::kCardDirty) ? card - 1 : 0;
+  }
+};
+
 }  // namespace gc
 }  // namespace art
 
diff --git a/runtime/gc/accounting/heap_bitmap.h b/runtime/gc/accounting/heap_bitmap.h
index 76247bc..7097f87 100644
--- a/runtime/gc/accounting/heap_bitmap.h
+++ b/runtime/gc/accounting/heap_bitmap.h
@@ -19,7 +19,6 @@
 
 #include "base/allocator.h"
 #include "base/logging.h"
-#include "object_callbacks.h"
 #include "space_bitmap.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index c416b9c..57c290e 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -28,7 +28,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object-refvisitor-inl.h"
 #include "space_bitmap-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index 48a8742..e5b8ea5 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -21,7 +21,7 @@
 #include "gc/space/space-inl.h"
 #include "mirror/array-inl.h"
 #include "space_bitmap-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/gc/accounting/remembered_set.h b/runtime/gc/accounting/remembered_set.h
index 5594781..c332f96 100644
--- a/runtime/gc/accounting/remembered_set.h
+++ b/runtime/gc/accounting/remembered_set.h
@@ -19,7 +19,6 @@
 
 #include "base/allocator.h"
 #include "globals.h"
-#include "object_callbacks.h"
 #include "safe_map.h"
 
 #include <set>
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index b136488..889f57b 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -25,7 +25,6 @@
 
 #include "base/mutex.h"
 #include "globals.h"
-#include "object_callbacks.h"
 
 namespace art {
 
@@ -35,6 +34,9 @@
 }  // namespace mirror
 class MemMap;
 
+// Same as in object_callbacks.h; redeclared here just to avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace gc {
 namespace accounting {
 
diff --git a/runtime/gc/allocation_listener.h b/runtime/gc/allocation_listener.h
index f60bc0c..21fa214 100644
--- a/runtime/gc/allocation_listener.h
+++ b/runtime/gc/allocation_listener.h
@@ -23,14 +23,13 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "gc_root.h"
 
 namespace art {
 
 namespace mirror {
   class Object;
-}
+}  // namespace mirror
 
 class Thread;
 
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 122f779..2257b81 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "base/stl_util.h"
 #include "obj_ptr-inl.h"
+#include "object_callbacks.h"
 #include "stack.h"
 
 #ifdef ART_TARGET_ANDROID
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index 90cff6a..d31e442 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -22,18 +22,18 @@
 
 #include "base/mutex.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "gc_root.h"
 
 namespace art {
 
 class ArtMethod;
+class IsMarkedVisitor;
 class Thread;
 
 namespace mirror {
   class Class;
   class Object;
-}
+}  // namespace mirror
 
 namespace gc {
 
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 35a251f..d5d3540 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -30,7 +30,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 562fc75..b85d7df 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -707,6 +707,9 @@
   // the end of the memory region that's ever managed by this allocator.
   size_t max_capacity_;
 
+  template<class Key, AllocatorTag kTag, class Compare = std::less<Key>>
+  using AllocationTrackingSet = std::set<Key, Compare, TrackingAllocator<Key, kTag>>;
+
   // The run sets that hold the runs whose slots are not all
   // full. non_full_runs_[i] is guarded by size_bracket_locks_[i].
   AllocationTrackingSet<Run*, kAllocatorTagRosAlloc> non_full_runs_[kNumOfSizeBrackets];
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 3503973..85a656e 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -19,11 +19,12 @@
 
 #include "concurrent_copying.h"
 
+#include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
 #include "gc/space/region_space.h"
-#include "mirror/object-readbarrier-inl.h"
 #include "lock_word.h"
+#include "mirror/object-readbarrier-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index ef843c6..c0d6481 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -359,7 +359,7 @@
     ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
     // We can use the non-CAS VisitRoots functions below because we update thread-local GC roots
     // only.
-    thread->VisitRoots(this);
+    thread->VisitRoots(this, kVisitRootFlagAllRoots);
     concurrent_copying_->GetBarrier().Pass(self);
   }
 
@@ -2086,8 +2086,11 @@
       // It was updated by the mutator.
       break;
     }
-  } while (!obj->CasFieldWeakRelaxedObjectWithoutWriteBarrier<
-      false, false, kVerifyNone>(offset, expected_ref, new_ref));
+    // Use release cas to make sure threads reading the reference see contents of copied objects.
+  } while (!obj->CasFieldWeakReleaseObjectWithoutWriteBarrier<false, false, kVerifyNone>(
+      offset,
+      expected_ref,
+      new_ref));
 }
 
 // Process some roots.
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 377f4d3..7b4340e 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -21,10 +21,7 @@
 #include "garbage_collector.h"
 #include "immune_spaces.h"
 #include "jni.h"
-#include "object_callbacks.h"
 #include "offsets.h"
-#include "gc/accounting/space_bitmap.h"
-#include "mirror/object.h"
 #include "mirror/object_reference.h"
 #include "safe_map.h"
 
@@ -35,11 +32,16 @@
 class Closure;
 class RootInfo;
 
+namespace mirror {
+class Object;
+}  // namespace mirror
+
 namespace gc {
 
 namespace accounting {
   template<typename T> class AtomicStack;
   typedef AtomicStack<mirror::Object> ObjectStack;
+  template <size_t kAlignment> class SpaceBitmap;
   typedef SpaceBitmap<kObjectAlignment> ContinuousSpaceBitmap;
   class HeapBitmap;
   class ReadBarrierTable;
@@ -284,7 +286,7 @@
   bool is_active_;                        // True while the collection is ongoing.
   bool is_asserting_to_space_invariant_;  // True while asserting the to-space invariant.
   ImmuneSpaces immune_spaces_;
-  accounting::SpaceBitmap<kObjectAlignment>* region_space_bitmap_;
+  accounting::ContinuousSpaceBitmap* region_space_bitmap_;
   // A cache of Heap::GetMarkBitmap().
   accounting::HeapBitmap* heap_mark_bitmap_;
   size_t live_stack_freeze_size_;
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 1e4196b..c5a341f 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -31,7 +31,8 @@
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "utils.h"
 
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 14d0499..dec206b 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -27,6 +27,8 @@
 #include "gc/gc_cause.h"
 #include "gc_root.h"
 #include "gc_type.h"
+#include "iteration.h"
+#include "object_byte_pair.h"
 #include "object_callbacks.h"
 
 namespace art {
@@ -43,85 +45,6 @@
 
 namespace collector {
 
-struct ObjectBytePair {
-  explicit ObjectBytePair(uint64_t num_objects = 0, int64_t num_bytes = 0)
-      : objects(num_objects), bytes(num_bytes) {}
-  void Add(const ObjectBytePair& other) {
-    objects += other.objects;
-    bytes += other.bytes;
-  }
-  // Number of objects which were freed.
-  uint64_t objects;
-  // Freed bytes are signed since the GC can free negative bytes if it promotes objects to a space
-  // which has a larger allocation size.
-  int64_t bytes;
-};
-
-// A information related single garbage collector iteration. Since we only ever have one GC running
-// at any given time, we can have a single iteration info.
-class Iteration {
- public:
-  Iteration();
-  // Returns how long the mutators were paused in nanoseconds.
-  const std::vector<uint64_t>& GetPauseTimes() const {
-    return pause_times_;
-  }
-  TimingLogger* GetTimings() {
-    return &timings_;
-  }
-  // Returns how long the GC took to complete in nanoseconds.
-  uint64_t GetDurationNs() const {
-    return duration_ns_;
-  }
-  int64_t GetFreedBytes() const {
-    return freed_.bytes;
-  }
-  int64_t GetFreedLargeObjectBytes() const {
-    return freed_los_.bytes;
-  }
-  uint64_t GetFreedObjects() const {
-    return freed_.objects;
-  }
-  uint64_t GetFreedLargeObjects() const {
-    return freed_los_.objects;
-  }
-  uint64_t GetFreedRevokeBytes() const {
-    return freed_bytes_revoke_;
-  }
-  void SetFreedRevoke(uint64_t freed) {
-    freed_bytes_revoke_ = freed;
-  }
-  void Reset(GcCause gc_cause, bool clear_soft_references);
-  // Returns the estimated throughput of the iteration.
-  uint64_t GetEstimatedThroughput() const;
-  bool GetClearSoftReferences() const {
-    return clear_soft_references_;
-  }
-  void SetClearSoftReferences(bool clear_soft_references) {
-    clear_soft_references_ = clear_soft_references;
-  }
-  GcCause GetGcCause() const {
-    return gc_cause_;
-  }
-
- private:
-  void SetDurationNs(uint64_t duration) {
-    duration_ns_ = duration;
-  }
-
-  GcCause gc_cause_;
-  bool clear_soft_references_;
-  uint64_t duration_ns_;
-  TimingLogger timings_;
-  ObjectBytePair freed_;
-  ObjectBytePair freed_los_;
-  uint64_t freed_bytes_revoke_;  // see Heap::num_bytes_freed_revoke_.
-  std::vector<uint64_t> pause_times_;
-
-  friend class GarbageCollector;
-  DISALLOW_COPY_AND_ASSIGN(Iteration);
-};
-
 class GarbageCollector : public RootVisitor, public IsMarkedVisitor, public MarkObjectVisitor {
  public:
   class SCOPED_LOCKABLE ScopedPause {
diff --git a/runtime/gc/collector/immune_spaces_test.cc b/runtime/gc/collector/immune_spaces_test.cc
index cf93ec6..9823708 100644
--- a/runtime/gc/collector/immune_spaces_test.cc
+++ b/runtime/gc/collector/immune_spaces_test.cc
@@ -14,12 +14,14 @@
  * limitations under the License.
  */
 
+#include <sys/mman.h>
+
 #include "common_runtime_test.h"
 #include "gc/collector/immune_spaces.h"
 #include "gc/space/image_space.h"
 #include "gc/space/space-inl.h"
 #include "oat_file.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/gc/collector/iteration.h b/runtime/gc/collector/iteration.h
new file mode 100644
index 0000000..fbe4166
--- /dev/null
+++ b/runtime/gc/collector/iteration.h
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_ITERATION_H_
+#define ART_RUNTIME_GC_COLLECTOR_ITERATION_H_
+
+#include <inttypes.h>
+#include <vector>
+
+#include "android-base/macros.h"
+#include "base/timing_logger.h"
+#include "object_byte_pair.h"
+
+namespace art {
+namespace gc {
+namespace collector {
+
+// Information related to a single garbage collector iteration. Since we only ever have one GC
+// running at any given time, we can have a single iteration info.
+class Iteration {
+ public:
+  Iteration();
+  // Returns how long the mutators were paused in nanoseconds.
+  const std::vector<uint64_t>& GetPauseTimes() const {
+    return pause_times_;
+  }
+  TimingLogger* GetTimings() {
+    return &timings_;
+  }
+  // Returns how long the GC took to complete in nanoseconds.
+  uint64_t GetDurationNs() const {
+    return duration_ns_;
+  }
+  int64_t GetFreedBytes() const {
+    return freed_.bytes;
+  }
+  int64_t GetFreedLargeObjectBytes() const {
+    return freed_los_.bytes;
+  }
+  uint64_t GetFreedObjects() const {
+    return freed_.objects;
+  }
+  uint64_t GetFreedLargeObjects() const {
+    return freed_los_.objects;
+  }
+  uint64_t GetFreedRevokeBytes() const {
+    return freed_bytes_revoke_;
+  }
+  void SetFreedRevoke(uint64_t freed) {
+    freed_bytes_revoke_ = freed;
+  }
+  void Reset(GcCause gc_cause, bool clear_soft_references);
+  // Returns the estimated throughput of the iteration.
+  uint64_t GetEstimatedThroughput() const;
+  bool GetClearSoftReferences() const {
+    return clear_soft_references_;
+  }
+  void SetClearSoftReferences(bool clear_soft_references) {
+    clear_soft_references_ = clear_soft_references;
+  }
+  GcCause GetGcCause() const {
+    return gc_cause_;
+  }
+
+ private:
+  void SetDurationNs(uint64_t duration) {
+    duration_ns_ = duration;
+  }
+
+  GcCause gc_cause_;
+  bool clear_soft_references_;
+  uint64_t duration_ns_;
+  TimingLogger timings_;
+  ObjectBytePair freed_;
+  ObjectBytePair freed_los_;
+  uint64_t freed_bytes_revoke_;  // see Heap::num_bytes_freed_revoke_.
+  std::vector<uint64_t> pause_times_;
+
+  friend class GarbageCollector;
+  DISALLOW_COPY_AND_ASSIGN(Iteration);
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_ITERATION_H_
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 9d3d950..aef98de 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -32,7 +32,7 @@
 #include "mirror/object-refvisitor-inl.h"
 #include "runtime.h"
 #include "stack.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index 85727c2..0bf4095 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -28,7 +28,6 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
 #include "lock_word.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index f591cf0..fb82b4d 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -42,7 +42,7 @@
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
@@ -1141,7 +1141,7 @@
     Thread* const self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->VisitRoots(this);
+    thread->VisitRoots(this, kVisitRootFlagAllRoots);
     if (revoke_ros_alloc_thread_local_buffers_at_checkpoint_) {
       ScopedTrace trace2("RevokeRosAllocThreadLocalBuffers");
       mark_sweep_->GetHeap()->RevokeRosAllocThreadLocalBuffers(thread);
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index 5a9b9f8..b9e06f9 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -27,7 +27,6 @@
 #include "gc_root.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/object_byte_pair.h b/runtime/gc/collector/object_byte_pair.h
new file mode 100644
index 0000000..16ef06b
--- /dev/null
+++ b/runtime/gc/collector/object_byte_pair.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_COLLECTOR_OBJECT_BYTE_PAIR_H_
+#define ART_RUNTIME_GC_COLLECTOR_OBJECT_BYTE_PAIR_H_
+
+#include <inttypes.h>
+
+namespace art {
+namespace gc {
+namespace collector {
+
+struct ObjectBytePair {  // Pairs an object count with a signed byte count.
+  explicit ObjectBytePair(uint64_t num_objects = 0, int64_t num_bytes = 0)
+      : objects(num_objects), bytes(num_bytes) {}  // Both totals default to zero.
+  void Add(const ObjectBytePair& other) {  // Adds other's totals to this pair, member-wise.
+    objects += other.objects;
+    bytes += other.bytes;
+  }
+  // Number of objects which were freed.
+  uint64_t objects;
+  // Freed bytes are signed since the GC can free negative bytes if it promotes objects to a space
+  // which has a larger allocation size.
+  int64_t bytes;
+};
+
+}  // namespace collector
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_COLLECTOR_OBJECT_BYTE_PAIR_H_
diff --git a/runtime/gc/collector/partial_mark_sweep.cc b/runtime/gc/collector/partial_mark_sweep.cc
index 9847794..f6ca867 100644
--- a/runtime/gc/collector/partial_mark_sweep.cc
+++ b/runtime/gc/collector/partial_mark_sweep.cc
@@ -19,7 +19,7 @@
 #include "gc/heap.h"
 #include "gc/space/space.h"
 #include "partial_mark_sweep.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 9d6e74d..d3858ba 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -27,7 +27,6 @@
 #include "gc/accounting/heap_bitmap.h"
 #include "immune_spaces.h"
 #include "mirror/object_reference.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 
 namespace art {
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index a2dbe3f..98fdfac 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -14,11 +14,15 @@
  * limitations under the License.
  */
 
+#include "sticky_mark_sweep.h"
+
+#include "gc/accounting/atomic_stack.h"
+#include "gc/accounting/card_table.h"
 #include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
-#include "sticky_mark_sweep.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 79086da..060f12d 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -21,6 +21,7 @@
 
 #include "allocation_listener.h"
 #include "base/time_utils.h"
+#include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/allocation_record.h"
 #include "gc/collector/semi_space.h"
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f3c345d..d944ce4 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -37,10 +37,10 @@
 #include "cutils/sched_policy.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
-#include "gc/accounting/atomic_stack.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/mod_union_table-inl.h"
+#include "gc/accounting/read_barrier_table.h"
 #include "gc/accounting/remembered_set.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/collector/concurrent_copying.h"
@@ -63,6 +63,7 @@
 #include "gc/verification.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
 #include "gc_pause_listener.h"
+#include "gc_root.h"
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
@@ -89,8 +90,6 @@
 
 namespace gc {
 
-using android::base::StringPrintf;
-
 static constexpr size_t kCollectorTransitionStressIterations = 0;
 static constexpr size_t kCollectorTransitionStressWait = 10 * 1000;  // Microseconds
 // Minimum amount of remaining bytes before a concurrent GC is triggered.
@@ -1452,6 +1451,9 @@
 }
 
 void Heap::StartGC(Thread* self, GcCause cause, CollectorType collector_type) {
+  // Need to do this before acquiring the locks since we don't want to get suspended while
+  // holding any locks.
+  ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
   MutexLock mu(self, *gc_complete_lock_);
   // Ensure there is only one GC at a time.
   WaitForGcToCompleteLocked(cause, self);
@@ -1460,14 +1462,9 @@
 }
 
 void Heap::TrimSpaces(Thread* self) {
-  {
-    // Need to do this before acquiring the locks since we don't want to get suspended while
-    // holding any locks.
-    ScopedThreadStateChange tsc(self, kWaitingForGcToComplete);
-    // Pretend we are doing a GC to prevent background compaction from deleting the space we are
-    // trimming.
-    StartGC(self, kGcCauseTrim, kCollectorTypeHeapTrim);
-  }
+  // Pretend we are doing a GC to prevent background compaction from deleting the space we are
+  // trimming.
+  StartGC(self, kGcCauseTrim, kCollectorTypeHeapTrim);
   ScopedTrace trace(__PRETTY_FUNCTION__);
   const uint64_t start_ns = NanoTime();
   // Trim the managed spaces.
@@ -4006,7 +4003,8 @@
       native_blocking_gcs_finished_++;
       native_blocking_gc_cond_->Broadcast(self);
     }
-  } else if (new_value > NativeAllocationGcWatermark() && !IsGCRequestPending()) {
+  } else if (new_value > NativeAllocationGcWatermark() * HeapGrowthMultiplier() &&
+             !IsGCRequestPending()) {
     // Trigger another GC because there have been enough native bytes
     // allocated since the last GC.
     if (IsGcConcurrent()) {
@@ -4052,7 +4050,7 @@
       << " IsVariableSize=" << c->IsVariableSize()
       << " ObjectSize=" << c->GetObjectSize()
       << " sizeof(Class)=" << sizeof(mirror::Class)
-      << " klass=" << c.Ptr();
+      << verification_->DumpObjectInfo(c.Ptr(), /*tag*/ "klass");
   CHECK_GE(byte_count, sizeof(mirror::Object));
 }
 
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index 7287178..0289250 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -26,17 +26,14 @@
 #include "arch/instruction_set.h"
 #include "atomic.h"
 #include "base/time_utils.h"
-#include "gc/accounting/atomic_stack.h"
-#include "gc/accounting/card_table.h"
-#include "gc/accounting/read_barrier_table.h"
 #include "gc/gc_cause.h"
 #include "gc/collector/gc_type.h"
+#include "gc/collector/iteration.h"
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "globals.h"
 #include "handle.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "process_state.h"
 #include "safe_map.h"
@@ -45,13 +42,18 @@
 namespace art {
 
 class ConditionVariable;
+class IsMarkedVisitor;
 class Mutex;
+class RootVisitor;
 class StackVisitor;
 class Thread;
 class ThreadPool;
 class TimingLogger;
 class VariableSizedHandleScope;
 
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace mirror {
   class Class;
   class Object;
@@ -67,8 +69,12 @@
 class Verification;
 
 namespace accounting {
+  template <typename T> class AtomicStack;
+  typedef AtomicStack<mirror::Object> ObjectStack;
+  class CardTable;
   class HeapBitmap;
   class ModUnionTable;
+  class ReadBarrierTable;
   class RememberedSet;
 }  // namespace accounting
 
@@ -99,13 +105,6 @@
   class ZygoteSpace;
 }  // namespace space
 
-class AgeCardVisitor {
- public:
-  uint8_t operator()(uint8_t card) const {
-    return (card == accounting::CardTable::kCardDirty) ? card - 1 : 0;
-  }
-};
-
 enum HomogeneousSpaceCompactResult {
   // Success.
   kSuccess,
diff --git a/runtime/gc/heap_verification_test.cc b/runtime/gc/heap_verification_test.cc
index 2cdfc16..a307c51 100644
--- a/runtime/gc/heap_verification_test.cc
+++ b/runtime/gc/heap_verification_test.cc
@@ -17,7 +17,7 @@
 #include "common_runtime_test.h"
 
 #include "base/memory_tool.h"
-#include "class_linker.h"
+#include "class_linker-inl.h"
 #include "handle_scope-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
diff --git a/runtime/gc/reference_processor-inl.h b/runtime/gc/reference_processor-inl.h
index f619a15..0f47d3d 100644
--- a/runtime/gc/reference_processor-inl.h
+++ b/runtime/gc/reference_processor-inl.h
@@ -19,6 +19,8 @@
 
 #include "reference_processor.h"
 
+#include "mirror/reference-inl.h"
+
 namespace art {
 namespace gc {
 
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 886c950..52da763 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -22,6 +22,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "object_callbacks.h"
 #include "reference_processor-inl.h"
 #include "reflection.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 38b68cb..a8135d9 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -20,11 +20,11 @@
 #include "base/mutex.h"
 #include "globals.h"
 #include "jni.h"
-#include "object_callbacks.h"
 #include "reference_queue.h"
 
 namespace art {
 
+class IsMarkedVisitor;
 class TimingLogger;
 
 namespace mirror {
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index fd5dcf9..321d22a 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -22,6 +22,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "object_callbacks.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index b73a880..c48d48c 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -27,7 +27,6 @@
 #include "globals.h"
 #include "jni.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "thread_pool.h"
 
@@ -36,6 +35,9 @@
 class Reference;
 }  // namespace mirror
 
+class IsMarkedVisitor;
+class MarkObjectVisitor;
+
 namespace gc {
 
 namespace collector {
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index f937d2c..2976dd0 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -19,7 +19,7 @@
 #include "gc/collector_type.h"
 #include "gc/heap.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/bump_pointer_space-inl.h b/runtime/gc/space/bump_pointer_space-inl.h
index 45cea5a..1509bb0 100644
--- a/runtime/gc/space/bump_pointer_space-inl.h
+++ b/runtime/gc/space/bump_pointer_space-inl.h
@@ -17,9 +17,10 @@
 #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
 #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_INL_H_
 
-#include "base/bit_utils.h"
 #include "bump_pointer_space.h"
 
+#include "base/bit_utils.h"
+
 namespace art {
 namespace gc {
 namespace space {
@@ -86,15 +87,6 @@
   return ret;
 }
 
-inline size_t BumpPointerSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  size_t num_bytes = obj->SizeOf();
-  if (usable_size != nullptr) {
-    *usable_size = RoundUp(num_bytes, kAlignment);
-  }
-  return num_bytes;
-}
-
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/bump_pointer_space.cc b/runtime/gc/space/bump_pointer_space.cc
index 426b332..bb1ede1 100644
--- a/runtime/gc/space/bump_pointer_space.cc
+++ b/runtime/gc/space/bump_pointer_space.cc
@@ -271,6 +271,14 @@
   // Caller's job to print failed_alloc_bytes.
 }
 
+size_t BumpPointerSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
+  size_t num_bytes = obj->SizeOf();  // Exact object size; this is the return value.
+  if (usable_size != nullptr) {  // The usable size is an optional output.
+    *usable_size = RoundUp(num_bytes, kAlignment);  // Size rounded up to kAlignment.
+  }
+  return num_bytes;
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/bump_pointer_space.h b/runtime/gc/space/bump_pointer_space.h
index e9982e9..566dc5d 100644
--- a/runtime/gc/space/bump_pointer_space.h
+++ b/runtime/gc/space/bump_pointer_space.h
@@ -17,10 +17,17 @@
 #ifndef ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_BUMP_POINTER_SPACE_H_
 
-#include "object_callbacks.h"
 #include "space.h"
 
 namespace art {
+
+namespace mirror {
+class Object;
+}
+
+// Same as in object_callbacks.h. Just avoid the include.
+typedef void (ObjectCallback)(mirror::Object* obj, void* arg);
+
 namespace gc {
 
 namespace collector {
diff --git a/runtime/gc/space/dlmalloc_space.cc b/runtime/gc/space/dlmalloc_space.cc
index 9282ec7..7ec54f5 100644
--- a/runtime/gc/space/dlmalloc_space.cc
+++ b/runtime/gc/space/dlmalloc_space.cc
@@ -26,6 +26,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 748d378..9da2876 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -27,6 +27,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "base/macros.h"
 #include "base/stl_util.h"
@@ -42,6 +43,7 @@
 #include "mirror/object-refvisitor-inl.h"
 #include "oat_file.h"
 #include "os.h"
+#include "runtime.h"
 #include "space-inl.h"
 #include "utils.h"
 
@@ -713,13 +715,13 @@
                image_header->GetImageMethod(ImageHeader::kImtConflictMethod));
       CHECK_EQ(runtime->GetImtUnimplementedMethod(),
                image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
-      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves),
+      CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveAllCalleeSaves),
                image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod));
-      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsOnly),
+      CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsOnly),
                image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod));
-      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs),
+      CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs),
                image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod));
-      CHECK_EQ(runtime->GetCalleeSaveMethod(Runtime::kSaveEverything),
+      CHECK_EQ(runtime->GetCalleeSaveMethod(CalleeSaveType::kSaveEverything),
                image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod));
     } else if (!runtime->HasResolutionMethod()) {
       runtime->SetInstructionSet(space->oat_file_non_owned_->GetOatHeader().GetInstructionSet());
@@ -729,14 +731,16 @@
           image_header->GetImageMethod(ImageHeader::kImtUnimplementedMethod));
       runtime->SetCalleeSaveMethod(
           image_header->GetImageMethod(ImageHeader::kSaveAllCalleeSavesMethod),
-          Runtime::kSaveAllCalleeSaves);
+          CalleeSaveType::kSaveAllCalleeSaves);
       runtime->SetCalleeSaveMethod(
-          image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod), Runtime::kSaveRefsOnly);
+          image_header->GetImageMethod(ImageHeader::kSaveRefsOnlyMethod),
+          CalleeSaveType::kSaveRefsOnly);
       runtime->SetCalleeSaveMethod(
           image_header->GetImageMethod(ImageHeader::kSaveRefsAndArgsMethod),
-          Runtime::kSaveRefsAndArgs);
+          CalleeSaveType::kSaveRefsAndArgs);
       runtime->SetCalleeSaveMethod(
-          image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod), Runtime::kSaveEverything);
+          image_header->GetImageMethod(ImageHeader::kSaveEverythingMethod),
+          CalleeSaveType::kSaveEverything);
     }
 
     VLOG(image) << "ImageSpace::Init exiting " << *space.get();
diff --git a/runtime/gc/space/image_space.h b/runtime/gc/space/image_space.h
index aa3dd42..3383d6b 100644
--- a/runtime/gc/space/image_space.h
+++ b/runtime/gc/space/image_space.h
@@ -19,7 +19,7 @@
 
 #include "arch/instruction_set.h"
 #include "gc/accounting/space_bitmap.h"
-#include "runtime.h"
+#include "image.h"
 #include "space.h"
 
 namespace art {
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 3988073..4597a96 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -16,19 +16,22 @@
 
 #include "large_object_space.h"
 
+#include <sys/mman.h>
+
 #include <memory>
 
-#include "gc/accounting/heap_bitmap-inl.h"
-#include "gc/accounting/space_bitmap-inl.h"
 #include "base/logging.h"
 #include "base/memory_tool.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
+#include "gc/accounting/heap_bitmap-inl.h"
+#include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
 #include "image.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
 #include "space-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 3910a03..fc24fc2 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -18,7 +18,7 @@
 #define ART_RUNTIME_GC_SPACE_REGION_SPACE_INL_H_
 
 #include "region_space.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
@@ -138,20 +138,6 @@
   return reinterpret_cast<mirror::Object*>(old_top);
 }
 
-inline size_t RegionSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
-  size_t num_bytes = obj->SizeOf();
-  if (usable_size != nullptr) {
-    if (LIKELY(num_bytes <= kRegionSize)) {
-      DCHECK(RefToRegion(obj)->IsAllocated());
-      *usable_size = RoundUp(num_bytes, kAlignment);
-    } else {
-      DCHECK(RefToRegion(obj)->IsLarge());
-      *usable_size = RoundUp(num_bytes, kRegionSize);
-    }
-  }
-  return num_bytes;
-}
-
 template<RegionSpace::RegionType kRegionType>
 uint64_t RegionSpace::GetBytesAllocatedInternal() {
   uint64_t bytes = 0;
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 09b4a3a..8d8c488 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -16,6 +16,7 @@
 
 #include "bump_pointer_space.h"
 #include "bump_pointer_space-inl.h"
+#include "gc/accounting/read_barrier_table.h"
 #include "mirror/object-inl.h"
 #include "mirror/class-inl.h"
 #include "thread_list.h"
@@ -254,13 +255,28 @@
   MutexLock mu(Thread::Current(), region_lock_);
   VerifyNonFreeRegionLimit();
   size_t new_non_free_region_index_limit = 0;
+
+  // Combine zeroing and releasing pages to reduce how often madvise is called. This helps
+  // reduce contention on the mmap semaphore. b/62194020
+  // clear_region appends a region to the pending block. If the region is not adjacent, the
+  // pending block is zeroed and released first, and a new block begins at that region.
+  uint8_t* clear_block_begin = nullptr;  // Start of the pending block; nullptr until first use.
+  uint8_t* clear_block_end = nullptr;  // End of the pending block (End() of its last region).
+  auto clear_region = [&clear_block_begin, &clear_block_end](Region* r) {
+    r->Clear(/*zero_and_release_pages*/false);  // Page release is deferred to the block flush.
+    if (clear_block_end != r->Begin()) {  // NOTE(review): first flush is (nullptr, 0) - confirm.
+      ZeroAndReleasePages(clear_block_begin, clear_block_end - clear_block_begin);
+      clear_block_begin = r->Begin();
+    }
+    clear_block_end = r->End();
+  };
   for (size_t i = 0; i < std::min(num_regions_, non_free_region_index_limit_); ++i) {
     Region* r = &regions_[i];
     if (r->IsInFromSpace()) {
       *cleared_bytes += r->BytesAllocated();
       *cleared_objects += r->ObjectsAllocated();
       --num_non_free_regions_;
-      r->Clear();
+      clear_region(r);
     } else if (r->IsInUnevacFromSpace()) {
       if (r->LiveBytes() == 0) {
         // Special case for 0 live bytes, this means all of the objects in the region are dead and
@@ -273,13 +289,13 @@
         // Also release RAM for large tails.
         while (i + free_regions < num_regions_ && regions_[i + free_regions].IsLargeTail()) {
           DCHECK(r->IsLarge());
-          regions_[i + free_regions].Clear();
+          clear_region(&regions_[i + free_regions]);
           ++free_regions;
         }
         *cleared_bytes += r->BytesAllocated();
         *cleared_objects += r->ObjectsAllocated();
         num_non_free_regions_ -= free_regions;
-        r->Clear();
+        clear_region(r);
         GetLiveBitmap()->ClearRange(
             reinterpret_cast<mirror::Object*>(r->Begin()),
             reinterpret_cast<mirror::Object*>(r->Begin() + free_regions * kRegionSize));
@@ -316,6 +332,8 @@
                                                  last_checked_region->Idx() + 1);
     }
   }
+  // Clear pages for the last block since clearing happens when a new block opens.
+  ZeroAndReleasePages(clear_block_begin, clear_block_end - clear_block_begin);
   // Update non_free_region_index_limit_.
   SetNonFreeRegionLimit(new_non_free_region_index_limit);
   evac_region_ = nullptr;
@@ -368,7 +386,7 @@
     if (!r->IsFree()) {
       --num_non_free_regions_;
     }
-    r->Clear();
+    r->Clear(/*zero_and_release_pages*/true);
   }
   SetNonFreeRegionLimit(0);
   current_region_ = &full_region_;
@@ -394,7 +412,7 @@
     } else {
       DCHECK(reg->IsLargeTail());
     }
-    reg->Clear();
+    reg->Clear(/*zero_and_release_pages*/true);
     --num_non_free_regions_;
   }
   if (end_addr < Limit()) {
@@ -511,6 +529,20 @@
      << " is_newly_allocated=" << is_newly_allocated_ << " is_a_tlab=" << is_a_tlab_ << " thread=" << thread_ << "\n";
 }
 
+size_t RegionSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
+  size_t num_bytes = obj->SizeOf();  // Exact object size; this is the return value.
+  if (usable_size != nullptr) {  // The usable size is an optional output.
+    if (LIKELY(num_bytes <= kRegionSize)) {  // Object is no larger than one region.
+      DCHECK(RefToRegion(obj)->IsAllocated());
+      *usable_size = RoundUp(num_bytes, kAlignment);
+    } else {  // Larger than one region: usable size is rounded up to kRegionSize.
+      DCHECK(RefToRegion(obj)->IsLarge());
+      *usable_size = RoundUp(num_bytes, kRegionSize);
+    }
+  }
+  return num_bytes;
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 80eecca..323ccdb 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -17,13 +17,17 @@
 #ifndef ART_RUNTIME_GC_SPACE_REGION_SPACE_H_
 #define ART_RUNTIME_GC_SPACE_REGION_SPACE_H_
 
-#include "gc/accounting/read_barrier_table.h"
 #include "object_callbacks.h"
 #include "space.h"
 #include "thread.h"
 
 namespace art {
 namespace gc {
+
+namespace accounting {
+class ReadBarrierTable;
+}  // namespace accounting
+
 namespace space {
 
 // A space that consists of equal-sized regions.
@@ -280,14 +284,16 @@
       return type_;
     }
 
-    void Clear() {
+    void Clear(bool zero_and_release_pages) {
       top_.StoreRelaxed(begin_);
       state_ = RegionState::kRegionStateFree;
       type_ = RegionType::kRegionTypeNone;
       objects_allocated_.StoreRelaxed(0);
       alloc_time_ = 0;
       live_bytes_ = static_cast<size_t>(-1);
-      ZeroAndReleasePages(begin_, end_ - begin_);
+      if (zero_and_release_pages) {
+        ZeroAndReleasePages(begin_, end_ - begin_);
+      }
       is_newly_allocated_ = false;
       is_a_tlab_ = false;
       thread_ = nullptr;
diff --git a/runtime/gc/space/rosalloc_space-inl.h b/runtime/gc/space/rosalloc_space-inl.h
index 8bff2b4..09aa7cf 100644
--- a/runtime/gc/space/rosalloc_space-inl.h
+++ b/runtime/gc/space/rosalloc_space-inl.h
@@ -17,49 +17,17 @@
 #ifndef ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
 #define ART_RUNTIME_GC_SPACE_ROSALLOC_SPACE_INL_H_
 
+#include "rosalloc_space.h"
+
 #include "base/memory_tool.h"
 #include "gc/allocator/rosalloc-inl.h"
 #include "gc/space/memory_tool_settings.h"
-#include "rosalloc_space.h"
 #include "thread.h"
 
 namespace art {
 namespace gc {
 namespace space {
 
-template<bool kMaybeIsRunningOnMemoryTool>
-inline size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
-  // obj is a valid object. Use its class in the header to get the size.
-  // Don't use verification since the object may be dead if we are sweeping.
-  size_t size = obj->SizeOf<kVerifyNone>();
-  bool add_redzones = false;
-  if (kMaybeIsRunningOnMemoryTool) {
-    add_redzones = RUNNING_ON_MEMORY_TOOL ? kMemoryToolAddsRedzones : 0;
-    if (add_redzones) {
-      size += 2 * kDefaultMemoryToolRedZoneBytes;
-    }
-  } else {
-    DCHECK_EQ(RUNNING_ON_MEMORY_TOOL, 0U);
-  }
-  size_t size_by_size = rosalloc_->UsableSize(size);
-  if (kIsDebugBuild) {
-    // On memory tool, the red zone has an impact...
-    const uint8_t* obj_ptr = reinterpret_cast<const uint8_t*>(obj);
-    size_t size_by_ptr = rosalloc_->UsableSize(
-        obj_ptr - (add_redzones ? kDefaultMemoryToolRedZoneBytes : 0));
-    if (size_by_size != size_by_ptr) {
-      LOG(INFO) << "Found a bad sized obj of size " << size
-                << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
-                << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
-    }
-    DCHECK_EQ(size_by_size, size_by_ptr);
-  }
-  if (usable_size != nullptr) {
-    *usable_size = size_by_size;
-  }
-  return size_by_size;
-}
-
 template<bool kThreadSafe>
 inline mirror::Object* RosAllocSpace::AllocCommon(Thread* self, size_t num_bytes,
                                                   size_t* bytes_allocated, size_t* usable_size,
diff --git a/runtime/gc/space/rosalloc_space.cc b/runtime/gc/space/rosalloc_space.cc
index 8ccbfaa..9e900e4 100644
--- a/runtime/gc/space/rosalloc_space.cc
+++ b/runtime/gc/space/rosalloc_space.cc
@@ -24,6 +24,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
@@ -373,6 +374,39 @@
   rosalloc_->DumpStats(os);
 }
 
+template<bool kMaybeIsRunningOnMemoryTool>
+size_t RosAllocSpace::AllocationSizeNonvirtual(mirror::Object* obj, size_t* usable_size) {
+  // obj is a valid object. Use its class in the header to get the size.
+  // Don't use verification since the object may be dead if we are sweeping.
+  size_t size = obj->SizeOf<kVerifyNone>();
+  bool add_redzones = false;
+  if (kMaybeIsRunningOnMemoryTool) {
+    add_redzones = RUNNING_ON_MEMORY_TOOL ? kMemoryToolAddsRedzones : 0;
+    if (add_redzones) {
+      size += 2 * kDefaultMemoryToolRedZoneBytes;  // Presumably a leading and trailing red zone.
+    }
+  } else {
+    DCHECK_EQ(RUNNING_ON_MEMORY_TOOL, 0U);
+  }
+  size_t size_by_size = rosalloc_->UsableSize(size);  // Looked up by byte count.
+  if (kIsDebugBuild) {
+    // On memory tool, the red zone has an impact...
+    const uint8_t* obj_ptr = reinterpret_cast<const uint8_t*>(obj);
+    size_t size_by_ptr = rosalloc_->UsableSize(
+        obj_ptr - (add_redzones ? kDefaultMemoryToolRedZoneBytes : 0));  // Looked up by address.
+    if (size_by_size != size_by_ptr) {  // Log the details before the DCHECK below fails.
+      LOG(INFO) << "Found a bad sized obj of size " << size
+                << " at " << std::hex << reinterpret_cast<intptr_t>(obj_ptr) << std::dec
+                << " size_by_size=" << size_by_size << " size_by_ptr=" << size_by_ptr;
+    }
+    DCHECK_EQ(size_by_size, size_by_ptr);
+  }
+  if (usable_size != nullptr) {  // The usable size is an optional output.
+    *usable_size = size_by_size;
+  }
+  return size_by_size;  // The rosalloc usable size, not the raw SizeOf() value.
+}
+
 }  // namespace space
 
 namespace allocator {
diff --git a/runtime/gc/space/space.cc b/runtime/gc/space/space.cc
index a2e2c1c..74ce273 100644
--- a/runtime/gc/space/space.cc
+++ b/runtime/gc/space/space.cc
@@ -19,8 +19,9 @@
 #include "base/logging.h"
 #include "gc/accounting/heap_bitmap.h"
 #include "gc/accounting/space_bitmap-inl.h"
+#include "gc/heap.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/gc/space/space.h b/runtime/gc/space/space.h
index fc558cf..2a4f830 100644
--- a/runtime/gc/space/space.h
+++ b/runtime/gc/space/space.h
@@ -24,9 +24,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "gc/accounting/space_bitmap.h"
-#include "gc/collector/garbage_collector.h"
+#include "gc/collector/object_byte_pair.h"
 #include "globals.h"
-#include "image.h"
 #include "mem_map.h"
 
 namespace art {
diff --git a/runtime/gc/space/zygote_space.cc b/runtime/gc/space/zygote_space.cc
index bbfcb31..fddb3f2 100644
--- a/runtime/gc/space/zygote_space.cc
+++ b/runtime/gc/space/zygote_space.cc
@@ -16,10 +16,12 @@
 
 #include "zygote_space.h"
 
+#include "base/mutex-inl.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "gc/heap.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/gc/task_processor_test.cc b/runtime/gc/task_processor_test.cc
index f1d26d9..5a75b37 100644
--- a/runtime/gc/task_processor_test.cc
+++ b/runtime/gc/task_processor_test.cc
@@ -18,7 +18,7 @@
 #include "common_runtime_test.h"
 #include "task_processor.h"
 #include "thread_pool.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace gc {
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 2de4f19..3a7f21d 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -27,13 +27,13 @@
 #define COMPRESSED_REFERENCE_SIZE_SHIFT 0x2
 DEFINE_CHECK_EQ(static_cast<size_t>(COMPRESSED_REFERENCE_SIZE_SHIFT), (static_cast<size_t>(art::WhichPowerOf2(sizeof(art::mirror::CompressedReference<art::mirror::Object>)))))
 #define RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET 0
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveAllCalleeSaves))))
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveAllCalleeSaves))))
 #define RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET 0x8
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsOnly))))
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveRefsOnly))))
 #define RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET 0x10
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveRefsAndArgs))))
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveRefsAndArgs))))
 #define RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET 0x18
-DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: kSaveEverything))))
+DEFINE_CHECK_EQ(static_cast<size_t>(RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET), (static_cast<size_t>(art::Runtime::GetCalleeSaveMethodOffset(art::CalleeSaveType::kSaveEverything))))
 #define THREAD_FLAGS_OFFSET 0
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_FLAGS_OFFSET), (static_cast<int32_t>(art::Thread:: ThreadFlagsOffset<art::kRuntimePointerSize>().Int32Value())))
 #define THREAD_ID_OFFSET 12
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index 492d4b4..d091e7f 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -22,7 +22,7 @@
 #include "base/mutex.h"
 #include "handle.h"
 #include "obj_ptr-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "verify_object.h"
 
 namespace art {
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index c43a482..f248a11 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -36,7 +36,7 @@
 
 namespace mirror {
 class Object;
-}
+}  // namespace mirror
 
 // Basic handle scope, tracked by a list. May be variable sized.
 class PACKED(4) BaseHandleScope {
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index 8bdf6b1..ec860c7 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -1198,26 +1198,67 @@
     // Class is allocated but not yet resolved: we cannot access its fields or super class.
     return;
   }
-  const size_t num_static_fields = klass->NumStaticFields();
-  // Total class size including embedded IMT, embedded vtable, and static fields.
-  const size_t class_size = klass->GetClassSize();
-  // Class size excluding static fields (relies on reference fields being the first static fields).
-  const size_t class_size_without_overhead = sizeof(mirror::Class);
-  CHECK_LE(class_size_without_overhead, class_size);
-  const size_t overhead_size = class_size - class_size_without_overhead;
 
-  if (overhead_size != 0) {
+  // Note: We will emit instance fields of Class as synthetic static fields with a prefix of
+  //       "$class$" so the class fields are visible in hprof dumps. For tools to account for that
+  //       correctly, we'll emit an instance size of zero for java.lang.Class, and also emit the
+  //       instance fields of java.lang.Object.
+  //
+  //       For other overhead (currently only the embedded vtable), we will generate a synthetic
+  //       byte array (or field[s] in case the overhead size is of reference size or less).
+
+  const size_t num_static_fields = klass->NumStaticFields();
+
+  // Total class size:
+  //   * class instance fields (including Object instance fields)
+  //   * vtable
+  //   * class static fields
+  const size_t total_class_size = klass->GetClassSize();
+
+  // Base class size (common parts of all Class instances):
+  //   * class instance fields (including Object instance fields)
+  constexpr size_t base_class_size = sizeof(mirror::Class);
+  CHECK_LE(base_class_size, total_class_size);
+
+  // Difference of Total and Base:
+  //   * vtable
+  //   * class static fields
+  const size_t base_overhead_size = total_class_size - base_class_size;
+
+  // Tools (ahat/Studio) will count the static fields and account for them in the class size. We
+  // must thus subtract them from base_overhead_size or they will be double-counted.
+  size_t class_static_fields_size = 0;
+  for (ArtField& class_static_field : klass->GetSFields()) {
+    size_t size = 0;
+    SignatureToBasicTypeAndSize(class_static_field.GetTypeDescriptor(), &size);
+    class_static_fields_size += size;
+  }
+
+  CHECK_GE(base_overhead_size, class_static_fields_size);
+  // Now we have:
+  //   * vtable
+  const size_t base_no_statics_overhead_size = base_overhead_size - class_static_fields_size;
+
+  // We may decide to display native overhead (the actual IMT, ArtFields and ArtMethods) in the
+  // future.
+  const size_t java_heap_overhead_size = base_no_statics_overhead_size;
+
+  // For overhead greater than 4 bytes, we'll allocate a synthetic array.
+  if (java_heap_overhead_size > 4) {
     // Create a byte array to reflect the allocation of the
     // StaticField array at the end of this class.
     __ AddU1(HPROF_PRIMITIVE_ARRAY_DUMP);
     __ AddClassStaticsId(klass);
     __ AddStackTraceSerialNumber(LookupStackTraceSerialNumber(klass));
-    __ AddU4(overhead_size);
+    __ AddU4(java_heap_overhead_size - 4);
     __ AddU1(hprof_basic_byte);
-    for (size_t i = 0; i < overhead_size; ++i) {
+    for (size_t i = 0; i < java_heap_overhead_size - 4; ++i) {
       __ AddU1(0);
     }
   }
+  const size_t java_heap_overhead_field_count = java_heap_overhead_size > 0
+                                                    ? (java_heap_overhead_size == 3 ? 2u : 1u)
+                                                    : 0;
 
   __ AddU1(HPROF_CLASS_DUMP);
   __ AddClassId(LookupClassId(klass));
@@ -1228,10 +1269,11 @@
   __ AddObjectId(nullptr);    // no prot domain
   __ AddObjectId(nullptr);    // reserved
   __ AddObjectId(nullptr);    // reserved
+  // Instance size.
   if (klass->IsClassClass()) {
-    // ClassObjects have their static fields appended, so aren't all the same size.
-    // But they're at least this size.
-    __ AddU4(class_size_without_overhead);  // instance size
+    // As mentioned above, we will emit instance fields as synthetic static fields. So the
+    // base object is "empty."
+    __ AddU4(0);
   } else if (klass->IsStringClass()) {
     // Strings are variable length with character data at the end like arrays.
     // This outputs the size of an empty string.
@@ -1245,48 +1287,116 @@
   __ AddU2(0);  // empty const pool
 
   // Static fields
-  if (overhead_size == 0) {
-    __ AddU2(static_cast<uint16_t>(0));
-  } else {
-    __ AddU2(static_cast<uint16_t>(num_static_fields + 1));
+  //
+  // Note: we report Class' and Object's instance fields here, too. This is for visibility reasons.
+  //       (b/38167721)
+  mirror::Class* class_class = klass->GetClass();
+
+  DCHECK(class_class->GetSuperClass()->IsObjectClass());
+  const size_t static_fields_reported = class_class->NumInstanceFields()
+                                        + class_class->GetSuperClass()->NumInstanceFields()
+                                        + java_heap_overhead_field_count
+                                        + num_static_fields;
+  __ AddU2(dchecked_integral_cast<uint16_t>(static_fields_reported));
+
+  if (java_heap_overhead_size != 0) {
     __ AddStringId(LookupStringId(kClassOverheadName));
-    __ AddU1(hprof_basic_object);
-    __ AddClassStaticsId(klass);
+    size_t overhead_fields = 0;
+    if (java_heap_overhead_size > 4) {
+      __ AddU1(hprof_basic_object);
+      __ AddClassStaticsId(klass);
+      ++overhead_fields;
+    } else {
+      switch (java_heap_overhead_size) {
+        case 4: {
+          __ AddU1(hprof_basic_int);
+          __ AddU4(0);
+          ++overhead_fields;
+          break;
+        }
 
-    for (size_t i = 0; i < num_static_fields; ++i) {
-      ArtField* f = klass->GetStaticField(i);
+        case 2: {
+          __ AddU1(hprof_basic_short);
+          __ AddU2(0);
+          ++overhead_fields;
+          break;
+        }
 
-      size_t size;
-      HprofBasicType t = SignatureToBasicTypeAndSize(f->GetTypeDescriptor(), &size);
-      __ AddStringId(LookupStringId(f->GetName()));
-      __ AddU1(t);
-      switch (t) {
-        case hprof_basic_byte:
-          __ AddU1(f->GetByte(klass));
+        case 3: {
+          __ AddU1(hprof_basic_short);
+          __ AddU2(0);
+          __ AddStringId(LookupStringId(std::string(kClassOverheadName) + "2"));
+          ++overhead_fields;
+        }
+        FALLTHROUGH_INTENDED;
+
+        case 1: {
+          __ AddU1(hprof_basic_byte);
+          __ AddU1(0);
+          ++overhead_fields;
           break;
-        case hprof_basic_boolean:
-          __ AddU1(f->GetBoolean(klass));
-          break;
-        case hprof_basic_char:
-          __ AddU2(f->GetChar(klass));
-          break;
-        case hprof_basic_short:
-          __ AddU2(f->GetShort(klass));
-          break;
-        case hprof_basic_float:
-        case hprof_basic_int:
-        case hprof_basic_object:
-          __ AddU4(f->Get32(klass));
-          break;
-        case hprof_basic_double:
-        case hprof_basic_long:
-          __ AddU8(f->Get64(klass));
-          break;
-        default:
-          LOG(FATAL) << "Unexpected size " << size;
-          UNREACHABLE();
+        }
       }
     }
+    DCHECK_EQ(java_heap_overhead_field_count, overhead_fields);
+  }
+
+  // Helper lambda to emit the given static field. The second argument name_fn will be called to
+  // generate the name to emit. This can be used to emit something other than the field's actual
+  // name.
+  auto static_field_writer = [&](ArtField& field, auto name_fn)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    __ AddStringId(LookupStringId(name_fn(field)));
+
+    size_t size;
+    HprofBasicType t = SignatureToBasicTypeAndSize(field.GetTypeDescriptor(), &size);
+    __ AddU1(t);
+    switch (t) {
+      case hprof_basic_byte:
+        __ AddU1(field.GetByte(klass));
+        return;
+      case hprof_basic_boolean:
+        __ AddU1(field.GetBoolean(klass));
+        return;
+      case hprof_basic_char:
+        __ AddU2(field.GetChar(klass));
+        return;
+      case hprof_basic_short:
+        __ AddU2(field.GetShort(klass));
+        return;
+      case hprof_basic_float:
+      case hprof_basic_int:
+      case hprof_basic_object:
+        __ AddU4(field.Get32(klass));
+        return;
+      case hprof_basic_double:
+      case hprof_basic_long:
+        __ AddU8(field.Get64(klass));
+        return;
+    }
+    LOG(FATAL) << "Unexpected size " << size;
+    UNREACHABLE();
+  };
+
+  {
+    auto class_instance_field_name_fn = [](ArtField& field) REQUIRES_SHARED(Locks::mutator_lock_) {
+      return std::string("$class$") + field.GetName();
+    };
+    for (ArtField& class_instance_field : class_class->GetIFields()) {
+      static_field_writer(class_instance_field, class_instance_field_name_fn);
+    }
+    for (ArtField& object_instance_field : class_class->GetSuperClass()->GetIFields()) {
+      static_field_writer(object_instance_field, class_instance_field_name_fn);
+    }
+  }
+
+  {
+    auto class_static_field_name_fn = [](ArtField& field) REQUIRES_SHARED(Locks::mutator_lock_) {
+      return field.GetName();
+    };
+    for (ArtField& class_static_field : klass->GetSFields()) {
+      static_field_writer(class_static_field, class_static_field_name_fn);
+    }
   }
 
   // Instance fields for this class (no superclass fields)
diff --git a/runtime/imtable_test.cc b/runtime/imtable_test.cc
index 17149df..d482183 100644
--- a/runtime/imtable_test.cc
+++ b/runtime/imtable_test.cc
@@ -29,7 +29,7 @@
 #include "mirror/class_loader.h"
 #include "handle_scope-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index 2128f8c..9673bd9 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -111,12 +111,12 @@
   if (serial_ == kIRTPrevCount) {
     serial_ = 0;
   }
-  references_[serial_] = GcRoot<mirror::Object>(obj);
+  references_[serial_] = GcRoot<mirror::Object>(obj.Ptr());
 }
 
 inline void IrtEntry::SetReference(ObjPtr<mirror::Object> obj) {
   DCHECK_LT(serial_, kIRTPrevCount);
-  references_[serial_] = GcRoot<mirror::Object>(obj);
+  references_[serial_] = GcRoot<mirror::Object>(obj.Ptr());
 }
 
 }  // namespace art
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index c852d5a..cff3ea7 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -34,6 +34,9 @@
 static constexpr bool kDumpStackOnNonLocalReference = false;
 static constexpr bool kDebugIRT = false;
 
+// Maximum table size we allow.
+static constexpr size_t kMaxTableSizeInBytes = 128 * MB;
+
 const char* GetIndirectRefKindString(const IndirectRefKind& kind) {
   switch (kind) {
     case kHandleScopeOrInvalid:
@@ -71,6 +74,9 @@
   CHECK(error_msg != nullptr);
   CHECK_NE(desired_kind, kHandleScopeOrInvalid);
 
+  // Overflow and maximum check.
+  CHECK_LE(max_count, kMaxTableSizeInBytes / sizeof(IrtEntry));
+
   const size_t table_bytes = max_count * sizeof(IrtEntry);
   table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
                                             PROT_READ | PROT_WRITE, false, false, error_msg));
@@ -203,6 +209,13 @@
 bool IndirectReferenceTable::Resize(size_t new_size, std::string* error_msg) {
   CHECK_GT(new_size, max_entries_);
 
+  constexpr size_t kMaxEntries = kMaxTableSizeInBytes / sizeof(IrtEntry);
+  if (new_size > kMaxEntries) {
+    *error_msg = android::base::StringPrintf("Requested size exceeds maximum: %zu", new_size);
+    return false;
+  }
+  // Note: the above check also ensures that there is no overflow below.
+
   const size_t table_bytes = new_size * sizeof(IrtEntry);
   std::unique_ptr<MemMap> new_map(MemMap::MapAnonymous("indirect ref table",
                                                        nullptr,
@@ -247,6 +260,14 @@
     }
 
     // Try to double space.
+    if (std::numeric_limits<size_t>::max() / 2 < max_entries_) {
+      LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
+                 << "(max=" << max_entries_ << ")" << std::endl
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this)
+                << " Resizing failed: exceeds size_t";
+      UNREACHABLE();
+    }
+
     std::string error_msg;
     if (!Resize(max_entries_ * 2, &error_msg)) {
       LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
@@ -453,4 +474,38 @@
   segment_state_ = new_state;
 }
 
+bool IndirectReferenceTable::EnsureFreeCapacity(size_t free_capacity, std::string* error_msg) {
+  // Returns true once free_capacity entries fit after the top index, growing the table if needed.
+  const size_t top_index = segment_state_.top_index;
+  // Compare by subtraction: top_index + free_capacity could wrap around for a huge request.
+  // A full table with free_capacity == 0 also succeeds here; Resize() CHECKs that it grows.
+  if (top_index <= max_entries_ && free_capacity <= max_entries_ - top_index) {
+    return true;
+  }
+
+  // Best effort only: we just ensure the asked-for capacity at the end of the table.
+  if (resizable_ == ResizableCapacity::kNo) {
+    *error_msg = "Table is not resizable";
+    return false;
+  }
+
+  // Growing is required. Would computing the new entry count overflow?
+  if (std::numeric_limits<size_t>::max() - free_capacity < top_index) {
+    *error_msg = "Cannot resize table, overflow.";
+    return false;
+  }
+  if (!Resize(top_index + free_capacity, error_msg)) {
+    LOG(WARNING) << "JNI ERROR: Unable to reserve space in EnsureFreeCapacity (" << free_capacity
+                 << "): " << std::endl
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this)
+                 << " Resizing failed: " << *error_msg;
+    return false;
+  }
+  return true;
+}
+
+size_t IndirectReferenceTable::FreeCapacity() {
+  return max_entries_ - segment_state_.top_index;  // Slots past the top index; holes not counted.
+}
+
 }  // namespace art
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 7e452a2..6d52d95 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -28,7 +28,6 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "read_barrier_option.h"
 
@@ -285,6 +284,13 @@
     return segment_state_.top_index;
   }
 
+  // Ensure that at least free_capacity elements are available, or return false.
+  bool EnsureFreeCapacity(size_t free_capacity, std::string* error_msg)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // See implementation of EnsureFreeCapacity. We'll only state here how much is trivially free,
+  // without recovering holes. Thus this is a conservative estimate.
+  size_t FreeCapacity() REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Note IrtIterator does not have a read barrier as it's used to visit roots.
   IrtIterator begin() {
     return IrtIterator(table_, 0, Capacity());
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index d862ff2..8120cc4 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -20,7 +20,9 @@
 
 #include "arch/context.h"
 #include "art_method-inl.h"
+#include "art_field-inl.h"
 #include "atomic.h"
+#include "base/callee_save_type.h"
 #include "class_linker.h"
 #include "debugger.h"
 #include "dex_file-inl.h"
@@ -31,6 +33,7 @@
 #include "interpreter/interpreter.h"
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
+#include "jvalue-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache.h"
 #include "mirror/object_array-inl.h"
@@ -45,6 +48,30 @@
 
 constexpr bool kVerboseInstrumentation = false;
 
+void InstrumentationListener::MethodExited(Thread* thread,
+                                           Handle<mirror::Object> this_object,
+                                           ArtMethod* method,
+                                           uint32_t dex_pc,
+                                           Handle<mirror::Object> return_value) {
+  DCHECK_EQ(method->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetReturnTypePrimitive(),
+            Primitive::kPrimNot);  // This overload is only for reference-returning methods.
+  JValue v;
+  v.SetL(return_value.Get());  // Unwrap the handle into a JValue.
+  MethodExited(thread, this_object, method, dex_pc, v);  // Forward to the JValue overload.
+}
+
+void InstrumentationListener::FieldWritten(Thread* thread,
+                                           Handle<mirror::Object> this_object,
+                                           ArtMethod* method,
+                                           uint32_t dex_pc,
+                                           ArtField* field,
+                                           Handle<mirror::Object> field_value) {
+  DCHECK(!field->IsPrimitiveType());  // This overload is only for reference-typed fields.
+  JValue v;
+  v.SetL(field_value.Get());  // Unwrap the handle into a JValue.
+  FieldWritten(thread, this_object, method, dex_pc, field, v);  // Forward to the JValue overload.
+}
+
 // Instrumentation works on non-inlined frames by updating returned PCs
 // of compiled frames.
 static constexpr StackVisitor::StackWalkKind kInstrumentationStackWalk =
@@ -357,7 +384,7 @@
             LOG(INFO) << "  Removing exit stub in " << DescribeLocation();
           }
           if (instrumentation_frame.interpreter_entry_) {
-            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
+            CHECK(m == Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs));
           } else {
             CHECK(m == instrumentation_frame.method_) << ArtMethod::PrettyMethod(m);
           }
@@ -916,48 +943,75 @@
   return class_linker->GetQuickOatCodeFor(method);
 }
 
-void Instrumentation::MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
+void Instrumentation::MethodEnterEventImpl(Thread* thread,
+                                           ObjPtr<mirror::Object> this_object,
                                            ArtMethod* method,
                                            uint32_t dex_pc) const {
   if (HasMethodEntryListeners()) {
+    Thread* self = Thread::Current();
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Object> thiz(hs.NewHandle(this_object));
     for (InstrumentationListener* listener : method_entry_listeners_) {
       if (listener != nullptr) {
-        listener->MethodEntered(thread, this_object, method, dex_pc);
+        listener->MethodEntered(thread, thiz, method, dex_pc);
       }
     }
   }
 }
 
-void Instrumentation::MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
+void Instrumentation::MethodExitEventImpl(Thread* thread,
+                                          ObjPtr<mirror::Object> this_object,
                                           ArtMethod* method,
-                                          uint32_t dex_pc, const JValue& return_value) const {
+                                          uint32_t dex_pc,
+                                          const JValue& return_value) const {
   if (HasMethodExitListeners()) {
-    for (InstrumentationListener* listener : method_exit_listeners_) {
-      if (listener != nullptr) {
-        listener->MethodExited(thread, this_object, method, dex_pc, return_value);
+    Thread* self = Thread::Current();
+    StackHandleScope<2> hs(self);
+    Handle<mirror::Object> thiz(hs.NewHandle(this_object));
+    if (method->GetInterfaceMethodIfProxy(kRuntimePointerSize)
+              ->GetReturnTypePrimitive() != Primitive::kPrimNot) {
+      for (InstrumentationListener* listener : method_exit_listeners_) {
+        if (listener != nullptr) {
+          listener->MethodExited(thread, thiz, method, dex_pc, return_value);
+        }
+      }
+    } else {
+      Handle<mirror::Object> ret(hs.NewHandle(return_value.GetL()));
+      for (InstrumentationListener* listener : method_exit_listeners_) {
+        if (listener != nullptr) {
+          listener->MethodExited(thread, thiz, method, dex_pc, ret);
+        }
       }
     }
   }
 }
 
-void Instrumentation::MethodUnwindEvent(Thread* thread, mirror::Object* this_object,
+void Instrumentation::MethodUnwindEvent(Thread* thread,
+                                        mirror::Object* this_object,
                                         ArtMethod* method,
                                         uint32_t dex_pc) const {
   if (HasMethodUnwindListeners()) {
+    Thread* self = Thread::Current();
+    StackHandleScope<1> hs(self);
+    Handle<mirror::Object> thiz(hs.NewHandle(this_object));
     for (InstrumentationListener* listener : method_unwind_listeners_) {
       if (listener != nullptr) {
-        listener->MethodUnwind(thread, this_object, method, dex_pc);
+        listener->MethodUnwind(thread, thiz, method, dex_pc);
       }
     }
   }
 }
 
-void Instrumentation::DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
+void Instrumentation::DexPcMovedEventImpl(Thread* thread,
+                                          ObjPtr<mirror::Object> this_object,
                                           ArtMethod* method,
                                           uint32_t dex_pc) const {
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> thiz(hs.NewHandle(this_object));
   for (InstrumentationListener* listener : dex_pc_listeners_) {
     if (listener != nullptr) {
-      listener->DexPcMoved(thread, this_object, method, dex_pc);
+      listener->DexPcMoved(thread, thiz, method, dex_pc);
     }
   }
 }
@@ -974,36 +1028,56 @@
 }
 
 void Instrumentation::InvokeVirtualOrInterfaceImpl(Thread* thread,
-                                                   mirror::Object* this_object,
+                                                   ObjPtr<mirror::Object> this_object,
                                                    ArtMethod* caller,
                                                    uint32_t dex_pc,
                                                    ArtMethod* callee) const {
-  // We cannot have thread suspension since that would cause the this_object parameter to
-  // potentially become a dangling pointer. An alternative could be to put it in a handle instead.
-  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> thiz(hs.NewHandle(this_object));
   for (InstrumentationListener* listener : invoke_virtual_or_interface_listeners_) {
     if (listener != nullptr) {
-      listener->InvokeVirtualOrInterface(thread, this_object, caller, dex_pc, callee);
+      listener->InvokeVirtualOrInterface(thread, thiz, caller, dex_pc, callee);
     }
   }
 }
 
-void Instrumentation::FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
-                                         ArtMethod* method, uint32_t dex_pc,
+void Instrumentation::FieldReadEventImpl(Thread* thread,
+                                         ObjPtr<mirror::Object> this_object,
+                                         ArtMethod* method,
+                                         uint32_t dex_pc,
                                          ArtField* field) const {
+  Thread* self = Thread::Current();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> thiz(hs.NewHandle(this_object));
   for (InstrumentationListener* listener : field_read_listeners_) {
     if (listener != nullptr) {
-      listener->FieldRead(thread, this_object, method, dex_pc, field);
+      listener->FieldRead(thread, thiz, method, dex_pc, field);
     }
   }
 }
 
-void Instrumentation::FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
-                                         ArtMethod* method, uint32_t dex_pc,
-                                         ArtField* field, const JValue& field_value) const {
-  for (InstrumentationListener* listener : field_write_listeners_) {
-    if (listener != nullptr) {
-      listener->FieldWritten(thread, this_object, method, dex_pc, field, field_value);
+void Instrumentation::FieldWriteEventImpl(Thread* thread,
+                                          ObjPtr<mirror::Object> this_object,
+                                          ArtMethod* method,
+                                          uint32_t dex_pc,
+                                          ArtField* field,
+                                          const JValue& field_value) const {
+  Thread* self = Thread::Current();
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Object> thiz(hs.NewHandle(this_object));
+  if (field->IsPrimitiveType()) {
+    for (InstrumentationListener* listener : field_write_listeners_) {
+      if (listener != nullptr) {
+        listener->FieldWritten(thread, thiz, method, dex_pc, field, field_value);
+      }
+    }
+  } else {
+    Handle<mirror::Object> val(hs.NewHandle(field_value.GetL()));
+    for (InstrumentationListener* listener : field_write_listeners_) {
+      if (listener != nullptr) {
+        listener->FieldWritten(thread, thiz, method, dex_pc, field, val);
+      }
     }
   }
 }
@@ -1018,7 +1092,7 @@
     thread->ClearException();
     for (InstrumentationListener* listener : exception_caught_listeners_) {
       if (listener != nullptr) {
-        listener->ExceptionCaught(thread, h_exception.Get());
+        listener->ExceptionCaught(thread, h_exception);
       }
     }
     thread->SetException(h_exception.Get());
@@ -1049,25 +1123,40 @@
 void Instrumentation::PushInstrumentationStackFrame(Thread* self, mirror::Object* this_object,
                                                     ArtMethod* method,
                                                     uintptr_t lr, bool interpreter_entry) {
-  // We have a callee-save frame meaning this value is guaranteed to never be 0.
-  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
+  DCHECK(!self->IsExceptionPending());
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   if (kVerboseInstrumentation) {
     LOG(INFO) << "Entering " << ArtMethod::PrettyMethod(method) << " from PC "
               << reinterpret_cast<void*>(lr);
   }
-  instrumentation::InstrumentationStackFrame instrumentation_frame(this_object, method, lr,
+
+  // We send the enter event before pushing the instrumentation frame to make cleanup easier. If the
+  // event causes an exception we can simply send the unwind event and return.
+  StackHandleScope<1> hs(self);
+  Handle<mirror::Object> h_this(hs.NewHandle(this_object));
+  if (!interpreter_entry) {
+    MethodEnterEvent(self, h_this.Get(), method, 0);
+    if (self->IsExceptionPending()) {
+      MethodUnwindEvent(self, h_this.Get(), method, 0);
+      return;
+    }
+  }
+
+  // We have a callee-save frame meaning this value is guaranteed to never be 0.
+  DCHECK(!self->IsExceptionPending());
+  size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
+
+  instrumentation::InstrumentationStackFrame instrumentation_frame(h_this.Get(), method, lr,
                                                                    frame_id, interpreter_entry);
   stack->push_front(instrumentation_frame);
-
-  if (!interpreter_entry) {
-    MethodEnterEvent(self, this_object, method, 0);
-  }
 }
 
-TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                                            uint64_t gpr_result,
-                                                            uint64_t fpr_result) {
+TwoWordReturn Instrumentation::PopInstrumentationStackFrame(Thread* self,
+                                                            uintptr_t* return_pc,
+                                                            uint64_t* gpr_result,
+                                                            uint64_t* fpr_result) {
+  DCHECK(gpr_result != nullptr);
+  DCHECK(fpr_result != nullptr);
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
@@ -1083,13 +1172,20 @@
   uint32_t length;
   const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   char return_shorty = method->GetInterfaceMethodIfProxy(pointer_size)->GetShorty(&length)[0];
+  bool is_ref = return_shorty == '[' || return_shorty == 'L';
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::Object> res(hs.NewHandle<mirror::Object>(nullptr));
   JValue return_value;
   if (return_shorty == 'V') {
     return_value.SetJ(0);
   } else if (return_shorty == 'F' || return_shorty == 'D') {
-    return_value.SetJ(fpr_result);
+    return_value.SetJ(*fpr_result);
   } else {
-    return_value.SetJ(gpr_result);
+    return_value.SetJ(*gpr_result);
+  }
+  if (is_ref) {
+    // Take a handle to the return value so we won't lose it if we suspend.
+    res.Assign(return_value.GetL());
   }
   // TODO: improve the dex pc information here, requires knowledge of current PC as opposed to
   //       return_pc.
@@ -1106,6 +1202,10 @@
   bool deoptimize = (visitor.caller != nullptr) &&
                     (interpreter_stubs_installed_ || IsDeoptimized(visitor.caller) ||
                     Dbg::IsForcedInterpreterNeededForUpcall(self, visitor.caller));
+  if (is_ref) {
+    // Restore the return value if it's a reference since it might have moved.
+    *reinterpret_cast<mirror::Object**>(gpr_result) = res.Get();
+  }
   if (deoptimize && Runtime::Current()->IsAsyncDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << "Deoptimizing "
@@ -1140,9 +1240,8 @@
   // Do the pop.
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   CHECK_GT(stack->size(), 0U);
+  size_t idx = stack->size();
   InstrumentationStackFrame instrumentation_frame = stack->front();
-  // TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
-  stack->pop_front();
 
   ArtMethod* method = instrumentation_frame.method_;
   if (is_deoptimization) {
@@ -1160,6 +1259,10 @@
     uint32_t dex_pc = DexFile::kDexNoIndex;
     MethodUnwindEvent(self, instrumentation_frame.this_object_, method, dex_pc);
   }
+  // TODO: bring back CheckStackDepth(self, instrumentation_frame, 2);
+  CHECK_EQ(stack->size(), idx);
+  DCHECK(instrumentation_frame.method_ == stack->front().method_);
+  stack->pop_front();
   return instrumentation_frame.return_pc_;
 }
 
diff --git a/runtime/instrumentation.h b/runtime/instrumentation.h
index 01071a5..90b5def 100644
--- a/runtime/instrumentation.h
+++ b/runtime/instrumentation.h
@@ -36,6 +36,7 @@
 }  // namespace mirror
 class ArtField;
 class ArtMethod;
+template <typename T> class Handle;
 union JValue;
 class Thread;
 
@@ -62,37 +63,70 @@
   virtual ~InstrumentationListener() {}
 
   // Call-back for when a method is entered.
-  virtual void MethodEntered(Thread* thread, mirror::Object* this_object,
+  virtual void MethodEntered(Thread* thread,
+                             Handle<mirror::Object> this_object,
                              ArtMethod* method,
                              uint32_t dex_pc) REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
-  // Call-back for when a method is exited.
-  virtual void MethodExited(Thread* thread, mirror::Object* this_object,
-                            ArtMethod* method, uint32_t dex_pc,
+  virtual void MethodExited(Thread* thread,
+                            Handle<mirror::Object> this_object,
+                            ArtMethod* method,
+                            uint32_t dex_pc,
+                            Handle<mirror::Object> return_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Call-back for when a method is exited. The implementor should either wrap the return value
+  // in a Handle (if appropriate) or use the alternate MethodExited callback instead if they need
+  // to go through a suspend point.
+  virtual void MethodExited(Thread* thread,
+                            Handle<mirror::Object> this_object,
+                            ArtMethod* method,
+                            uint32_t dex_pc,
                             const JValue& return_value)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
   // Call-back for when a method is popped due to an exception throw. A method will either cause a
   // MethodExited call-back or a MethodUnwind call-back when its activation is removed.
-  virtual void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                            ArtMethod* method, uint32_t dex_pc)
+  virtual void MethodUnwind(Thread* thread,
+                            Handle<mirror::Object> this_object,
+                            ArtMethod* method,
+                            uint32_t dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
   // Call-back for when the dex pc moves in a method.
-  virtual void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                          ArtMethod* method, uint32_t new_dex_pc)
+  virtual void DexPcMoved(Thread* thread,
+                          Handle<mirror::Object> this_object,
+                          ArtMethod* method,
+                          uint32_t new_dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
   // Call-back for when we read from a field.
-  virtual void FieldRead(Thread* thread, mirror::Object* this_object, ArtMethod* method,
-                         uint32_t dex_pc, ArtField* field) = 0;
+  virtual void FieldRead(Thread* thread,
+                         Handle<mirror::Object> this_object,
+                         ArtMethod* method,
+                         uint32_t dex_pc,
+                         ArtField* field) = 0;
+
+  virtual void FieldWritten(Thread* thread,
+                            Handle<mirror::Object> this_object,
+                            ArtMethod* method,
+                            uint32_t dex_pc,
+                            ArtField* field,
+                            Handle<mirror::Object> field_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Call-back for when we write into a field.
-  virtual void FieldWritten(Thread* thread, mirror::Object* this_object, ArtMethod* method,
-                            uint32_t dex_pc, ArtField* field, const JValue& field_value) = 0;
+  virtual void FieldWritten(Thread* thread,
+                            Handle<mirror::Object> this_object,
+                            ArtMethod* method,
+                            uint32_t dex_pc,
+                            ArtField* field,
+                            const JValue& field_value)
+      REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
   // Call-back when an exception is caught.
-  virtual void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
+  virtual void ExceptionCaught(Thread* thread,
+                               Handle<mirror::Throwable> exception_object)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
   // Call-back for when we execute a branch.
@@ -104,11 +138,10 @@
 
   // Call-back for when we get an invokevirtual or an invokeinterface.
   virtual void InvokeVirtualOrInterface(Thread* thread,
-                                        mirror::Object* this_object,
+                                        Handle<mirror::Object> this_object,
                                         ArtMethod* caller,
                                         uint32_t dex_pc,
                                         ArtMethod* callee)
-      REQUIRES(Roles::uninterruptible_)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 };
 
@@ -323,8 +356,10 @@
   }
 
   // Inform listeners that a method has been exited.
-  void MethodExitEvent(Thread* thread, mirror::Object* this_object,
-                       ArtMethod* method, uint32_t dex_pc,
+  void MethodExitEvent(Thread* thread,
+                       mirror::Object* this_object,
+                       ArtMethod* method,
+                       uint32_t dex_pc,
                        const JValue& return_value) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (UNLIKELY(HasMethodExitListeners())) {
@@ -397,9 +432,13 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Called when an instrumented method is exited. Removes the pushed instrumentation frame
-  // returning the intended link register. Generates method exit events.
+  // returning the intended link register. Generates method exit events. The gpr_result and
+  // fpr_result arguments point to the locations where the integer/pointer and floating-point
+  // result values of the function are stored. Both pointers must always be valid but the values
+  // held there will only be meaningful if interpreted as the appropriate type given the function
+  // being returned from.
   TwoWordReturn PopInstrumentationStackFrame(Thread* self, uintptr_t* return_pc,
-                                             uint64_t gpr_result, uint64_t fpr_result)
+                                             uint64_t* gpr_result, uint64_t* fpr_result)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!deoptimized_methods_lock_);
 
   // Pops an instrumentation frame from the current thread and generate an unwind event.
@@ -465,31 +504,42 @@
   // exclusive access to mutator lock which you can't get if the runtime isn't started.
   void SetEntrypointsInstrumented(bool instrumented) NO_THREAD_SAFETY_ANALYSIS;
 
-  void MethodEnterEventImpl(Thread* thread, mirror::Object* this_object,
-                            ArtMethod* method, uint32_t dex_pc) const
+  void MethodEnterEventImpl(Thread* thread,
+                            ObjPtr<mirror::Object> this_object,
+                            ArtMethod* method,
+                            uint32_t dex_pc) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void MethodExitEventImpl(Thread* thread, mirror::Object* this_object,
+  void MethodExitEventImpl(Thread* thread,
+                           ObjPtr<mirror::Object> this_object,
                            ArtMethod* method,
-                           uint32_t dex_pc, const JValue& return_value) const
+                           uint32_t dex_pc,
+                           const JValue& return_value) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void DexPcMovedEventImpl(Thread* thread, mirror::Object* this_object,
-                           ArtMethod* method, uint32_t dex_pc) const
+  void DexPcMovedEventImpl(Thread* thread,
+                           ObjPtr<mirror::Object> this_object,
+                           ArtMethod* method,
+                           uint32_t dex_pc) const
       REQUIRES_SHARED(Locks::mutator_lock_);
   void BranchImpl(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t offset) const
       REQUIRES_SHARED(Locks::mutator_lock_);
   void InvokeVirtualOrInterfaceImpl(Thread* thread,
-                                    mirror::Object* this_object,
+                                    ObjPtr<mirror::Object> this_object,
                                     ArtMethod* caller,
                                     uint32_t dex_pc,
                                     ArtMethod* callee) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void FieldReadEventImpl(Thread* thread, mirror::Object* this_object,
-                           ArtMethod* method, uint32_t dex_pc,
-                           ArtField* field) const
+  void FieldReadEventImpl(Thread* thread,
+                          ObjPtr<mirror::Object> this_object,
+                          ArtMethod* method,
+                          uint32_t dex_pc,
+                          ArtField* field) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void FieldWriteEventImpl(Thread* thread, mirror::Object* this_object,
-                           ArtMethod* method, uint32_t dex_pc,
-                           ArtField* field, const JValue& field_value) const
+  void FieldWriteEventImpl(Thread* thread,
+                           ObjPtr<mirror::Object> this_object,
+                           ArtMethod* method,
+                           uint32_t dex_pc,
+                           ArtField* field,
+                           const JValue& field_value) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Read barrier-aware utility functions for accessing deoptimized_methods_
diff --git a/runtime/instrumentation_test.cc b/runtime/instrumentation_test.cc
index 7f9f04f..2a601c9 100644
--- a/runtime/instrumentation_test.cc
+++ b/runtime/instrumentation_test.cc
@@ -23,11 +23,13 @@
 #include "dex_file.h"
 #include "gc/scoped_gc_critical_section.h"
 #include "handle_scope-inl.h"
+#include "jni_internal.h"
 #include "jvalue.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread_list.h"
 #include "thread-inl.h"
+#include "well_known_classes.h"
 
 namespace art {
 namespace instrumentation {
@@ -35,16 +37,22 @@
 class TestInstrumentationListener FINAL : public instrumentation::InstrumentationListener {
  public:
   TestInstrumentationListener()
-    : received_method_enter_event(false), received_method_exit_event(false),
-      received_method_unwind_event(false), received_dex_pc_moved_event(false),
-      received_field_read_event(false), received_field_written_event(false),
-      received_exception_caught_event(false), received_branch_event(false),
+    : received_method_enter_event(false),
+      received_method_exit_event(false),
+      received_method_exit_object_event(false),
+      received_method_unwind_event(false),
+      received_dex_pc_moved_event(false),
+      received_field_read_event(false),
+      received_field_written_event(false),
+      received_field_written_object_event(false),
+      received_exception_caught_event(false),
+      received_branch_event(false),
       received_invoke_virtual_or_interface_event(false) {}
 
   virtual ~TestInstrumentationListener() {}
 
   void MethodEntered(Thread* thread ATTRIBUTE_UNUSED,
-                     mirror::Object* this_object ATTRIBUTE_UNUSED,
+                     Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                      ArtMethod* method ATTRIBUTE_UNUSED,
                      uint32_t dex_pc ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -52,7 +60,16 @@
   }
 
   void MethodExited(Thread* thread ATTRIBUTE_UNUSED,
-                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> return_value ATTRIBUTE_UNUSED)
+      OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    received_method_exit_object_event = true;
+  }
+
+  void MethodExited(Thread* thread ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                     ArtMethod* method ATTRIBUTE_UNUSED,
                     uint32_t dex_pc ATTRIBUTE_UNUSED,
                     const JValue& return_value ATTRIBUTE_UNUSED)
@@ -61,7 +78,7 @@
   }
 
   void MethodUnwind(Thread* thread ATTRIBUTE_UNUSED,
-                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                     ArtMethod* method ATTRIBUTE_UNUSED,
                     uint32_t dex_pc ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -69,7 +86,7 @@
   }
 
   void DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
-                  mirror::Object* this_object ATTRIBUTE_UNUSED,
+                  Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                   ArtMethod* method ATTRIBUTE_UNUSED,
                   uint32_t new_dex_pc ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -77,7 +94,7 @@
   }
 
   void FieldRead(Thread* thread ATTRIBUTE_UNUSED,
-                 mirror::Object* this_object ATTRIBUTE_UNUSED,
+                 Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                  ArtMethod* method ATTRIBUTE_UNUSED,
                  uint32_t dex_pc ATTRIBUTE_UNUSED,
                  ArtField* field ATTRIBUTE_UNUSED)
@@ -86,7 +103,17 @@
   }
 
   void FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
-                    mirror::Object* this_object ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    ArtField* field ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> field_value ATTRIBUTE_UNUSED)
+      OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    received_field_written_object_event = true;
+  }
+
+  void FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
+                    Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                     ArtMethod* method ATTRIBUTE_UNUSED,
                     uint32_t dex_pc ATTRIBUTE_UNUSED,
                     ArtField* field ATTRIBUTE_UNUSED,
@@ -96,7 +123,7 @@
   }
 
   void ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED,
-                       mirror::Throwable* exception_object ATTRIBUTE_UNUSED)
+                       Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     received_exception_caught_event = true;
   }
@@ -110,7 +137,7 @@
   }
 
   void InvokeVirtualOrInterface(Thread* thread ATTRIBUTE_UNUSED,
-                                mirror::Object* this_object ATTRIBUTE_UNUSED,
+                                Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                                 ArtMethod* caller ATTRIBUTE_UNUSED,
                                 uint32_t dex_pc ATTRIBUTE_UNUSED,
                                 ArtMethod* callee ATTRIBUTE_UNUSED)
@@ -121,10 +148,12 @@
   void Reset() {
     received_method_enter_event = false;
     received_method_exit_event = false;
+    received_method_exit_object_event = false;
     received_method_unwind_event = false;
     received_dex_pc_moved_event = false;
     received_field_read_event = false;
     received_field_written_event = false;
+    received_field_written_object_event = false;
     received_exception_caught_event = false;
     received_branch_event = false;
     received_invoke_virtual_or_interface_event = false;
@@ -132,10 +161,12 @@
 
   bool received_method_enter_event;
   bool received_method_exit_event;
+  bool received_method_exit_object_event;
   bool received_method_unwind_event;
   bool received_dex_pc_moved_event;
   bool received_field_read_event;
   bool received_field_written_event;
+  bool received_field_written_object_event;
   bool received_exception_caught_event;
   bool received_branch_event;
   bool received_invoke_virtual_or_interface_event;
@@ -171,6 +202,13 @@
   }
 
   void TestEvent(uint32_t instrumentation_event) {
+    TestEvent(instrumentation_event, nullptr, nullptr, false);
+  }
+
+  void TestEvent(uint32_t instrumentation_event,
+                 ArtMethod* event_method,
+                 ArtField* event_field,
+                 bool with_object) {
     ScopedObjectAccess soa(Thread::Current());
     instrumentation::Instrumentation* instr = Runtime::Current()->GetInstrumentation();
     TestInstrumentationListener listener;
@@ -180,15 +218,20 @@
       instr->AddListener(&listener, instrumentation_event);
     }
 
-    ArtMethod* const event_method = nullptr;
     mirror::Object* const event_obj = nullptr;
     const uint32_t event_dex_pc = 0;
 
     // Check the listener is registered and is notified of the event.
     EXPECT_TRUE(HasEventListener(instr, instrumentation_event));
-    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
-    ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc);
-    EXPECT_TRUE(DidListenerReceiveEvent(listener, instrumentation_event));
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event, with_object));
+    ReportEvent(instr,
+                instrumentation_event,
+                soa.Self(),
+                event_method,
+                event_obj,
+                event_field,
+                event_dex_pc);
+    EXPECT_TRUE(DidListenerReceiveEvent(listener, instrumentation_event, with_object));
 
     listener.Reset();
     {
@@ -199,9 +242,15 @@
 
     // Check the listener is not registered and is not notified of the event.
     EXPECT_FALSE(HasEventListener(instr, instrumentation_event));
-    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
-    ReportEvent(instr, instrumentation_event, soa.Self(), event_method, event_obj, event_dex_pc);
-    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event));
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event, with_object));
+    ReportEvent(instr,
+                instrumentation_event,
+                soa.Self(),
+                event_method,
+                event_obj,
+                event_field,
+                event_dex_pc);
+    EXPECT_FALSE(DidListenerReceiveEvent(listener, instrumentation_event, with_object));
   }
 
   void DeoptimizeMethod(Thread* self, ArtMethod* method, bool enable_deoptimization)
@@ -317,8 +366,12 @@
     }
   }
 
-  static void ReportEvent(const instrumentation::Instrumentation* instr, uint32_t event_type,
-                          Thread* self, ArtMethod* method, mirror::Object* obj,
+  static void ReportEvent(const instrumentation::Instrumentation* instr,
+                          uint32_t event_type,
+                          Thread* self,
+                          ArtMethod* method,
+                          mirror::Object* obj,
+                          ArtField* field,
                           uint32_t dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     switch (event_type) {
@@ -337,11 +390,11 @@
         instr->DexPcMovedEvent(self, obj, method, dex_pc);
         break;
       case instrumentation::Instrumentation::kFieldRead:
-        instr->FieldReadEvent(self, obj, method, dex_pc, nullptr);
+        instr->FieldReadEvent(self, obj, method, dex_pc, field);
         break;
       case instrumentation::Instrumentation::kFieldWritten: {
         JValue value;
-        instr->FieldWriteEvent(self, obj, method, dex_pc, nullptr, value);
+        instr->FieldWriteEvent(self, obj, method, dex_pc, field, value);
         break;
       }
       case instrumentation::Instrumentation::kExceptionCaught: {
@@ -364,12 +417,14 @@
   }
 
   static bool DidListenerReceiveEvent(const TestInstrumentationListener& listener,
-                                      uint32_t event_type) {
+                                      uint32_t event_type,
+                                      bool with_object) {
     switch (event_type) {
       case instrumentation::Instrumentation::kMethodEntered:
         return listener.received_method_enter_event;
       case instrumentation::Instrumentation::kMethodExited:
-        return listener.received_method_exit_event;
+        return (!with_object && listener.received_method_exit_event) ||
+            (with_object && listener.received_method_exit_object_event);
       case instrumentation::Instrumentation::kMethodUnwind:
         return listener.received_method_unwind_event;
       case instrumentation::Instrumentation::kDexPcMoved:
@@ -377,7 +432,8 @@
       case instrumentation::Instrumentation::kFieldRead:
         return listener.received_field_read_event;
       case instrumentation::Instrumentation::kFieldWritten:
-        return listener.received_field_written_event;
+        return (!with_object && listener.received_field_written_event) ||
+            (with_object && listener.received_field_written_object_event);
       case instrumentation::Instrumentation::kExceptionCaught:
         return listener.received_exception_caught_event;
       case instrumentation::Instrumentation::kBranch:
@@ -419,8 +475,42 @@
   TestEvent(instrumentation::Instrumentation::kMethodEntered);
 }
 
-TEST_F(InstrumentationTest, MethodExitEvent) {
-  TestEvent(instrumentation::Instrumentation::kMethodExited);
+TEST_F(InstrumentationTest, MethodExitObjectEvent) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("Instrumentation");
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+  ASSERT_TRUE(klass != nullptr);
+  ArtMethod* method = klass->FindDeclaredDirectMethod("returnReference",
+                                                      "()Ljava/lang/Object;",
+                                                      kRuntimePointerSize);
+  ASSERT_TRUE(method != nullptr);
+  TestEvent(instrumentation::Instrumentation::kMethodExited,
+            /*event_method*/ method,
+            /*event_field*/ nullptr,
+            /*with_object*/ true);
+}
+
+TEST_F(InstrumentationTest, MethodExitPrimEvent) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("Instrumentation");
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+  ASSERT_TRUE(klass != nullptr);
+  ArtMethod* method = klass->FindDeclaredDirectMethod("returnPrimitive",
+                                                      "()I",
+                                                      kRuntimePointerSize);
+  ASSERT_TRUE(method != nullptr);
+  TestEvent(instrumentation::Instrumentation::kMethodExited,
+            /*event_method*/ method,
+            /*event_field*/ nullptr,
+            /*with_object*/ false);
 }
 
 TEST_F(InstrumentationTest, MethodUnwindEvent) {
@@ -435,8 +525,40 @@
   TestEvent(instrumentation::Instrumentation::kFieldRead);
 }
 
-TEST_F(InstrumentationTest, FieldWriteEvent) {
-  TestEvent(instrumentation::Instrumentation::kFieldWritten);
+TEST_F(InstrumentationTest, FieldWriteObjectEvent) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("Instrumentation");
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+  ASSERT_TRUE(klass != nullptr);
+  ArtField* field = klass->FindDeclaredStaticField("referenceField", "Ljava/lang/Object;");
+  ASSERT_TRUE(field != nullptr);
+
+  TestEvent(instrumentation::Instrumentation::kFieldWritten,
+            /*event_method*/ nullptr,
+            /*event_field*/ field,
+            /*with_object*/ true);
+}
+
+TEST_F(InstrumentationTest, FieldWritePrimEvent) {
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("Instrumentation");
+  Runtime* const runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::ClassLoader> loader(hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
+  mirror::Class* klass = class_linker->FindClass(soa.Self(), "LInstrumentation;", loader);
+  ASSERT_TRUE(klass != nullptr);
+  ArtField* field = klass->FindDeclaredStaticField("primitiveField", "I");
+  ASSERT_TRUE(field != nullptr);
+
+  TestEvent(instrumentation::Instrumentation::kFieldWritten,
+            /*event_method*/ nullptr,
+            /*event_field*/ field,
+            /*with_object*/ false);
 }
 
 TEST_F(InstrumentationTest, ExceptionCaughtEvent) {
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index 3e19146..2bac231 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -27,6 +27,8 @@
 #include "mirror/object_array-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/string-inl.h"
+#include "object_callbacks.h"
+#include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 #include "utf.h"
 
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 68454fb..2ec03be 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -25,10 +25,11 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "gc/weak_root_state.h"
-#include "object_callbacks.h"
 
 namespace art {
 
+class IsMarkedVisitor;
+
 namespace gc {
 namespace space {
 class ImageSpace;
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 311515c..bb27b34 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -23,6 +23,7 @@
 #include "handle_scope-inl.h"
 #include "mirror/string.h"
 #include "scoped_thread_state_change-inl.h"
+#include "utf.h"
 
 namespace art {
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index bf49e84..85cf73b 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -22,15 +22,16 @@
 #include "interpreter_common.h"
 #include "interpreter_mterp_impl.h"
 #include "interpreter_switch_impl.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
 #include "jvalue-inl.h"
 #include "mirror/string-inl.h"
+#include "mterp/mterp.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
+#include "thread-inl.h"
 #include "unstarted_runtime.h"
-#include "mterp/mterp.h"
-#include "jit/jit.h"
-#include "jit/jit_code_cache.h"
 
 namespace art {
 namespace interpreter {
@@ -253,6 +254,13 @@
     if (UNLIKELY(instrumentation->HasMethodEntryListeners())) {
       instrumentation->MethodEnterEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                         method, 0);
+      if (UNLIKELY(self->IsExceptionPending())) {
+        instrumentation->MethodUnwindEvent(self,
+                                           shadow_frame.GetThisObject(code_item->ins_size_),
+                                           method,
+                                           0);
+        return JValue();
+      }
     }
 
     if (!stay_in_interpreter) {
@@ -264,7 +272,11 @@
 
           // Pop the shadow frame before calling into compiled code.
           self->PopShadowFrame();
-          ArtInterpreterToCompiledCodeBridge(self, nullptr, code_item, &shadow_frame, &result);
+          // Calculate the offset of the first input reg. The input registers are in the high regs.
+          // It's ok to access the code item here since JIT code will have been touched by the
+          // interpreter and compiler already.
+          uint16_t arg_offset = code_item->registers_size_ - code_item->ins_size_;
+          ArtInterpreterToCompiledCodeBridge(self, nullptr, &shadow_frame, arg_offset, &result);
           // Push the shadow frame back as the caller will expect it.
           self->PushShadowFrame(&shadow_frame);
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index ef0ddb3..d06ac23 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -32,6 +32,7 @@
 #include "reflection.h"
 #include "reflection-inl.h"
 #include "stack.h"
+#include "thread-inl.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -458,8 +459,8 @@
 
 void ArtInterpreterToCompiledCodeBridge(Thread* self,
                                         ArtMethod* caller,
-                                        const DexFile::CodeItem* code_item,
                                         ShadowFrame* shadow_frame,
+                                        uint16_t arg_offset,
                                         JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtMethod* method = shadow_frame->GetMethod();
@@ -482,9 +483,17 @@
       method = shadow_frame->GetMethod();
     }
   }
-  uint16_t arg_offset = (code_item == nullptr)
-                            ? 0
-                            : code_item->registers_size_ - code_item->ins_size_;
+  // Basic checks for the arg_offset. If there's no code item, the arg_offset must be 0. Otherwise,
+  // check that the arg_offset isn't greater than the number of registers. A stronger check is
+  // difficult since the frame may contain space for all the registers in the method, or only enough
+  // space for the arguments.
+  if (kIsDebugBuild) {
+    if (method->GetCodeItem() == nullptr) {
+      DCHECK_EQ(0u, arg_offset) << method->PrettyMethod();
+    } else {
+      DCHECK_LE(arg_offset, shadow_frame->NumberOfVRegs());
+    }
+  }
   jit::Jit* jit = Runtime::Current()->GetJit();
   if (jit != nullptr && caller != nullptr) {
     jit->NotifyInterpreterToCompiledCodeTransition(self, caller);
@@ -918,12 +927,23 @@
 
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
-
   // Number of registers for the callee's call frame.
   uint16_t num_regs;
+  // Test whether to use the interpreter or compiler entrypoint, and save that result to pass to
+  // PerformCall. A deoptimization could occur at any time, and we shouldn't change which
+  // entrypoint to use once we start building the shadow frame.
+  bool use_interpreter_entrypoint = ClassLinker::ShouldUseInterpreterEntrypoint(
+      called_method, called_method->GetEntryPointFromQuickCompiledCode());
   if (LIKELY(code_item != nullptr)) {
-    num_regs = code_item->registers_size_;
-    DCHECK_EQ(string_init ? number_of_inputs - 1 : number_of_inputs, code_item->ins_size_);
+    // When transitioning to compiled code, space only needs to be reserved for the input registers.
+    // The rest of the frame gets discarded. This also prevents accessing the called method's code
+    // item, saving memory by keeping code items of compiled code untouched.
+    if (Runtime::Current()->IsStarted() && !use_interpreter_entrypoint) {
+      num_regs = number_of_inputs;
+    } else {
+      num_regs = code_item->registers_size_;
+      DCHECK_EQ(string_init ? number_of_inputs - 1 : number_of_inputs, code_item->ins_size_);
+    }
   } else {
     DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
     num_regs = number_of_inputs;
@@ -1077,7 +1097,13 @@
     self->EndAssertNoThreadSuspension(old_cause);
   }
 
-  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
+  PerformCall(self,
+              code_item,
+              shadow_frame.GetMethod(),
+              first_dest_reg,
+              new_shadow_frame,
+              result,
+              use_interpreter_entrypoint);
 
   if (string_init && !self->IsExceptionPending()) {
     SetStringInitValueToAllAliases(&shadow_frame, string_init_vreg_this, *result);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 2589ad0..38edc7a 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -140,12 +140,6 @@
     result->SetJ(0);
     return false;
   } else {
-    if (called_method->IsIntrinsic()) {
-      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
-                               shadow_frame.GetResultRegister())) {
-        return !self->IsExceptionPending();
-      }
-    }
     jit::Jit* jit = Runtime::Current()->GetJit();
     if (jit != nullptr) {
       if (type == kVirtual) {
@@ -153,6 +147,12 @@
       }
       jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
     }
+    if (called_method->IsIntrinsic()) {
+      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
+                               shadow_frame.GetResultRegister())) {
+        return !self->IsExceptionPending();
+      }
+    }
     return DoCall<false, false>(called_method, self, shadow_frame, inst, inst_data, result);
   }
 }
@@ -527,10 +527,11 @@
   }
 }
 
+// The arg_offset is the offset to the first input register in the frame.
 void ArtInterpreterToCompiledCodeBridge(Thread* self,
                                         ArtMethod* caller,
-                                        const DexFile::CodeItem* code_item,
                                         ShadowFrame* shadow_frame,
+                                        uint16_t arg_offset,
                                         JValue* result);
 
 // Set string value created from StringFactory.newStringFromXXX() into all aliases of
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index b191dd7..45788e7 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -26,13 +26,13 @@
 namespace art {
 namespace interpreter {
 
-#define HANDLE_PENDING_EXCEPTION()                                                              \
+#define HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(instr)                                    \
   do {                                                                                          \
     DCHECK(self->IsExceptionPending());                                                         \
     self->AllowThreadSuspension();                                                              \
     uint32_t found_dex_pc = FindNextInstructionFollowingException(self, shadow_frame,           \
                                                                   inst->GetDexPc(insns),        \
-                                                                  instrumentation);             \
+                                                                  instr);                       \
     if (found_dex_pc == DexFile::kDexNoIndex) {                                                 \
       /* Structured locking is to be enforced for abnormal termination, too. */                 \
       DoMonitorCheckOnExit<do_assignability_check>(self, &shadow_frame);                        \
@@ -47,6 +47,8 @@
     }                                                                                           \
   } while (false)
 
+#define HANDLE_PENDING_EXCEPTION() HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(instrumentation)
+
 #define POSSIBLY_HANDLE_PENDING_EXCEPTION(_is_exception_pending, _next_function)  \
   do {                                                                            \
     if (UNLIKELY(_is_exception_pending)) {                                        \
@@ -218,6 +220,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -235,6 +241,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -253,6 +263,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -270,6 +284,10 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
@@ -307,6 +325,12 @@
           instrumentation->MethodExitEvent(self, shadow_frame.GetThisObject(code_item->ins_size_),
                                            shadow_frame.GetMethod(), inst->GetDexPc(insns),
                                            result);
+          if (UNLIKELY(self->IsExceptionPending())) {
+            // Don't send another method exit event.
+            HANDLE_PENDING_EXCEPTION_WITH_INSTRUMENTATION(nullptr);
+          }
+          // Re-load the result from its vreg: the object might have moved during MethodExitEvent.
+          result.SetL(shadow_frame.GetVRegReference(ref_idx));
         }
         if (interpret_one_instruction) {
           /* Signal mterp to return to caller */
diff --git a/runtime/interpreter/mterp/mips/op_double_to_int.S b/runtime/interpreter/mterp/mips/op_double_to_int.S
index 3b44964..6d7c6ca 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_int.S
@@ -3,7 +3,8 @@
      *
      * We have to clip values to int min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -11,29 +12,20 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-
+#ifndef MIPS32REVGE6
     li        t0, INT_MIN_AS_DOUBLE_HIGH
     mtc1      zero, fa1
     MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
-#ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    cmp.le.d  ft0, fa1, fa0
-    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
-    cmp.eq.d  ft0, fa0, fa0
-    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
-#else
     c.ole.d   fcc0, fa1, fa0
+#endif
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+#ifndef MIPS32REVGE6
     bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
     c.eq.d    fcc0, fa0, fa0
     mtc1      zero, fa0
     MOVE_TO_FPU_HIGH(zero, fa0, fa0f)
     movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
-#endif
 1:
+#endif
     trunc.w.d fa0, fa0
     SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_double_to_long.S b/runtime/interpreter/mterp/mips/op_double_to_long.S
index 78d4a8f..459ab7e 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_long.S
@@ -3,7 +3,8 @@
      *
      * We have to clip values to long min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -13,19 +14,7 @@
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
 #ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    li        t0, LONG_MIN_AS_DOUBLE_HIGH
-    mtc1      zero, fa1
-    mthc1     t0, fa1
-    cmp.le.d  ft0, fa1, fa0
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
-    cmp.eq.d  ft0, fa0, fa0
-    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
-1:
     trunc.l.d fa0, fa0
     SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
 #else
diff --git a/runtime/interpreter/mterp/mips/op_float_to_int.S b/runtime/interpreter/mterp/mips/op_float_to_int.S
index 087e50f..26a0988 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_int.S
@@ -3,7 +3,8 @@
      *
      * We have to clip values to int min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -11,26 +12,18 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
+#ifndef MIPS32REVGE6
     li        t0, INT_MIN_AS_FLOAT
     mtc1      t0, fa1
-#ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    cmp.le.s  ft0, fa1, fa0
-    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
-    cmp.eq.s  ft0, fa0, fa0
-    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
-#else
     c.ole.s   fcc0, fa1, fa0
+#endif
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+#ifndef MIPS32REVGE6
     bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
     c.eq.s    fcc0, fa0, fa0
     mtc1      zero, fa0
     movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
-#endif
 1:
+#endif
     trunc.w.s fa0, fa0
     SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_float_to_long.S b/runtime/interpreter/mterp/mips/op_float_to_long.S
index dc88a78..b8f8efb 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_long.S
@@ -3,7 +3,8 @@
      *
      * We have to clip values to long min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -12,18 +13,7 @@
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
 #ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    li        t0, LONG_MIN_AS_FLOAT
-    mtc1      t0, fa1
-    cmp.le.s  ft0, fa1, fa0
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
-    cmp.eq.s  ft0, fa0, fa0
-    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
-1:
     trunc.l.s fa0, fa0
     SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
 #else
diff --git a/runtime/interpreter/mterp/mips64/op_double_to_int.S b/runtime/interpreter/mterp/mips64/op_double_to_int.S
index aa2cbca..d099522 100644
--- a/runtime/interpreter/mterp/mips64/op_double_to_int.S
+++ b/runtime/interpreter/mterp/mips64/op_double_to_int.S
@@ -1,23 +1,3 @@
 %include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    dli     t0, INT_MIN_AS_DOUBLE
-    dmtc1   t0, f1
-    cmp.le.d f1, f1, f0
-    bc1nez  f1, .L${opcode}_trunc
-    cmp.eq.d f1, f0, f0
-    li      t0, INT_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .L${opcode}_done
-%break
-.L${opcode}_trunc:
     trunc.w.d f0, f0
-    mfc1    t0, f0
-.L${opcode}_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
+%include "mips64/fcvtFooter.S" { "suffix":"_FLOAT", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_double_to_long.S b/runtime/interpreter/mterp/mips64/op_double_to_long.S
index 777cfeb..9b65da5 100644
--- a/runtime/interpreter/mterp/mips64/op_double_to_long.S
+++ b/runtime/interpreter/mterp/mips64/op_double_to_long.S
@@ -1,23 +1,3 @@
 %include "mips64/fcvtHeader.S" { "suffix":"_DOUBLE", "valreg":"f0" }
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    dli     t0, LONG_MIN_AS_DOUBLE
-    dmtc1   t0, f1
-    cmp.le.d f1, f1, f0
-    bc1nez  f1, .L${opcode}_trunc
-    cmp.eq.d f1, f0, f0
-    dli     t0, LONG_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .L${opcode}_done
-%break
-.L${opcode}_trunc:
     trunc.l.d f0, f0
-    dmfc1   t0, f0
-.L${opcode}_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG_WIDE t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
+%include "mips64/fcvtFooter.S" { "suffix":"_DOUBLE", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_float_to_int.S b/runtime/interpreter/mterp/mips64/op_float_to_int.S
index d957540..2806973 100644
--- a/runtime/interpreter/mterp/mips64/op_float_to_int.S
+++ b/runtime/interpreter/mterp/mips64/op_float_to_int.S
@@ -1,23 +1,3 @@
 %include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    li      t0, INT_MIN_AS_FLOAT
-    mtc1    t0, f1
-    cmp.le.s f1, f1, f0
-    bc1nez  f1, .L${opcode}_trunc
-    cmp.eq.s f1, f0, f0
-    li      t0, INT_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .L${opcode}_done
-%break
-.L${opcode}_trunc:
     trunc.w.s f0, f0
-    mfc1    t0, f0
-.L${opcode}_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
+%include "mips64/fcvtFooter.S" { "suffix":"_FLOAT", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mips64/op_float_to_long.S b/runtime/interpreter/mterp/mips64/op_float_to_long.S
index 5d036c8..c40c8a6 100644
--- a/runtime/interpreter/mterp/mips64/op_float_to_long.S
+++ b/runtime/interpreter/mterp/mips64/op_float_to_long.S
@@ -1,23 +1,3 @@
 %include "mips64/fcvtHeader.S" { "suffix":"_FLOAT", "valreg":"f0" }
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    li      t0, LONG_MIN_AS_FLOAT
-    mtc1    t0, f1
-    cmp.le.s f1, f1, f0
-    bc1nez  f1, .L${opcode}_trunc
-    cmp.eq.s f1, f0, f0
-    dli     t0, LONG_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .L${opcode}_done
-%break
-.L${opcode}_trunc:
     trunc.l.s f0, f0
-    dmfc1   t0, f0
-.L${opcode}_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG_WIDE t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
+%include "mips64/fcvtFooter.S" { "suffix":"_DOUBLE", "valreg":"f0" }
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index a53040c..5f94d04 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -276,6 +276,12 @@
         vtable_idx, kRuntimePointerSize);
     if ((called_method != nullptr) && called_method->IsIntrinsic()) {
       if (MterpHandleIntrinsic(shadow_frame, called_method, inst, inst_data, result_register)) {
+        jit::Jit* jit = Runtime::Current()->GetJit();
+        if (jit != nullptr) {
+          jit->InvokeVirtualOrInterface(
+              receiver, shadow_frame->GetMethod(), shadow_frame->GetDexPC(), called_method);
+          jit->AddSamples(self, shadow_frame->GetMethod(), 1, /*with_backedges*/false);
+        }
         return !self->IsExceptionPending();
       }
     }
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index 579afc2..6362897 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -3967,7 +3967,8 @@
      *
      * We have to clip values to int min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3975,27 +3976,19 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
+#ifndef MIPS32REVGE6
     li        t0, INT_MIN_AS_FLOAT
     mtc1      t0, fa1
-#ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    cmp.le.s  ft0, fa1, fa0
-    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
-    cmp.eq.s  ft0, fa0, fa0
-    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
-#else
     c.ole.s   fcc0, fa1, fa0
+#endif
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+#ifndef MIPS32REVGE6
     bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
     c.eq.s    fcc0, fa0, fa0
     mtc1      zero, fa0
     movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
-#endif
 1:
+#endif
     trunc.w.s fa0, fa0
     SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
@@ -4008,7 +4001,8 @@
      *
      * We have to clip values to long min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -4017,18 +4011,7 @@
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
 #ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    li        t0, LONG_MIN_AS_FLOAT
-    mtc1      t0, fa1
-    cmp.le.s  ft0, fa1, fa0
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
-    cmp.eq.s  ft0, fa0, fa0
-    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
-1:
     trunc.l.s fa0, fa0
     SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
 #else
@@ -4084,7 +4067,8 @@
      *
      * We have to clip values to int min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -4092,30 +4076,21 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-
+#ifndef MIPS32REVGE6
     li        t0, INT_MIN_AS_DOUBLE_HIGH
     mtc1      zero, fa1
     MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
-#ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    cmp.le.d  ft0, fa1, fa0
-    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
-    cmp.eq.d  ft0, fa0, fa0
-    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
-#else
     c.ole.d   fcc0, fa1, fa0
+#endif
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+#ifndef MIPS32REVGE6
     bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
     c.eq.d    fcc0, fa0, fa0
     mtc1      zero, fa0
     MOVE_TO_FPU_HIGH(zero, fa0, fa0f)
     movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
-#endif
 1:
+#endif
     trunc.w.d fa0, fa0
     SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
@@ -4128,7 +4103,8 @@
      *
      * We have to clip values to long min/max per the specification.  The
      * expected common case is a "reasonable" value that converts directly
-     * to modest integer.  The EABI convert function isn't doing this for us.
+     * to modest integer.  The EABI convert function isn't doing this for us
+     * for pre-R6.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -4138,19 +4114,7 @@
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
 #ifdef MIPS32REVGE6
-    /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
-     */
-    li        t0, LONG_MIN_AS_DOUBLE_HIGH
-    mtc1      zero, fa1
-    mthc1     t0, fa1
-    cmp.le.d  ft0, fa1, fa0
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
-    cmp.eq.d  ft0, fa0, fa0
-    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
-1:
     trunc.l.d fa0, fa0
     SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
 #else
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 3656df9..bc0d90c 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -3699,19 +3699,27 @@
     GET_VREG_FLOAT f0, a2
     FETCH_ADVANCE_INST 1                # advance rPC, load rINST
 
+    trunc.w.s f0, f0
+/* File: mips64/fcvtFooter.S */
     /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
      */
-    li      t0, INT_MIN_AS_FLOAT
-    mtc1    t0, f1
-    cmp.le.s f1, f1, f0
-    bc1nez  f1, .Lop_float_to_int_trunc
-    cmp.eq.s f1, f0, f0
-    li      t0, INT_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .Lop_float_to_int_done
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
 
 /* ------------------------------ */
     .balign 128
@@ -3734,19 +3742,28 @@
     GET_VREG_FLOAT f0, a2
     FETCH_ADVANCE_INST 1                # advance rPC, load rINST
 
+    trunc.l.s f0, f0
+/* File: mips64/fcvtFooter.S */
     /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
      */
-    li      t0, LONG_MIN_AS_FLOAT
-    mtc1    t0, f1
-    cmp.le.s f1, f1, f0
-    bc1nez  f1, .Lop_float_to_long_trunc
-    cmp.eq.s f1, f0, f0
-    dli     t0, LONG_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .Lop_float_to_long_done
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
+
 
 /* ------------------------------ */
     .balign 128
@@ -3817,19 +3834,27 @@
     GET_VREG_DOUBLE f0, a2
     FETCH_ADVANCE_INST 1                # advance rPC, load rINST
 
+    trunc.w.d f0, f0
+/* File: mips64/fcvtFooter.S */
     /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
      */
-    dli     t0, INT_MIN_AS_DOUBLE
-    dmtc1   t0, f1
-    cmp.le.d f1, f1, f0
-    bc1nez  f1, .Lop_double_to_int_trunc
-    cmp.eq.d f1, f0, f0
-    li      t0, INT_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .Lop_double_to_int_done
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_FLOAT f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
 
 /* ------------------------------ */
     .balign 128
@@ -3852,19 +3877,27 @@
     GET_VREG_DOUBLE f0, a2
     FETCH_ADVANCE_INST 1                # advance rPC, load rINST
 
+    trunc.l.d f0, f0
+/* File: mips64/fcvtFooter.S */
     /*
-     * TODO: simplify this when the MIPS64R6 emulator
-     * supports NAN2008=1.
+     * Stores a specified register containing the result of conversion
+     * from or to a floating-point type and jumps to the next instruction.
+     *
+     * Expects a1 to contain the destination Dalvik register number.
+     * a1 is set up by fcvtHeader.S.
+     *
+     * For: int-to-float, int-to-double, long-to-float, long-to-double,
+     *      float-to-int, float-to-long, float-to-double, double-to-int,
+     *      double-to-long, double-to-float, neg-float, neg-double.
+     *
+     * Note that this file can't be included after a break in other files
+     * and in those files its contents appear as a copy.
+     * See: float-to-int, float-to-long, double-to-int, double-to-long.
      */
-    dli     t0, LONG_MIN_AS_DOUBLE
-    dmtc1   t0, f1
-    cmp.le.d f1, f1, f0
-    bc1nez  f1, .Lop_double_to_long_trunc
-    cmp.eq.d f1, f0, f0
-    dli     t0, LONG_MIN
-    mfc1    t1, f1
-    and     t0, t0, t1
-    b       .Lop_double_to_long_done
+    GET_INST_OPCODE v0                  # extract opcode from rINST
+    SET_VREG_DOUBLE f0, a1
+    GOTO_OPCODE v0                      # jump to next instruction
+
 
 /* ------------------------------ */
     .balign 128
@@ -7132,46 +7165,6 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_float_to_int */
-.Lop_float_to_int_trunc:
-    trunc.w.s f0, f0
-    mfc1    t0, f0
-.Lop_float_to_int_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
-
-/* continuation for op_float_to_long */
-.Lop_float_to_long_trunc:
-    trunc.l.s f0, f0
-    dmfc1   t0, f0
-.Lop_float_to_long_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG_WIDE t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
-
-/* continuation for op_double_to_int */
-.Lop_double_to_int_trunc:
-    trunc.w.d f0, f0
-    mfc1    t0, f0
-.Lop_double_to_int_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
-
-/* continuation for op_double_to_long */
-.Lop_double_to_long_trunc:
-    trunc.l.d f0, f0
-    dmfc1   t0, f0
-.Lop_double_to_long_done:
-    /* Can't include fcvtFooter.S after break */
-    GET_INST_OPCODE v0                  # extract opcode from rINST
-    SET_VREG_WIDE t0, a1
-    GOTO_OPCODE v0                      # jump to next instruction
-
     .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
     .global artMterpAsmSisterEnd
 artMterpAsmSisterEnd:
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 96934bc..152cce4 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -50,7 +50,7 @@
 #include "mirror/string-inl.h"
 #include "nth_caller_visitor.h"
 #include "reflection.h"
-#include "thread.h"
+#include "thread-inl.h"
 #include "transaction.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
diff --git a/runtime/invoke_type.h b/runtime/invoke_type.h
index de07c72..a003f7f 100644
--- a/runtime/invoke_type.h
+++ b/runtime/invoke_type.h
@@ -21,7 +21,7 @@
 
 namespace art {
 
-enum InvokeType {
+enum InvokeType : uint32_t {
   kStatic,     // <<static>>
   kDirect,     // <<direct>>
   kVirtual,    // <<virtual>>
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index 6d3118e..2ad3b29 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "jni_internal.h"
+#include "java_vm_ext.h"
 
 #include <dlfcn.h>
 
@@ -22,18 +22,20 @@
 
 #include "art_method-inl.h"
 #include "base/dumpable.h"
-#include "base/mutex.h"
+#include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
 #include "check_jni.h"
 #include "dex_file-inl.h"
 #include "fault_handler.h"
+#include "gc_root-inl.h"
 #include "indirect_reference_table-inl.h"
+#include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "nativebridge/native_bridge.h"
 #include "nativeloader/native_loader.h"
-#include "java_vm_ext.h"
+#include "object_callbacks.h"
 #include "parsed_options.h"
 #include "runtime-inl.h"
 #include "runtime_options.h"
@@ -144,19 +146,24 @@
     return needs_native_bridge_;
   }
 
-  void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr) {
+  // No mutator lock since dlsym may block for a while if another thread is doing dlopen.
+  void* FindSymbol(const std::string& symbol_name, const char* shorty = nullptr)
+      REQUIRES(!Locks::mutator_lock_) {
     return NeedsNativeBridge()
         ? FindSymbolWithNativeBridge(symbol_name.c_str(), shorty)
         : FindSymbolWithoutNativeBridge(symbol_name.c_str());
   }
 
-  void* FindSymbolWithoutNativeBridge(const std::string& symbol_name) {
+  // No mutator lock since dlsym may block for a while if another thread is doing dlopen.
+  void* FindSymbolWithoutNativeBridge(const std::string& symbol_name)
+      REQUIRES(!Locks::mutator_lock_) {
     CHECK(!NeedsNativeBridge());
 
     return dlsym(handle_, symbol_name.c_str());
   }
 
-  void* FindSymbolWithNativeBridge(const std::string& symbol_name, const char* shorty) {
+  void* FindSymbolWithNativeBridge(const std::string& symbol_name, const char* shorty)
+      REQUIRES(!Locks::mutator_lock_) {
     CHECK(NeedsNativeBridge());
 
     uint32_t len = 0;
@@ -235,8 +242,8 @@
   }
 
   // See section 11.3 "Linking Native Methods" of the JNI spec.
-  void* FindNativeMethod(ArtMethod* m, std::string& detail)
-      REQUIRES(Locks::jni_libraries_lock_)
+  void* FindNativeMethod(Thread* self, ArtMethod* m, std::string& detail)
+      REQUIRES(!Locks::jni_libraries_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     std::string jni_short_name(m->JniShortName());
     std::string jni_long_name(m->JniLongName());
@@ -245,25 +252,18 @@
     void* const declaring_class_loader_allocator =
         Runtime::Current()->GetClassLinker()->GetAllocatorForClassLoader(declaring_class_loader);
     CHECK(declaring_class_loader_allocator != nullptr);
-    for (const auto& lib : libraries_) {
-      SharedLibrary* const library = lib.second;
-      // Use the allocator address for class loader equality to avoid unnecessary weak root decode.
-      if (library->GetClassLoaderAllocator() != declaring_class_loader_allocator) {
-        // We only search libraries loaded by the appropriate ClassLoader.
-        continue;
-      }
-      // Try the short name then the long name...
-      const char* shorty = library->NeedsNativeBridge()
-          ? m->GetShorty()
-          : nullptr;
-      void* fn = library->FindSymbol(jni_short_name, shorty);
-      if (fn == nullptr) {
-        fn = library->FindSymbol(jni_long_name, shorty);
-      }
-      if (fn != nullptr) {
-        VLOG(jni) << "[Found native code for " << m->PrettyMethod()
-                  << " in \"" << library->GetPath() << "\"]";
-        return fn;
+    // TODO: Avoid calling GetShorty here to prevent dirtying dex pages?
+    const char* shorty = m->GetShorty();
+    {
+      // Go to suspended since dlsym may block for a long time if other threads are using dlopen.
+      ScopedThreadSuspension sts(self, kNative);
+      void* native_code = FindNativeMethodInternal(self,
+                                                   declaring_class_loader_allocator,
+                                                   shorty,
+                                                   jni_short_name,
+                                                   jni_long_name);
+      if (native_code != nullptr) {
+        return native_code;
       }
     }
     detail += "No implementation found for ";
@@ -272,22 +272,51 @@
     return nullptr;
   }
 
+  void* FindNativeMethodInternal(Thread* self,
+                                 void* declaring_class_loader_allocator,
+                                 const char* shorty,
+                                 const std::string& jni_short_name,
+                                 const std::string& jni_long_name)
+      REQUIRES(!Locks::jni_libraries_lock_)
+      REQUIRES(!Locks::mutator_lock_) {
+    MutexLock mu(self, *Locks::jni_libraries_lock_);
+    for (const auto& lib : libraries_) {
+      SharedLibrary* const library = lib.second;
+      // Use the allocator address for class loader equality to avoid unnecessary weak root decode.
+      if (library->GetClassLoaderAllocator() != declaring_class_loader_allocator) {
+        // We only search libraries loaded by the appropriate ClassLoader.
+        continue;
+      }
+      // Try the short name then the long name...
+      const char* arg_shorty = library->NeedsNativeBridge() ? shorty : nullptr;
+      void* fn = library->FindSymbol(jni_short_name, arg_shorty);
+      if (fn == nullptr) {
+        fn = library->FindSymbol(jni_long_name, arg_shorty);
+      }
+      if (fn != nullptr) {
+        VLOG(jni) << "[Found native code for " << jni_long_name
+                  << " in \"" << library->GetPath() << "\"]";
+        return fn;
+      }
+    }
+    return nullptr;
+  }
+
   // Unload native libraries with cleared class loaders.
   void UnloadNativeLibraries()
       REQUIRES(!Locks::jni_libraries_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ScopedObjectAccessUnchecked soa(Thread::Current());
+    Thread* const self = Thread::Current();
     std::vector<SharedLibrary*> unload_libraries;
     {
-      MutexLock mu(soa.Self(), *Locks::jni_libraries_lock_);
+      MutexLock mu(self, *Locks::jni_libraries_lock_);
       for (auto it = libraries_.begin(); it != libraries_.end(); ) {
         SharedLibrary* const library = it->second;
         // If class loader is null then it was unloaded, call JNI_OnUnload.
         const jweak class_loader = library->GetClassLoader();
         // If class_loader is a null jobject then it is the boot class loader. We should not unload
         // the native libraries of the boot class loader.
-        if (class_loader != nullptr &&
-            soa.Self()->IsJWeakCleared(class_loader)) {
+        if (class_loader != nullptr && self->IsJWeakCleared(class_loader)) {
           unload_libraries.push_back(library);
           it = libraries_.erase(it);
         } else {
@@ -295,6 +324,7 @@
         }
       }
     }
+    ScopedThreadSuspension sts(self, kNative);
     // Do this without holding the jni libraries lock to prevent possible deadlocks.
     typedef void (*JNI_OnUnloadFn)(JavaVM*, void*);
     for (auto library : unload_libraries) {
@@ -304,7 +334,7 @@
       } else {
         VLOG(jni) << "[JNI_OnUnload found for \"" << library->GetPath() << "\"]: Calling...";
         JNI_OnUnloadFn jni_on_unload = reinterpret_cast<JNI_OnUnloadFn>(sym);
-        jni_on_unload(soa.Vm(), nullptr);
+        jni_on_unload(self->GetJniEnv()->vm, nullptr);
       }
       delete library;
     }
@@ -955,12 +985,8 @@
   // If this is a static method, it could be called before the class has been initialized.
   CHECK(c->IsInitializing()) << c->GetStatus() << " " << m->PrettyMethod();
   std::string detail;
-  void* native_method;
-  Thread* self = Thread::Current();
-  {
-    MutexLock mu(self, *Locks::jni_libraries_lock_);
-    native_method = libraries_->FindNativeMethod(m, detail);
-  }
+  Thread* const self = Thread::Current();
+  void* native_method = libraries_->FindNativeMethod(self, m, detail);
   if (native_method == nullptr) {
     // Lookup JNI native methods from native TI Agent libraries. See runtime/ti/agent.h for more
     // information. Agent libraries are searched for native methods after all jni libraries.
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 7374920..50aabdc 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -32,6 +32,7 @@
 }  // namespace mirror
 
 class ArtMethod;
+class IsMarkedVisitor;
 class Libraries;
 class ParsedOptions;
 class Runtime;
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index 0aa04c1..ede4f9e 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -24,7 +24,7 @@
 
 #include "base/logging.h"
 #include "jdwp/jdwp_priv.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 #ifdef ART_TARGET_ANDROID
 #include "cutils/sockets.h"
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 96249f9..4ab3d69 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -500,8 +500,8 @@
       }
       break;
     case MK_CONDITIONAL:
-      CHECK(false);  // should not be getting these
-      break;
+      LOG(FATAL) << "Unexpected MK_CONDITIONAL";  // should not be getting these
+      UNREACHABLE();
     case MK_THREAD_ONLY:
       if (!Dbg::MatchThread(pMod->threadOnly.threadId, basket.thread)) {
         return false;
diff --git a/runtime/jdwp/jdwp_expand_buf.cc b/runtime/jdwp/jdwp_expand_buf.cc
index 961dd36..f0b8c91 100644
--- a/runtime/jdwp/jdwp_expand_buf.cc
+++ b/runtime/jdwp/jdwp_expand_buf.cc
@@ -152,7 +152,9 @@
 
 static void SetUtf8String(uint8_t* buf, const char* str, size_t strLen) {
   Set4BE(buf, strLen);
-  memcpy(buf + sizeof(uint32_t), str, strLen);
+  if (str != nullptr) {
+    memcpy(buf + sizeof(uint32_t), str, strLen);
+  }
 }
 
 /*
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index e8a9904..618332b 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -33,7 +33,7 @@
 #include "jdwp/jdwp_priv.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
diff --git a/runtime/jit/debugger_interface.cc b/runtime/jit/debugger_interface.cc
index 7cdd7c5..135d9b1 100644
--- a/runtime/jit/debugger_interface.cc
+++ b/runtime/jit/debugger_interface.cc
@@ -18,7 +18,7 @@
 
 #include "base/logging.h"
 #include "base/mutex.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread.h"
 
 #include <unordered_map>
@@ -143,7 +143,7 @@
 bool DeleteJITCodeEntryForAddress(uintptr_t address) {
   Thread* self = Thread::Current();
   MutexLock mu(self, g_jit_debug_mutex);
-  const auto& it = g_jit_code_entries.find(address);
+  const auto it = g_jit_code_entries.find(address);
   if (it == g_jit_code_entries.end()) {
     return false;
   }
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index ae474da..969a570 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -32,7 +32,9 @@
 #include "profile_saver.h"
 #include "runtime.h"
 #include "runtime_options.h"
+#include "stack.h"
 #include "stack_map.h"
+#include "thread-inl.h"
 #include "thread_list.h"
 #include "utils.h"
 
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 4f5bebf..f898d41 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -17,14 +17,12 @@
 #ifndef ART_RUNTIME_JIT_JIT_H_
 #define ART_RUNTIME_JIT_JIT_H_
 
-#include "base/arena_allocator.h"
 #include "base/histogram-inl.h"
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "jit/profile_saver_options.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "profile_compilation_info.h"
 #include "thread_pool.h"
 
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 5232252..2744c4f 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -29,12 +29,16 @@
 #include "entrypoints/runtime_asm_entrypoints.h"
 #include "gc/accounting/bitmap-inl.h"
 #include "gc/scoped_gc_critical_section.h"
+#include "intern_table.h"
 #include "jit/jit.h"
 #include "jit/profiling_info.h"
 #include "linear_alloc.h"
 #include "mem_map.h"
 #include "oat_file-inl.h"
+#include "oat_quick_method_header.h"
+#include "object_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread_list.h"
 
 namespace art {
@@ -192,19 +196,26 @@
 
 class ScopedCodeCacheWrite : ScopedTrace {
  public:
-  explicit ScopedCodeCacheWrite(MemMap* code_map)
+  explicit ScopedCodeCacheWrite(MemMap* code_map, bool only_for_tlb_shootdown = false)
       : ScopedTrace("ScopedCodeCacheWrite"),
-        code_map_(code_map) {
+        code_map_(code_map),
+        only_for_tlb_shootdown_(only_for_tlb_shootdown) {
     ScopedTrace trace("mprotect all");
-    CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtAll);
+    CHECKED_MPROTECT(
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtAll);
   }
   ~ScopedCodeCacheWrite() {
     ScopedTrace trace("mprotect code");
-    CHECKED_MPROTECT(code_map_->Begin(), code_map_->Size(), kProtCode);
+    CHECKED_MPROTECT(
+        code_map_->Begin(), only_for_tlb_shootdown_ ? kPageSize : code_map_->Size(), kProtCode);
   }
  private:
   MemMap* const code_map_;
 
+  // If we're using ScopedCodeCacheWrite only for TLB shootdown, we limit the scope of mprotect
+  // to one page.
+  const bool only_for_tlb_shootdown_;
+
   DISALLOW_COPY_AND_ASSIGN(ScopedCodeCacheWrite);
 };
 
@@ -519,6 +530,18 @@
   }
 }
 
+static void ClearMethodCounter(ArtMethod* method, bool was_warm) {
+  if (was_warm) {
+    method->AddAccessFlags(kAccPreviouslyWarm);
+  }
+  // We reset the counter to 1 so that the profile knows that the method was executed at least once.
+  // This is required for layout purposes.
+  // We also need to make sure we'll pass the warmup threshold again, so we set to 0 if
+  // the warmup threshold is 1.
+  uint16_t jit_warmup_threshold = Runtime::Current()->GetJITOptions()->GetWarmupThreshold();
+  method->SetCounter(std::min(jit_warmup_threshold - 1, 1));
+}
+
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
@@ -565,11 +588,6 @@
           core_spill_mask,
           fp_spill_mask,
           code_size);
-      DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
-      DCHECK_LE(roots_data, stack_map);
-      // Flush data cache, as compiled code references literals in it.
-      FlushDataCache(reinterpret_cast<char*>(roots_data),
-                     reinterpret_cast<char*>(roots_data + data_size));
       // Flush caches before we remove write permission because some ARMv8 Qualcomm kernels may
       // trigger a segfault if a page fault occurs when requesting a cache maintenance operation.
       // This is a kernel bug that we need to work around until affected devices (e.g. Nexus 5X and
@@ -595,11 +613,10 @@
     bool single_impl_still_valid = true;
     for (ArtMethod* single_impl : cha_single_implementation_list) {
       if (!single_impl->HasSingleImplementation()) {
-        // We simply discard the compiled code. Clear the
-        // counter so that it may be recompiled later. Hopefully the
-        // class hierarchy will be more stable when compilation is retried.
+        // Simply discard the compiled code. Clear the counter so that it may be recompiled later.
+        // Hopefully the class hierarchy will be more stable when compilation is retried.
         single_impl_still_valid = false;
-        method->ClearCounter();
+        ClearMethodCounter(method, /*was_warm*/ false);
         break;
       }
     }
@@ -621,10 +638,18 @@
     // possible that the compiled code is considered invalidated by some class linking,
     // but below we still make the compiled code valid for the method.
     MutexLock mu(self, lock_);
-    method_code_map_.Put(code_ptr, method);
     // Fill the root table before updating the entry point.
     DCHECK_EQ(FromStackMapToRoots(stack_map), roots_data);
+    DCHECK_LE(roots_data, stack_map);
     FillRootTable(roots_data, roots);
+    {
+      // Flush data cache, as compiled code references literals in it.
+      // We also need a TLB shootdown to act as memory barrier across cores.
+      ScopedCodeCacheWrite ccw(code_map_.get(), /* only_for_tlb_shootdown */ true);
+      FlushDataCache(reinterpret_cast<char*>(roots_data),
+                     reinterpret_cast<char*>(roots_data + data_size));
+    }
+    method_code_map_.Put(code_ptr, method);
     if (osr) {
       number_of_osr_compilations_++;
       osr_code_map_.Put(method, code_ptr);
@@ -1060,7 +1085,7 @@
           info->SetSavedEntryPoint(nullptr);
           // We are going to move this method back to interpreter. Clear the counter now to
           // give it a chance to be hot again.
-          info->GetMethod()->ClearCounter();
+          ClearMethodCounter(info->GetMethod(), /*was_warm*/ true);
         }
       }
     } else if (kIsDebugBuild) {
@@ -1269,6 +1294,7 @@
                                       std::vector<ProfileMethodInfo>& methods) {
   ScopedTrace trace(__FUNCTION__);
   MutexLock mu(Thread::Current(), lock_);
+  uint16_t jit_compile_threshold = Runtime::Current()->GetJITOptions()->GetCompileThreshold();
   for (const ProfilingInfo* info : profiling_infos_) {
     ArtMethod* method = info->GetMethod();
     const DexFile* dex_file = method->GetDexFile();
@@ -1277,8 +1303,18 @@
       continue;
     }
     std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
+
+    // If the method didn't reach the compilation threshold don't save the inline caches.
+    // They might be incomplete and cause unnecessary deoptimizations.
+    // If the inline cache is empty the compiler will generate a regular invoke virtual/interface.
+    if (method->GetCounter() < jit_compile_threshold) {
+      methods.emplace_back(/*ProfileMethodInfo*/
+          dex_file, method->GetDexMethodIndex(), inline_caches);
+      continue;
+    }
+
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
-      std::vector<ProfileMethodInfo::ProfileClassReference> profile_classes;
+      std::vector<TypeReference> profile_classes;
       const InlineCache& cache = info->cache_[i];
       ArtMethod* caller = info->GetMethod();
       bool is_missing_types = false;
@@ -1356,10 +1392,9 @@
   ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   if (info == nullptr) {
     VLOG(jit) << method->PrettyMethod() << " needs a ProfilingInfo to be compiled";
-    // Because the counter is not atomic, there are some rare cases where we may not
-    // hit the threshold for creating the ProfilingInfo. Reset the counter now to
-    // "correct" this.
-    method->ClearCounter();
+    // Because the counter is not atomic, there are some rare cases where we may not hit the
+    // threshold for creating the ProfilingInfo. Reset the counter now to "correct" this.
+    ClearMethodCounter(method, /*was_warm*/ false);
     return false;
   }
 
@@ -1411,12 +1446,11 @@
   }
 
   if (method->GetEntryPointFromQuickCompiledCode() == header->GetEntryPoint()) {
-    // The entrypoint is the one to invalidate, so we just update
-    // it to the interpreter entry point and clear the counter to get the method
-    // Jitted again.
+    // The entrypoint is the one to invalidate, so we just update it to the interpreter entry point
+    // and clear the counter to get the method Jitted again.
     Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
         method, GetQuickToInterpreterBridge());
-    method->ClearCounter();
+    ClearMethodCounter(method, /*was_warm*/ profiling_info != nullptr);
   } else {
     MutexLock mu(Thread::Current(), lock_);
     auto it = osr_code_map_.find(method);
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 612d06b..9ecc876 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -29,7 +29,6 @@
 #include "jni.h"
 #include "method_reference.h"
 #include "oat_file.h"
-#include "object_callbacks.h"
 #include "profile_compilation_info.h"
 #include "safe_map.h"
 #include "thread_pool.h"
@@ -39,6 +38,8 @@
 class ArtMethod;
 class LinearAlloc;
 class InlineCache;
+class IsMarkedVisitor;
+class OatQuickMethodHeader;
 class ProfilingInfo;
 
 namespace jit {
diff --git a/runtime/jit/profile_compilation_info-inl.h b/runtime/jit/profile_compilation_info-inl.h
new file mode 100644
index 0000000..8a067a5
--- /dev/null
+++ b/runtime/jit/profile_compilation_info-inl.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_INL_H_
+#define ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_INL_H_
+
+#include "profile_compilation_info.h"
+
+namespace art {
+
+template <class Iterator>
+inline bool ProfileCompilationInfo::AddSampledMethodsForDex(bool startup,
+                                                            const DexFile* dex_file,
+                                                            Iterator index_begin,
+                                                            Iterator index_end) {
+  DexFileData* data = GetOrAddDexFileData(dex_file);
+  if (data == nullptr) {
+    return false;
+  }
+  for (auto it = index_begin; it != index_end; ++it) {
+    DCHECK_LT(*it, data->num_method_ids);
+    data->AddSampledMethod(startup, *it);
+  }
+  return true;
+}
+
+template <class Iterator>
+inline bool ProfileCompilationInfo::AddHotMethodsForDex(const DexFile* dex_file,
+                                                        Iterator index_begin,
+                                                        Iterator index_end) {
+  DexFileData* data = GetOrAddDexFileData(dex_file);
+  if (data == nullptr) {
+    return false;
+  }
+  for (auto it = index_begin; it != index_end; ++it) {
+    DCHECK_LT(*it, data->num_method_ids);
+    data->FindOrAddMethod(*it);
+  }
+  return true;
+}
+
+template <class Iterator>
+inline bool ProfileCompilationInfo::AddClassesForDex(const DexFile* dex_file,
+                                                     Iterator index_begin,
+                                                     Iterator index_end) {
+  DexFileData* data = GetOrAddDexFileData(dex_file);
+  if (data == nullptr) {
+    return false;
+  }
+  data->class_set.insert(index_begin, index_end);
+  return true;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_INL_H_
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 4d5c9d6..ea27d3b 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -31,6 +31,8 @@
 #include <zlib.h>
 #include <base/time_utils.h>
 
+#include "base/arena_allocator.h"
+#include "base/dumpable.h"
 #include "base/mutex.h"
 #include "base/scoped_flock.h"
 #include "base/stl_util.h"
@@ -45,9 +47,9 @@
 namespace art {
 
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
-// Last profile version: Instead of method index, put the difference with the last
-// method's index.
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '7', '\0' };
+// Last profile version: Move startup methods to use a bitmap. Also add support for post-startup
+// methods.
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '8', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -66,12 +68,25 @@
 static_assert(InlineCache::kIndividualCacheSize < kIsMissingTypesEncoding,
               "InlineCache::kIndividualCacheSize is larger than expected");
 
-ProfileCompilationInfo::ProfileCompilationInfo(const ProfileCompilationInfo& pci) {
-  MergeWith(pci);
+ProfileCompilationInfo::ProfileCompilationInfo(ArenaPool* custom_arena_pool)
+    : default_arena_pool_(),
+      arena_(custom_arena_pool),
+      info_(arena_.Adapter(kArenaAllocProfile)),
+      profile_key_map_(std::less<const std::string>(), arena_.Adapter(kArenaAllocProfile)) {
+}
+
+ProfileCompilationInfo::ProfileCompilationInfo()
+    : default_arena_pool_(/*use_malloc*/true, /*low_4gb*/false, "ProfileCompilationInfo"),
+      arena_(&default_arena_pool_),
+      info_(arena_.Adapter(kArenaAllocProfile)),
+      profile_key_map_(std::less<const std::string>(), arena_.Adapter(kArenaAllocProfile)) {
 }
 
 ProfileCompilationInfo::~ProfileCompilationInfo() {
-  ClearProfile();
+  VLOG(profiler) << Dumpable<MemStats>(arena_.GetMemStats());
+  for (DexFileData* data : info_) {
+    delete data;
+  }
 }
 
 void ProfileCompilationInfo::DexPcData::AddClass(uint16_t dex_profile_idx,
@@ -79,11 +94,27 @@
   if (is_megamorphic || is_missing_types) {
     return;
   }
-  classes.emplace(dex_profile_idx, type_idx);
-  if (classes.size() >= InlineCache::kIndividualCacheSize) {
+
+  // Perform an explicit lookup for the type instead of directly emplacing the
+  // element. We do this because emplace() allocates the node before doing the
+  // lookup and if it then finds an identical element, it shall deallocate the
+  // node. For Arena allocations, that's essentially a leak.
+  ClassReference ref(dex_profile_idx, type_idx);
+  auto it = classes.find(ref);
+  if (it != classes.end()) {
+    // The type index exists.
+    return;
+  }
+
+  // Check if adding the type will cause the cache to become megamorphic.
+  if (classes.size() + 1 >= InlineCache::kIndividualCacheSize) {
     is_megamorphic = true;
     classes.clear();
+    return;
   }
+
+  // The type does not exist and the inline cache will not be megamorphic.
+  classes.insert(ref);
 }
 
 // Transform the actual dex location into relative paths.
@@ -101,6 +132,21 @@
   }
 }
 
+bool ProfileCompilationInfo::AddSampledMethod(bool startup,
+                                              const std::string& dex_location,
+                                              uint32_t checksum,
+                                              uint16_t method_idx,
+                                              uint32_t num_method_ids) {
+  DexFileData* data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location),
+                                          checksum,
+                                          num_method_ids);
+  if (data == nullptr) {
+    return false;
+  }
+  data->AddSampledMethod(startup, method_idx);
+  return true;
+}
+
 bool ProfileCompilationInfo::AddMethodsAndClasses(
     const std::vector<ProfileMethodInfo>& methods,
     const std::set<DexCacheResolvedClasses>& resolved_classes) {
@@ -221,15 +267,18 @@
 
 static constexpr size_t kLineHeaderSize =
     2 * sizeof(uint16_t) +  // class_set.size + dex_location.size
-    2 * sizeof(uint32_t);   // method_map.size + checksum
+    3 * sizeof(uint32_t);   // method_map.size + checksum + num_method_ids
 
 /**
  * Serialization format:
  *    magic,version,number_of_dex_files,uncompressed_size_of_zipped_data,compressed_data_size,
- *    zipped[dex_location1,number_of_classes1,methods_region_size,dex_location_checksum1, \
+ *    zipped[dex_location1,number_of_classes1,methods_region_size,dex_location_checksum1,
+ *        num_method_ids,
  *        method_encoding_11,method_encoding_12...,class_id1,class_id2...
- *    dex_location2,number_of_classes2,methods_region_size,dex_location_checksum2, \
+ *        startup/post startup bitmap,
+ *    dex_location2,number_of_classes2,methods_region_size,dex_location_checksum2, num_method_ids,
  *        method_encoding_21,method_encoding_22...,,class_id1,class_id2...
+ *        startup/post startup bitmap,
  *    .....]
  * The method_encoding is:
  *    method_id,number_of_inline_caches,inline_cache1,inline_cache2...
@@ -266,7 +315,8 @@
     required_capacity += kLineHeaderSize +
         dex_data.profile_key.size() +
         sizeof(uint16_t) * dex_data.class_set.size() +
-        methods_region_size;
+        methods_region_size +
+        dex_data.bitmap_storage.size();
   }
   if (required_capacity > kProfileSizeErrorThresholdInBytes) {
     LOG(ERROR) << "Profile data size exceeds "
@@ -304,10 +354,12 @@
 
     DCHECK_LE(dex_data.profile_key.size(), std::numeric_limits<uint16_t>::max());
     DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
+    // Write profile line header.
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.profile_key.size()));
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
     AddUintToBuffer(&buffer, methods_region_size);  // uint32_t
     AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
+    AddUintToBuffer(&buffer, dex_data.num_method_ids);  // uint32_t
 
     AddStringToBuffer(&buffer, dex_data.profile_key);
 
@@ -331,6 +383,10 @@
       last_class_index = class_id.index_;
       AddUintToBuffer(&buffer, diff_with_last_class_index);
     }
+
+    buffer.insert(buffer.end(),
+                  dex_data.bitmap_storage.begin(),
+                  dex_data.bitmap_storage.end());
   }
 
   uint32_t output_size = 0;
@@ -445,8 +501,9 @@
 
 ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
     const std::string& profile_key,
-    uint32_t checksum) {
-  const auto& profile_index_it = profile_key_map_.FindOrAdd(profile_key, profile_key_map_.size());
+    uint32_t checksum,
+    uint32_t num_method_ids) {
+  const auto profile_index_it = profile_key_map_.FindOrAdd(profile_key, profile_key_map_.size());
   if (profile_key_map_.size() > std::numeric_limits<uint8_t>::max()) {
     // Allow only 255 dex files to be profiled. This allows us to save bytes
     // when encoding. The number is well above what we expect for normal applications.
@@ -460,13 +517,20 @@
   uint8_t profile_index = profile_index_it->second;
   if (info_.size() <= profile_index) {
     // This is a new addition. Add it to the info_ array.
-    info_.emplace_back(new DexFileData(profile_key, checksum, profile_index));
+    DexFileData* dex_file_data = new (&arena_) DexFileData(
+        &arena_,
+        profile_key,
+        checksum,
+        profile_index,
+        num_method_ids);
+    info_.push_back(dex_file_data);
   }
   DexFileData* result = info_[profile_index];
   // DCHECK that profile info map key is consistent with the one stored in the dex file data.
   // This should always be the case since since the cache map is managed by ProfileCompilationInfo.
   DCHECK_EQ(profile_key, result->profile_key);
   DCHECK_EQ(profile_index, result->profile_index);
+  DCHECK_EQ(num_method_ids, result->num_method_ids);
 
   // Check that the checksum matches.
   // This may different if for example the dex file was updated and
@@ -480,7 +544,7 @@
 
 const ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::FindDexData(
       const std::string& profile_key) const {
-  const auto& profile_index_it = profile_key_map_.find(profile_key);
+  const auto profile_index_it = profile_key_map_.find(profile_key);
   if (profile_index_it == profile_key_map_.end()) {
     return nullptr;
   }
@@ -495,7 +559,7 @@
 bool ProfileCompilationInfo::AddResolvedClasses(const DexCacheResolvedClasses& classes) {
   const std::string dex_location = GetProfileDexFileKey(classes.GetDexLocation());
   const uint32_t checksum = classes.GetLocationChecksum();
-  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum, classes.NumMethodIds());
   if (data == nullptr) {
     return false;
   }
@@ -505,34 +569,48 @@
 
 bool ProfileCompilationInfo::AddMethodIndex(const std::string& dex_location,
                                             uint32_t dex_checksum,
-                                            uint16_t method_index) {
-  return AddMethod(dex_location, dex_checksum, method_index, OfflineProfileMethodInfo());
+                                            uint16_t method_index,
+                                            uint32_t num_method_ids) {
+  return AddMethod(dex_location,
+                   dex_checksum,
+                   method_index,
+                   num_method_ids,
+                   OfflineProfileMethodInfo(nullptr));
 }
 
 bool ProfileCompilationInfo::AddMethod(const std::string& dex_location,
                                        uint32_t dex_checksum,
                                        uint16_t method_index,
+                                       uint32_t num_method_ids,
                                        const OfflineProfileMethodInfo& pmi) {
-  DexFileData* const data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location), dex_checksum);
+  DexFileData* const data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location),
+                                                dex_checksum,
+                                                num_method_ids);
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
-  auto inline_cache_it = data->method_map.FindOrAdd(method_index);
-  for (const auto& pmi_inline_cache_it : pmi.inline_caches) {
+  // Add the method.
+  InlineCacheMap* inline_cache = data->FindOrAddMethod(method_index);
+
+  if (pmi.inline_caches == nullptr) {
+    // If we don't have inline caches return success right away.
+    return true;
+  }
+  for (const auto& pmi_inline_cache_it : *pmi.inline_caches) {
     uint16_t pmi_ic_dex_pc = pmi_inline_cache_it.first;
     const DexPcData& pmi_ic_dex_pc_data = pmi_inline_cache_it.second;
-    DexPcData& dex_pc_data = inline_cache_it->second.FindOrAdd(pmi_ic_dex_pc)->second;
-    if (dex_pc_data.is_missing_types || dex_pc_data.is_megamorphic) {
+    DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, pmi_ic_dex_pc);
+    if (dex_pc_data->is_missing_types || dex_pc_data->is_megamorphic) {
       // We are already megamorphic or we are missing types; no point in going forward.
       continue;
     }
 
     if (pmi_ic_dex_pc_data.is_missing_types) {
-      dex_pc_data.SetIsMissingTypes();
+      dex_pc_data->SetIsMissingTypes();
       continue;
     }
     if (pmi_ic_dex_pc_data.is_megamorphic) {
-      dex_pc_data.SetIsMegamorphic();
+      dex_pc_data->SetIsMegamorphic();
       continue;
     }
 
@@ -540,44 +618,40 @@
       const DexReference& dex_ref = pmi.dex_references[class_ref.dex_profile_index];
       DexFileData* class_dex_data = GetOrAddDexFileData(
           GetProfileDexFileKey(dex_ref.dex_location),
-          dex_ref.dex_checksum);
+          dex_ref.dex_checksum,
+          dex_ref.num_method_ids);
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
-      dex_pc_data.AddClass(class_dex_data->profile_index, class_ref.type_index);
+      dex_pc_data->AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
   return true;
 }
 
 bool ProfileCompilationInfo::AddMethod(const ProfileMethodInfo& pmi) {
-  DexFileData* const data = GetOrAddDexFileData(
-      GetProfileDexFileKey(pmi.dex_file->GetLocation()),
-      pmi.dex_file->GetLocationChecksum());
+  DexFileData* const data = GetOrAddDexFileData(pmi.dex_file);
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
-  auto inline_cache_it = data->method_map.FindOrAdd(pmi.dex_method_index);
+  InlineCacheMap* inline_cache = data->FindOrAddMethod(pmi.dex_method_index);
 
   for (const ProfileMethodInfo::ProfileInlineCache& cache : pmi.inline_caches) {
     if (cache.is_missing_types) {
-      auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
-      dex_pc_data_it->second.SetIsMissingTypes();
+      FindOrAddDexPc(inline_cache, cache.dex_pc)->SetIsMissingTypes();
       continue;
     }
-    for (const ProfileMethodInfo::ProfileClassReference& class_ref : cache.classes) {
-      DexFileData* class_dex_data = GetOrAddDexFileData(
-          GetProfileDexFileKey(class_ref.dex_file->GetLocation()),
-          class_ref.dex_file->GetLocationChecksum());
+    for (const TypeReference& class_ref : cache.classes) {
+      DexFileData* class_dex_data = GetOrAddDexFileData(class_ref.dex_file);
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
-      auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
-      if (dex_pc_data_it->second.is_missing_types) {
+      DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, cache.dex_pc);
+      if (dex_pc_data->is_missing_types) {
         // Don't bother adding classes if we are missing types.
         break;
       }
-      dex_pc_data_it->second.AddClass(class_dex_data->profile_index, class_ref.type_index);
+      dex_pc_data->AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
   return true;
@@ -585,8 +659,9 @@
 
 bool ProfileCompilationInfo::AddClassIndex(const std::string& dex_location,
                                            uint32_t checksum,
-                                           dex::TypeIndex type_idx) {
-  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum);
+                                           dex::TypeIndex type_idx,
+                                           uint32_t num_method_ids) {
+  DexFileData* const data = GetOrAddDexFileData(dex_location, checksum, num_method_ids);
   if (data == nullptr) {
     return false;
   }
@@ -614,13 +689,13 @@
     uint8_t dex_to_classes_map_size;
     READ_UINT(uint16_t, buffer, dex_pc, error);
     READ_UINT(uint8_t, buffer, dex_to_classes_map_size, error);
-    auto dex_pc_data_it = inline_cache->FindOrAdd(dex_pc);
+    DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, dex_pc);
     if (dex_to_classes_map_size == kIsMissingTypesEncoding) {
-      dex_pc_data_it->second.SetIsMissingTypes();
+      dex_pc_data->SetIsMissingTypes();
       continue;
     }
     if (dex_to_classes_map_size == kIsMegamorphicEncoding) {
-      dex_pc_data_it->second.SetIsMegamorphic();
+      dex_pc_data->SetIsMegamorphic();
       continue;
     }
     for (; dex_to_classes_map_size > 0; dex_to_classes_map_size--) {
@@ -636,7 +711,7 @@
       for (; dex_classes_size > 0; dex_classes_size--) {
         uint16_t type_index;
         READ_UINT(uint16_t, buffer, type_index, error);
-        dex_pc_data_it->second.AddClass(dex_profile_index, dex::TypeIndex(type_index));
+        dex_pc_data->AddClass(dex_profile_index, dex::TypeIndex(type_index));
       }
     }
   }
@@ -656,13 +731,15 @@
       - line_header.method_region_size_bytes;
   uint16_t last_method_index = 0;
   while (buffer.CountUnreadBytes() > expected_unread_bytes_after_operation) {
-    DexFileData* const data = GetOrAddDexFileData(line_header.dex_location, line_header.checksum);
+    DexFileData* const data = GetOrAddDexFileData(line_header.dex_location,
+                                                  line_header.checksum,
+                                                  line_header.num_method_ids);
     uint16_t diff_with_last_method_index;
     READ_UINT(uint16_t, buffer, diff_with_last_method_index, error);
     uint16_t method_index = last_method_index + diff_with_last_method_index;
     last_method_index = method_index;
-    auto it = data->method_map.FindOrAdd(method_index);
-    if (!ReadInlineCache(buffer, number_of_dex_files, &(it->second), error)) {
+    InlineCacheMap* inline_cache = data->FindOrAddMethod(method_index);
+    if (!ReadInlineCache(buffer, number_of_dex_files, inline_cache, error)) {
       return false;
     }
   }
@@ -691,7 +768,8 @@
     last_class_index = type_index;
     if (!AddClassIndex(line_header.dex_location,
                        line_header.checksum,
-                       dex::TypeIndex(type_index))) {
+                       dex::TypeIndex(type_index),
+                       line_header.num_method_ids)) {
       return false;
     }
   }
@@ -825,6 +903,7 @@
   READ_UINT(uint16_t, buffer, line_header->class_set_size, error);
   READ_UINT(uint32_t, buffer, line_header->method_region_size_bytes, error);
   READ_UINT(uint32_t, buffer, line_header->checksum, error);
+  READ_UINT(uint32_t, buffer, line_header->num_method_ids, error);
   return true;
 }
 
@@ -864,7 +943,10 @@
       uint8_t number_of_dex_files,
       const ProfileLineHeader& line_header,
       /*out*/std::string* error) {
-  if (GetOrAddDexFileData(line_header.dex_location, line_header.checksum) == nullptr) {
+  DexFileData* data = GetOrAddDexFileData(line_header.dex_location,
+                                          line_header.checksum,
+                                          line_header.num_method_ids);
+  if (data == nullptr) {
     *error = "Error when reading profile file line header: checksum mismatch for "
         + line_header.dex_location;
     return kProfileLoadBadData;
@@ -877,6 +959,16 @@
   if (!ReadClasses(buffer, line_header, error)) {
     return kProfileLoadBadData;
   }
+
+  const size_t bytes = data->bitmap_storage.size();
+  if (buffer.CountUnreadBytes() < bytes) {
+    *error += "Profile EOF reached prematurely for ReadProfileHeaderDexLocation";
+    return kProfileLoadBadData;
+  }
+  const uint8_t* base_ptr = buffer.GetCurrentPtr();
+  std::copy_n(base_ptr, bytes, &data->bitmap_storage[0]);
+  buffer.Advance(bytes);
+  // The method bitmap bytes have been copied into data->bitmap_storage above.
   return kProfileLoadSuccess;
 }
 
@@ -1072,7 +1164,8 @@
   SafeMap<uint8_t, uint8_t> dex_profile_index_remap;
   for (const DexFileData* other_dex_data : other.info_) {
     const DexFileData* dex_data = GetOrAddDexFileData(other_dex_data->profile_key,
-                                                      other_dex_data->checksum);
+                                                      other_dex_data->checksum,
+                                                      other_dex_data->num_method_ids);
     if (dex_data == nullptr) {
       return false;  // Could happen if we exceed the number of allowed dex files.
     }
@@ -1091,24 +1184,27 @@
     // Merge the methods and the inline caches.
     for (const auto& other_method_it : other_dex_data->method_map) {
       uint16_t other_method_index = other_method_it.first;
-      auto method_it = dex_data->method_map.FindOrAdd(other_method_index);
+      InlineCacheMap* inline_cache = dex_data->FindOrAddMethod(other_method_index);
       const auto& other_inline_cache = other_method_it.second;
       for (const auto& other_ic_it : other_inline_cache) {
         uint16_t other_dex_pc = other_ic_it.first;
         const ClassSet& other_class_set = other_ic_it.second.classes;
-        auto class_set = method_it->second.FindOrAdd(other_dex_pc);
+        DexPcData* dex_pc_data = FindOrAddDexPc(inline_cache, other_dex_pc);
         if (other_ic_it.second.is_missing_types) {
-          class_set->second.SetIsMissingTypes();
+          dex_pc_data->SetIsMissingTypes();
         } else if (other_ic_it.second.is_megamorphic) {
-          class_set->second.SetIsMegamorphic();
+          dex_pc_data->SetIsMegamorphic();
         } else {
           for (const auto& class_it : other_class_set) {
-            class_set->second.AddClass(dex_profile_index_remap.Get(
+            dex_pc_data->AddClass(dex_profile_index_remap.Get(
                 class_it.dex_profile_index), class_it.type_index);
           }
         }
       }
     }
+
+    // Merge the bitmaps.
+    dex_data->MergeBitmap(*other_dex_data);
   }
   return true;
 }
@@ -1121,7 +1217,28 @@
   return ChecksumMatch(dex_file.GetLocationChecksum(), checksum);
 }
 
-bool ProfileCompilationInfo::ContainsMethod(const MethodReference& method_ref) const {
+bool ProfileCompilationInfo::IsStartupOrHotMethod(const MethodReference& method_ref) const {
+  return IsStartupOrHotMethod(method_ref.dex_file->GetLocation(),
+                              method_ref.dex_file->GetLocationChecksum(),
+                              method_ref.dex_method_index);
+}
+
+bool ProfileCompilationInfo::IsStartupOrHotMethod(const std::string& dex_location,
+                                                  uint32_t dex_checksum,
+                                                  uint16_t dex_method_index) const {
+  const DexFileData* dex_data = FindDexData(GetProfileDexFileKey(dex_location));
+  if (dex_data == nullptr || !ChecksumMatch(dex_checksum, dex_data->checksum)) {
+    return false;  // FindDexData found nothing, or the checksum does not match.
+  }
+  if (dex_data->HasSampledMethod(/*startup*/ true, dex_method_index)) {
+    return true;  // Sampled as a startup method.
+  }
+  const MethodMap& methods = dex_data->method_map;
+  const auto method_it = methods.find(dex_method_index);
+  return method_it != methods.end();  // Otherwise: true only if present in method_map.
+}
+
+bool ProfileCompilationInfo::ContainsHotMethod(const MethodReference& method_ref) const {
   return FindMethod(method_ref.dex_file->GetLocation(),
                     method_ref.dex_file->GetLocationChecksum(),
                     method_ref.dex_method_index) != nullptr;
@@ -1143,24 +1260,25 @@
   return nullptr;
 }
 
-bool ProfileCompilationInfo::GetMethod(const std::string& dex_location,
-                                       uint32_t dex_checksum,
-                                       uint16_t dex_method_index,
-                                       /*out*/OfflineProfileMethodInfo* pmi) const {
+std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> ProfileCompilationInfo::GetMethod(
+      const std::string& dex_location,
+      uint32_t dex_checksum,
+      uint16_t dex_method_index) const {
   const InlineCacheMap* inline_caches = FindMethod(dex_location, dex_checksum, dex_method_index);
   if (inline_caches == nullptr) {
-    return false;
+    return nullptr;
   }
 
+  std::unique_ptr<OfflineProfileMethodInfo> pmi(new OfflineProfileMethodInfo(inline_caches));
+
   pmi->dex_references.resize(info_.size());
   for (const DexFileData* dex_data : info_) {
     pmi->dex_references[dex_data->profile_index].dex_location = dex_data->profile_key;
     pmi->dex_references[dex_data->profile_index].dex_checksum = dex_data->checksum;
+    pmi->dex_references[dex_data->profile_index].num_method_ids = dex_data->num_method_ids;
   }
 
-  // TODO(calin): maybe expose a direct pointer to avoid copying
-  pmi->inline_caches = *inline_caches;
-  return true;
+  return pmi;
 }
 
 
@@ -1170,7 +1288,7 @@
     if (!ChecksumMatch(dex_file, dex_data->checksum)) {
       return false;
     }
-    const std::set<dex::TypeIndex>& classes = dex_data->class_set;
+    const ArenaSet<dex::TypeIndex>& classes = dex_data->class_set;
     return classes.find(type_idx) != classes.end();
   }
   return false;
@@ -1239,7 +1357,7 @@
         }
       }
     }
-    os << "\n\tmethods: ";
+    os << "\n\thot methods: ";
     for (const auto& method_it : dex_data->method_map) {
       if (dex_file != nullptr) {
         os << "\n\t\t" << dex_file->PrettyMethod(method_it.first, true);
@@ -1264,6 +1382,19 @@
       }
       os << "], ";
     }
+    bool startup = true;
+    while (true) {
+      os << "\n\t" << (startup ? "startup methods: " : "post startup methods: ");
+      for (uint32_t method_idx = 0; method_idx < dex_data->num_method_ids; ++method_idx) {
+        if (dex_data->HasSampledMethod(startup, method_idx)) {
+          os << method_idx << ", ";
+        }
+      }
+      if (startup == false) {
+        break;
+      }
+      startup = false;
+    }
     os << "\n\tclasses: ";
     for (const auto class_it : dex_data->class_set) {
       if (dex_file != nullptr) {
@@ -1276,17 +1407,32 @@
   return os.str();
 }
 
-bool ProfileCompilationInfo::GetClassesAndMethods(const DexFile* dex_file,
-                                                  std::set<dex::TypeIndex>* class_set,
-                                                  MethodMap* method_map) const {
+bool ProfileCompilationInfo::GetClassesAndMethods(
+    const DexFile& dex_file,
+    /*out*/std::set<dex::TypeIndex>* class_set,
+    /*out*/std::set<uint16_t>* hot_method_set,
+    /*out*/std::set<uint16_t>* startup_method_set,
+    /*out*/std::set<uint16_t>* post_startup_method_method_set) const {
   std::set<std::string> ret;
-  std::string profile_key = GetProfileDexFileKey(dex_file->GetLocation());
+  std::string profile_key = GetProfileDexFileKey(dex_file.GetLocation());
   const DexFileData* dex_data = FindDexData(profile_key);
-  if (dex_data == nullptr || dex_data->checksum != dex_file->GetLocationChecksum()) {
+  if (dex_data == nullptr || dex_data->checksum != dex_file.GetLocationChecksum()) {
     return false;
   }
-  *method_map = dex_data->method_map;
-  *class_set = dex_data->class_set;
+  for (const auto& it : dex_data->method_map) {
+    hot_method_set->insert(it.first);
+  }
+  for (uint32_t method_idx = 0; method_idx < dex_data->num_method_ids; ++method_idx) {
+    if (dex_data->HasSampledMethod(/*startup*/ true, method_idx)) {
+      startup_method_set->insert(method_idx);
+    }
+    if (dex_data->HasSampledMethod(/*startup*/ false, method_idx)) {
+      post_startup_method_method_set->insert(method_idx);
+    }
+  }
+  for (const dex::TypeIndex& type_index : dex_data->class_set) {
+    class_set->insert(type_index);
+  }
   return true;
 }
 
@@ -1307,16 +1453,27 @@
 }
 
 std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses(
-    const std::unordered_set<std::string>& dex_files_locations) const {
-  std::unordered_map<std::string, std::string> key_to_location_map;
-  for (const std::string& location : dex_files_locations) {
-    key_to_location_map.emplace(GetProfileDexFileKey(location), location);
+    const std::vector<const DexFile*>& dex_files) const {
+  std::unordered_map<std::string, const DexFile* > key_to_dex_file;
+  for (const DexFile* dex_file : dex_files) {
+    key_to_dex_file.emplace(GetProfileDexFileKey(dex_file->GetLocation()), dex_file);
   }
   std::set<DexCacheResolvedClasses> ret;
   for (const DexFileData* dex_data : info_) {
-    const auto& it = key_to_location_map.find(dex_data->profile_key);
-    if (it != key_to_location_map.end()) {
-      DexCacheResolvedClasses classes(it->second, it->second, dex_data->checksum);
+    const auto it = key_to_dex_file.find(dex_data->profile_key);
+    if (it != key_to_dex_file.end()) {
+      const DexFile* dex_file = it->second;
+      const std::string& dex_location = dex_file->GetLocation();
+      if (dex_data->checksum != it->second->GetLocationChecksum()) {
+        LOG(ERROR) << "Dex checksum mismatch when getting resolved classes from profile for "
+            << "location " << dex_location << " (checksum=" << dex_file->GetLocationChecksum()
+            << ", profile checksum=" << dex_data->checksum;
+        return std::set<DexCacheResolvedClasses>();
+      }
+      DexCacheResolvedClasses classes(dex_location,
+                                      dex_location,
+                                      dex_data->checksum,
+                                      dex_data->num_method_ids);
       classes.AddClasses(dex_data->class_set.begin(), dex_data->class_set.end());
       ret.insert(classes);
     }
@@ -1324,12 +1481,6 @@
   return ret;
 }
 
-void ProfileCompilationInfo::ClearResolvedClasses() {
-  for (DexFileData* dex_data : info_) {
-    dex_data->class_set.clear();
-  }
-}
-
 // Naive implementation to generate a random profile file suitable for testing.
 bool ProfileCompilationInfo::GenerateTestProfile(int fd,
                                                  uint16_t number_of_dex_files,
@@ -1339,8 +1490,8 @@
   const std::string base_dex_location = "base.apk";
   ProfileCompilationInfo info;
   // The limits are defined by the dex specification.
-  uint16_t max_method = std::numeric_limits<uint16_t>::max();
-  uint16_t max_classes = std::numeric_limits<uint16_t>::max();
+  const uint16_t max_method = std::numeric_limits<uint16_t>::max();
+  const uint16_t max_classes = std::numeric_limits<uint16_t>::max();
   uint16_t number_of_methods = max_method * method_ratio / 100;
   uint16_t number_of_classes = max_classes * class_ratio / 100;
 
@@ -1360,7 +1511,7 @@
       if (m < (number_of_methods / kFavorSplit)) {
         method_idx %= kFavorFirstN;
       }
-      info.AddMethodIndex(profile_key, 0, method_idx);
+      info.AddMethodIndex(profile_key, 0, method_idx, max_method);
     }
 
     for (uint16_t c = 0; c < number_of_classes; c++) {
@@ -1368,7 +1519,7 @@
       if (c < (number_of_classes / kFavorSplit)) {
         type_idx %= kFavorFirstN;
       }
-      info.AddClassIndex(profile_key, 0, dex::TypeIndex(type_idx));
+      info.AddClassIndex(profile_key, 0, dex::TypeIndex(type_idx), max_method);
     }
   }
   return info.Save(fd);
@@ -1387,13 +1538,16 @@
     for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
       // Randomly add a class from the dex file (with 50% chance).
       if (std::rand() % 2 != 0) {
-        info.AddClassIndex(location, checksum, dex::TypeIndex(dex_file->GetClassDef(i).class_idx_));
+        info.AddClassIndex(location,
+                           checksum,
+                           dex::TypeIndex(dex_file->GetClassDef(i).class_idx_),
+                           dex_file->NumMethodIds());
       }
     }
     for (uint32_t i = 0; i < dex_file->NumMethodIds(); ++i) {
       // Randomly add a method from the dex file (with 50% chance).
       if (std::rand() % 2 != 0) {
-        info.AddMethodIndex(location, checksum, i);
+        info.AddMethodIndex(location, checksum, i, dex_file->NumMethodIds());
       }
     }
   }
@@ -1402,17 +1556,17 @@
 
 bool ProfileCompilationInfo::OfflineProfileMethodInfo::operator==(
       const OfflineProfileMethodInfo& other) const {
-  if (inline_caches.size() != other.inline_caches.size()) {
+  if (inline_caches->size() != other.inline_caches->size()) {
     return false;
   }
 
   // We can't use a simple equality test because we need to match the dex files
   // of the inline caches which might have different profile indexes.
-  for (const auto& inline_cache_it : inline_caches) {
+  for (const auto& inline_cache_it : *inline_caches) {
     uint16_t dex_pc = inline_cache_it.first;
     const DexPcData dex_pc_data = inline_cache_it.second;
-    const auto other_it = other.inline_caches.find(dex_pc);
-    if (other_it == other.inline_caches.end()) {
+    const auto& other_it = other.inline_caches->find(dex_pc);
+    if (other_it == other.inline_caches->end()) {
       return false;
     }
     const DexPcData& other_dex_pc_data = other_it->second;
@@ -1441,17 +1595,21 @@
   return true;
 }
 
-void ProfileCompilationInfo::ClearProfile() {
-  for (DexFileData* dex_data : info_) {
-    delete dex_data;
-  }
-  info_.clear();
-  profile_key_map_.clear();
-}
-
 bool ProfileCompilationInfo::IsEmpty() const {
   DCHECK_EQ(info_.empty(), profile_key_map_.empty());
   return info_.empty();
 }
 
+ProfileCompilationInfo::InlineCacheMap*
+ProfileCompilationInfo::DexFileData::FindOrAddMethod(uint16_t method_index) {
+  return &(method_map.FindOrAdd(
+      method_index,
+      InlineCacheMap(std::less<uint16_t>(), arena_->Adapter(kArenaAllocProfile)))->second);
+}
+
+ProfileCompilationInfo::DexPcData*
+ProfileCompilationInfo::FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc) {
+  return &(inline_cache->FindOrAdd(dex_pc, DexPcData(&arena_))->second);
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index ee1935f..bd1b9d6 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -17,16 +17,19 @@
 #ifndef ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_H_
 #define ART_RUNTIME_JIT_PROFILE_COMPILATION_INFO_H_
 
-#include <memory>
 #include <set>
 #include <vector>
 
 #include "atomic.h"
+#include "base/arena_object.h"
+#include "base/arena_containers.h"
+#include "bit_memory_region.h"
 #include "dex_cache_resolved_classes.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
 #include "method_reference.h"
 #include "safe_map.h"
+#include "type_reference.h"
 
 namespace art {
 
@@ -35,24 +38,15 @@
  *  without the need to hold GC-able objects.
  */
 struct ProfileMethodInfo {
-  struct ProfileClassReference {
-    ProfileClassReference() : dex_file(nullptr) {}
-    ProfileClassReference(const DexFile* dex, const dex::TypeIndex& index)
-        : dex_file(dex), type_index(index) {}
-
-    const DexFile* dex_file;
-    dex::TypeIndex type_index;
-  };
-
   struct ProfileInlineCache {
     ProfileInlineCache(uint32_t pc,
                        bool missing_types,
-                       const std::vector<ProfileClassReference>& profile_classes)
+                       const std::vector<TypeReference>& profile_classes)
         : dex_pc(pc), is_missing_types(missing_types), classes(profile_classes) {}
 
     const uint32_t dex_pc;
     const bool is_missing_types;
-    const std::vector<ProfileClassReference> classes;
+    const std::vector<TypeReference> classes;
   };
 
   ProfileMethodInfo(const DexFile* dex, uint32_t method_index)
@@ -61,7 +55,9 @@
   ProfileMethodInfo(const DexFile* dex,
                     uint32_t method_index,
                     const std::vector<ProfileInlineCache>& caches)
-      : dex_file(dex), dex_method_index(method_index), inline_caches(caches) {}
+      : dex_file(dex),
+        dex_method_index(method_index),
+        inline_caches(caches) {}
 
   const DexFile* dex_file;
   const uint32_t dex_method_index;
@@ -86,13 +82,15 @@
 
   // A dex location together with its checksum.
   struct DexReference {
-    DexReference() : dex_checksum(0) {}
+    DexReference() : dex_checksum(0), num_method_ids(0) {}
 
-    DexReference(const std::string& location, uint32_t checksum)
-        : dex_location(location), dex_checksum(checksum) {}
+    DexReference(const std::string& location, uint32_t checksum, uint32_t num_methods)
+        : dex_location(location), dex_checksum(checksum), num_method_ids(num_methods) {}
 
     bool operator==(const DexReference& other) const {
-      return dex_checksum == other.dex_checksum && dex_location == other.dex_location;
+      return dex_checksum == other.dex_checksum &&
+          dex_location == other.dex_location &&
+          num_method_ids == other.num_method_ids;
     }
 
     bool MatchesDex(const DexFile* dex_file) const {
@@ -102,6 +100,7 @@
 
     std::string dex_location;
     uint32_t dex_checksum;
+    uint32_t num_method_ids;
   };
 
   // Encodes a class reference in the profile.
@@ -115,8 +114,8 @@
   // We cannot rely on the actual multidex index because a single profile may store
   // data from multiple splits. This means that a profile may contain a classes2.dex from split-A
   // and one from split-B.
-  struct ClassReference {
-    ClassReference(uint8_t dex_profile_idx, const dex::TypeIndex& type_idx) :
+  struct ClassReference : public ValueObject {
+    ClassReference(uint8_t dex_profile_idx, const dex::TypeIndex type_idx) :
       dex_profile_index(dex_profile_idx), type_index(type_idx) {}
 
     bool operator==(const ClassReference& other) const {
@@ -133,13 +132,16 @@
   };
 
   // The set of classes that can be found at a given dex pc.
-  using ClassSet = std::set<ClassReference>;
+  using ClassSet = ArenaSet<ClassReference>;
 
   // Encodes the actual inline cache for a given dex pc (whether or not the receiver is
   // megamorphic and its possible types).
   // If the receiver is megamorphic or is missing types the set of classes will be empty.
-  struct DexPcData {
-    DexPcData() : is_missing_types(false), is_megamorphic(false) {}
+  struct DexPcData : public ArenaObject<kArenaAllocProfile> {
+    explicit DexPcData(ArenaAllocator* arena)
+        : is_missing_types(false),
+          is_megamorphic(false),
+          classes(std::less<ClassReference>(), arena->Adapter(kArenaAllocProfile)) {}
     void AddClass(uint16_t dex_profile_idx, const dex::TypeIndex& type_idx);
     void SetIsMegamorphic() {
       if (is_missing_types) return;
@@ -166,32 +168,68 @@
   };
 
   // The inline cache map: DexPc -> DexPcData.
-  using InlineCacheMap = SafeMap<uint16_t, DexPcData>;
+  using InlineCacheMap = ArenaSafeMap<uint16_t, DexPcData>;
 
   // Maps a method dex index to its inline cache.
-  using MethodMap = SafeMap<uint16_t, InlineCacheMap>;
+  using MethodMap = ArenaSafeMap<uint16_t, InlineCacheMap>;
 
   // Encodes the full set of inline caches for a given method.
   // The dex_references vector is indexed according to the ClassReference::dex_profile_index.
   // i.e. the dex file of any ClassReference present in the inline caches can be found at
   // dex_references[ClassReference::dex_profile_index].
   struct OfflineProfileMethodInfo {
+    explicit OfflineProfileMethodInfo(const InlineCacheMap* inline_cache_map)
+        : inline_caches(inline_cache_map) {}
+
     bool operator==(const OfflineProfileMethodInfo& other) const;
 
+    const InlineCacheMap* const inline_caches;
     std::vector<DexReference> dex_references;
-    InlineCacheMap inline_caches;
   };
 
   // Public methods to create, extend or query the profile.
+  ProfileCompilationInfo();
+  explicit ProfileCompilationInfo(ArenaPool* arena_pool);
 
-  ProfileCompilationInfo() {}
-  ProfileCompilationInfo(const ProfileCompilationInfo& pci);
   ~ProfileCompilationInfo();
 
   // Add the given methods and classes to the current profile object.
   bool AddMethodsAndClasses(const std::vector<ProfileMethodInfo>& methods,
                             const std::set<DexCacheResolvedClasses>& resolved_classes);
 
+  // The iterator range must yield type ids, not class defs.
+  template <class Iterator>
+  bool AddClassesForDex(const DexFile* dex_file, Iterator index_begin, Iterator index_end);
+
+  // Add a method index to the profile (without inline caches).
+  bool AddMethodIndex(const std::string& dex_location,
+                      uint32_t checksum,
+                      uint16_t method_idx,
+                      uint32_t num_method_ids);
+
+  // Add a method to the profile using its online representation (containing runtime structures).
+  bool AddMethod(const ProfileMethodInfo& pmi);
+
+  // Add methods that have samples but are not necessarily hot. These are partitioned into two
+  // possibly intersecting sets: startup and post startup.
+  bool AddSampledMethod(bool startup,
+                        const std::string& dex_location,
+                        uint32_t checksum,
+                        uint16_t method_idx,
+                        uint32_t num_method_ids);
+  // Bulk add sampled methods for a single dex, fast since it only has one GetOrAddDexFileData call.
+  template <class Iterator>
+  bool AddSampledMethodsForDex(bool startup,
+                               const DexFile* dex_file,
+                               Iterator index_begin,
+                               Iterator index_end);
+
+  // Bulk add hot methods for a single dex, fast since it only has one GetOrAddDexFileData call.
+  template <class Iterator>
+  bool AddHotMethodsForDex(const DexFile* dex_file,
+                           Iterator index_begin,
+                           Iterator index_end);
+
   // Load profile information from the given file descriptor.
   // If the current profile is non-empty the load will fail.
   bool Load(int fd);
@@ -217,18 +255,25 @@
   // Return the number of resolved classes that were profiled.
   uint32_t GetNumberOfResolvedClasses() const;
 
-  // Return true if the method reference is present in the profiling info.
-  bool ContainsMethod(const MethodReference& method_ref) const;
+  // Return true if the method reference is a hot or startup method in the profiling info.
+  bool IsStartupOrHotMethod(const MethodReference& method_ref) const;
+  bool IsStartupOrHotMethod(const std::string& dex_location,
+                            uint32_t dex_checksum,
+                            uint16_t dex_method_index) const;
+
+  // Return true if the method reference is present and hot in the profiling info.
+  bool ContainsHotMethod(const MethodReference& method_ref) const;
 
   // Return true if the class's type is present in the profiling info.
   bool ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const;
 
-  // Return true if the method is present in the profiling info.
-  // If the method is found, `pmi` is populated with its inline caches.
-  bool GetMethod(const std::string& dex_location,
-                 uint32_t dex_checksum,
-                 uint16_t dex_method_index,
-                 /*out*/OfflineProfileMethodInfo* pmi) const;
+  // Return the method data for the given location and index from the profiling info.
+  // If the method index is not found or the checksum doesn't match, null is returned.
+  // Note: the inline cache map is a pointer to the map stored in the profile and
+  // its allocation will go away if the profile goes out of scope.
+  std::unique_ptr<OfflineProfileMethodInfo> GetMethod(const std::string& dex_location,
+                                                      uint32_t dex_checksum,
+                                                      uint16_t dex_method_index) const;
 
   // Dump all the loaded profile info into a string and returns it.
   // If dex_files is not null then the method indices will be resolved to their
@@ -239,22 +284,21 @@
   std::string DumpInfo(const std::vector<const DexFile*>* dex_files,
                        bool print_full_dex_location = true) const;
 
-  // Return the classes and methods for a given dex file through out args. The otu args are the set
+  // Return the classes and methods for a given dex file through out args. The out args are the set
   // of class as well as the methods and their associated inline caches. Returns true if the dex
   // file is register and has a matching checksum, false otherwise.
-  bool GetClassesAndMethods(const DexFile* dex_file,
-                            std::set<dex::TypeIndex>* class_set,
-                            MethodMap* method_map) const;
+  bool GetClassesAndMethods(const DexFile& dex_file,
+                            /*out*/std::set<dex::TypeIndex>* class_set,
+                            /*out*/std::set<uint16_t>* hot_method_set,
+                            /*out*/std::set<uint16_t>* startup_method_set,
+                            /*out*/std::set<uint16_t>* post_startup_method_method_set) const;
 
   // Perform an equality test with the `other` profile information.
   bool Equals(const ProfileCompilationInfo& other);
 
   // Return the class descriptors for all of the classes in the profiles' class sets.
   std::set<DexCacheResolvedClasses> GetResolvedClasses(
-      const std::unordered_set<std::string>& dex_files_locations) const;
-
-  // Clear the resolved classes from the current object.
-  void ClearResolvedClasses();
+      const std::vector<const DexFile*>& dex_files_) const;
 
   // Return the profile key associated with the given dex location.
   static std::string GetProfileDexFileKey(const std::string& dex_location);
@@ -277,6 +321,11 @@
   static bool Equals(const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi1,
                      const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi2);
 
+  ArenaAllocator* GetArena() { return &arena_; }
+
+  // Add a method index to the profile (without inline caches).
+  bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
+
  private:
   enum ProfileLoadSatus {
     kProfileLoadWouldOverwiteData,
@@ -295,9 +344,50 @@
   // profile) fields in this struct because we can infer them from
   // profile_key_map_ and info_. However, it makes the profiles logic much
   // simpler if we have references here as well.
-  struct DexFileData {
-    DexFileData(const std::string& key, uint32_t location_checksum, uint16_t index)
-         : profile_key(key), profile_index(index), checksum(location_checksum) {}
+  struct DexFileData : public DeletableArenaObject<kArenaAllocProfile> {
+    DexFileData(ArenaAllocator* arena,
+                const std::string& key,
+                uint32_t location_checksum,
+                uint16_t index,
+                uint32_t num_methods)
+        : arena_(arena),
+          profile_key(key),
+          profile_index(index),
+          checksum(location_checksum),
+          method_map(std::less<uint16_t>(), arena->Adapter(kArenaAllocProfile)),
+          class_set(std::less<dex::TypeIndex>(), arena->Adapter(kArenaAllocProfile)),
+          num_method_ids(num_methods),
+          bitmap_storage(arena->Adapter(kArenaAllocProfile)) {
+      const size_t num_bits = num_method_ids * kBitmapCount;  // One bit per method per bitmap.
+      bitmap_storage.resize(RoundUp(num_bits, kBitsPerByte) / kBitsPerByte);  // Whole bytes.
+      if (!bitmap_storage.empty()) {  // Otherwise method_bitmap stays default-constructed.
+        method_bitmap =
+            BitMemoryRegion(MemoryRegion(&bitmap_storage[0], bitmap_storage.size()), 0, num_bits);
+      }
+    }
+
+    bool operator==(const DexFileData& other) const {
+      return checksum == other.checksum && method_map == other.method_map;
+    }
+
+    // Mark a method as executed at least once.
+    void AddSampledMethod(bool startup, size_t index) {
+      method_bitmap.StoreBit(MethodBitIndex(startup, index), true);
+    }
+
+    bool HasSampledMethod(bool startup, size_t index) const {
+      return method_bitmap.LoadBit(MethodBitIndex(startup, index));
+    }
+
+    void MergeBitmap(const DexFileData& other) {
+      DCHECK_EQ(bitmap_storage.size(), other.bitmap_storage.size());
+      for (size_t i = 0; i < bitmap_storage.size(); ++i) {
+        bitmap_storage[i] |= other.bitmap_storage[i];
+      }
+    }
+
+    // The arena used to allocate new inline cache maps.
+    ArenaAllocator* arena_;
     // The profile key this data belongs to.
     std::string profile_key;
     // The profile index of this dex file (matches ClassReference#dex_profile_index).
@@ -308,32 +398,58 @@
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline caches reference.
-    std::set<dex::TypeIndex> class_set;
+    ArenaSet<dex::TypeIndex> class_set;
+    // Find the inline caches of the given method index. Add an empty entry if
+    // no previous data is found.
+    InlineCacheMap* FindOrAddMethod(uint16_t method_index);
+    // Num method ids.
+    uint32_t num_method_ids;
+    ArenaVector<uint8_t> bitmap_storage;
+    BitMemoryRegion method_bitmap;
 
-    bool operator==(const DexFileData& other) const {
-      return checksum == other.checksum && method_map == other.method_map;
+   private:
+    enum BitmapIndex {
+      kBitmapStartup,
+      kBitmapPostStartup,
+      kBitmapCount,
+    };
+
+    size_t MethodBitIndex(bool startup, size_t index) const {
+      DCHECK_LT(index, num_method_ids);
+      // The format is [startup bitmap][post startup bitmap], each num_method_ids bits long.
+      // This compresses better than ([startup bit][post startup bit])*
+
+      return index + (startup
+          ? kBitmapStartup * num_method_ids
+          : kBitmapPostStartup * num_method_ids);
     }
   };
 
   // Return the profile data for the given profile key or null if the dex location
   // already exists but has a different checksum
-  DexFileData* GetOrAddDexFileData(const std::string& profile_key, uint32_t checksum);
+  DexFileData* GetOrAddDexFileData(const std::string& profile_key,
+                                   uint32_t checksum,
+                                   uint32_t num_method_ids);
 
-  // Add a method index to the profile (without inline caches).
-  bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
-
-  // Add a method to the profile using its online representation (containing runtime structures).
-  bool AddMethod(const ProfileMethodInfo& pmi);
+  DexFileData* GetOrAddDexFileData(const DexFile* dex_file) {
+    return GetOrAddDexFileData(GetProfileDexFileKey(dex_file->GetLocation()),
+                               dex_file->GetLocationChecksum(),
+                               dex_file->NumMethodIds());
+  }
 
   // Add a method to the profile using its offline representation.
   // This is mostly used to facilitate testing.
   bool AddMethod(const std::string& dex_location,
                  uint32_t dex_checksum,
                  uint16_t method_index,
+                 uint32_t num_method_ids,
                  const OfflineProfileMethodInfo& pmi);
 
   // Add a class index to the profile.
-  bool AddClassIndex(const std::string& dex_location, uint32_t checksum, dex::TypeIndex type_idx);
+  bool AddClassIndex(const std::string& dex_location,
+                     uint32_t checksum,
+                     dex::TypeIndex type_idx,
+                     uint32_t num_method_ids);
 
   // Add all classes from the given dex cache to the the profile.
   bool AddResolvedClasses(const DexCacheResolvedClasses& classes);
@@ -352,9 +468,6 @@
   // doesn't contain the key.
   const DexFileData* FindDexData(const std::string& profile_key) const;
 
-  // Clear all the profile data.
-  void ClearProfile();
-
   // Checks if the profile is empty.
   bool IsEmpty() const;
 
@@ -381,6 +494,7 @@
     uint16_t class_set_size;
     uint32_t method_region_size_bytes;
     uint32_t checksum;
+    uint32_t num_method_ids;
   };
 
   // A helper structure to make sure we don't read past our buffers in the loops.
@@ -485,20 +599,27 @@
       const ClassSet& classes,
       /*out*/SafeMap<uint8_t, std::vector<dex::TypeIndex>>* dex_to_classes_map);
 
+  // Find the data for the dex_pc in the inline cache. Adds an empty entry
+  // if no previous data exists.
+  DexPcData* FindOrAddDexPc(InlineCacheMap* inline_cache, uint32_t dex_pc);
+
   friend class ProfileCompilationInfoTest;
   friend class CompilerDriverProfileTest;
   friend class ProfileAssistantTest;
   friend class Dex2oatLayoutTest;
 
+  ArenaPool default_arena_pool_;
+  ArenaAllocator arena_;
+
   // Vector containing the actual profile info.
   // The vector index is the profile index of the dex data and
   // matched DexFileData::profile_index.
-  std::vector<DexFileData*> info_;
+  ArenaVector<DexFileData*> info_;
 
   // Cache mapping profile keys to profile index.
   // This is used to speed up searches since it avoids iterating
   // over the info_ vector when searching by profile key.
-  SafeMap<const std::string, uint8_t> profile_key_map_;
+  ArenaSafeMap<const std::string, uint8_t> profile_key_map_;
 };
 
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index e8f4ce2..39670af 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -26,11 +26,20 @@
 #include "mirror/class_loader.h"
 #include "handle_scope-inl.h"
 #include "jit/profile_compilation_info.h"
+#include "linear_alloc.h"
 #include "scoped_thread_state_change-inl.h"
+#include "type_reference.h"
 
 namespace art {
 
+static constexpr size_t kMaxMethodIds = 65535;
+
 class ProfileCompilationInfoTest : public CommonRuntimeTest {
+ public:
+  void PostRuntimeCreate() OVERRIDE {
+    arena_.reset(new ArenaAllocator(Runtime::Current()->GetArenaPool()));
+  }
+
  protected:
   std::vector<ArtMethod*> GetVirtualMethods(jobject class_loader,
                                             const std::string& clazz) {
@@ -54,7 +63,7 @@
                  uint32_t checksum,
                  uint16_t method_index,
                  ProfileCompilationInfo* info) {
-    return info->AddMethodIndex(dex_location, checksum, method_index);
+    return info->AddMethodIndex(dex_location, checksum, method_index, kMaxMethodIds);
   }
 
   bool AddMethod(const std::string& dex_location,
@@ -62,14 +71,14 @@
                  uint16_t method_index,
                  const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi,
                  ProfileCompilationInfo* info) {
-    return info->AddMethod(dex_location, checksum, method_index, pmi);
+    return info->AddMethod(dex_location, checksum, method_index, kMaxMethodIds, pmi);
   }
 
   bool AddClass(const std::string& dex_location,
                 uint32_t checksum,
                 uint16_t class_index,
                 ProfileCompilationInfo* info) {
-    return info->AddMethodIndex(dex_location, checksum, class_index);
+    return info->AddClassIndex(dex_location, checksum, dex::TypeIndex(class_index), kMaxMethodIds);
   }
 
   uint32_t GetFd(const ScratchFile& file) {
@@ -117,13 +126,13 @@
       std::vector<ProfileMethodInfo::ProfileInlineCache> caches;
       // Monomorphic
       for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
-        std::vector<ProfileMethodInfo::ProfileClassReference> classes;
+        std::vector<TypeReference> classes;
         classes.emplace_back(method->GetDexFile(), dex::TypeIndex(0));
         caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
       }
       // Polymorphic
       for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
-        std::vector<ProfileMethodInfo::ProfileClassReference> classes;
+        std::vector<TypeReference> classes;
         for (uint16_t k = 0; k < InlineCache::kIndividualCacheSize / 2; k++) {
           classes.emplace_back(method->GetDexFile(), dex::TypeIndex(k));
         }
@@ -131,7 +140,7 @@
       }
       // Megamorphic
       for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
-        std::vector<ProfileMethodInfo::ProfileClassReference> classes;
+        std::vector<TypeReference> classes;
         for (uint16_t k = 0; k < 2 * InlineCache::kIndividualCacheSize; k++) {
           classes.emplace_back(method->GetDexFile(), dex::TypeIndex(k));
         }
@@ -139,10 +148,12 @@
       }
       // Missing types
       for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
-        std::vector<ProfileMethodInfo::ProfileClassReference> classes;
+        std::vector<TypeReference> classes;
         caches.emplace_back(dex_pc, /*is_missing_types*/true, classes);
       }
-      ProfileMethodInfo pmi(method->GetDexFile(), method->GetDexMethodIndex(), caches);
+      ProfileMethodInfo pmi(method->GetDexFile(),
+                            method->GetDexMethodIndex(),
+                            caches);
       profile_methods.push_back(pmi);
       profile_methods_map->Put(method, pmi);
     }
@@ -156,13 +167,22 @@
     return info.Save(filename, nullptr);
   }
 
+  // Creates an inline cache which will be destructed at the end of the test.
+  ProfileCompilationInfo::InlineCacheMap* CreateInlineCacheMap() {
+    used_inline_caches.emplace_back(new ProfileCompilationInfo::InlineCacheMap(
+        std::less<uint16_t>(), arena_->Adapter(kArenaAllocProfile)));
+    return used_inline_caches.back().get();
+  }
+
   ProfileCompilationInfo::OfflineProfileMethodInfo ConvertProfileMethodInfo(
         const ProfileMethodInfo& pmi) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi;
+    ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
+    ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi(ic_map);
     SafeMap<DexFile*, uint8_t> dex_map;  // dex files to profile index
     for (const auto& inline_cache : pmi.inline_caches) {
       ProfileCompilationInfo::DexPcData& dex_pc_data =
-          offline_pmi.inline_caches.FindOrAdd(inline_cache.dex_pc)->second;
+          ic_map->FindOrAdd(
+              inline_cache.dex_pc, ProfileCompilationInfo::DexPcData(arena_.get()))->second;
       if (inline_cache.is_missing_types) {
         dex_pc_data.SetIsMissingTypes();
       }
@@ -175,7 +195,8 @@
           const std::string& dex_key = ProfileCompilationInfo::GetProfileDexFileKey(
               class_ref.dex_file->GetLocation());
           offline_pmi.dex_references.emplace_back(dex_key,
-                                                  class_ref.dex_file->GetLocationChecksum());
+                                                  class_ref.dex_file->GetLocationChecksum(),
+                                                  class_ref.dex_file->NumMethodIds());
         }
       }
     }
@@ -184,45 +205,49 @@
 
   // Creates an offline profile used for testing inline caches.
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo() {
-    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-
-    pmi.dex_references.emplace_back("dex_location1", /* checksum */1);
-    pmi.dex_references.emplace_back("dex_location2", /* checksum */2);
-    pmi.dex_references.emplace_back("dex_location3", /* checksum */3);
+    ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
 
     // Monomorphic
     for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
     // Polymorphic
     for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
       dex_pc_data.AddClass(2, dex::TypeIndex(2));
 
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
     // Megamorphic
     for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMegamorphic();
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
     // Missing types
     for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
-      ProfileCompilationInfo::DexPcData dex_pc_data;
+      ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
       dex_pc_data.SetIsMissingTypes();
-      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      ic_map->Put(dex_pc, dex_pc_data);
     }
 
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
+
+    pmi.dex_references.emplace_back("dex_location1", /* checksum */1, kMaxMethodIds);
+    pmi.dex_references.emplace_back("dex_location2", /* checksum */2, kMaxMethodIds);
+    pmi.dex_references.emplace_back("dex_location3", /* checksum */3, kMaxMethodIds);
+
     return pmi;
   }
 
   void MakeMegamorphic(/*out*/ProfileCompilationInfo::OfflineProfileMethodInfo* pmi) {
-    for (auto it : pmi->inline_caches) {
+    ProfileCompilationInfo::InlineCacheMap* ic_map =
+        const_cast<ProfileCompilationInfo::InlineCacheMap*>(pmi->inline_caches);
+    for (auto& it : *ic_map) {  // Iterate by reference so the stored entries get updated.
       for (uint16_t k = 0; k <= 2 * InlineCache::kIndividualCacheSize; k++) {
         it.second.AddClass(0, dex::TypeIndex(k));
       }
@@ -230,7 +255,9 @@
   }
 
   void SetIsMissingTypes(/*out*/ProfileCompilationInfo::OfflineProfileMethodInfo* pmi) {
-    for (auto it : pmi->inline_caches) {
+    ProfileCompilationInfo::InlineCacheMap* ic_map =
+        const_cast<ProfileCompilationInfo::InlineCacheMap*>(pmi->inline_caches);
+    for (auto& it : *ic_map) {  // Iterate by reference so the stored entries get updated.
       it.second.SetIsMissingTypes();
     }
   }
@@ -239,6 +266,13 @@
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
   static constexpr int kProfileVersionSize = 4;
+
+  std::unique_ptr<ArenaAllocator> arena_;
+
+  // Cache of inline caches generated during tests.
+  // This makes it easier to pass data between different utilities and ensure that
+  // caches are destructed at the end of the test.
+  std::vector<std::unique_ptr<ProfileCompilationInfo::InlineCacheMap>> used_inline_caches;
 };
 
 TEST_F(ProfileCompilationInfoTest, SaveArtMethods) {
@@ -264,7 +298,8 @@
   {
     ScopedObjectAccess soa(self);
     for (ArtMethod* m : main_methods) {
-      ASSERT_TRUE(info1.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+      ASSERT_TRUE(info1.ContainsHotMethod(
+          MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
     }
   }
 
@@ -280,10 +315,12 @@
   {
     ScopedObjectAccess soa(self);
     for (ArtMethod* m : main_methods) {
-      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+      ASSERT_TRUE(
+          info2.ContainsHotMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
     }
     for (ArtMethod* m : second_methods) {
-      ASSERT_TRUE(info2.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+      ASSERT_TRUE(
+          info2.ContainsHotMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
     }
   }
 }
@@ -500,18 +537,14 @@
 
   ASSERT_TRUE(loaded_info.Equals(saved_info));
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
-                                    /* checksum */ 1,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi1));
-  ASSERT_TRUE(loaded_pmi1 == pmi);
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi2;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location4",
-                                    /* checksum */ 4,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi2));
-  ASSERT_TRUE(loaded_pmi2 == pmi);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+      loaded_info.GetMethod("dex_location1", /* checksum */ 1, /* method_idx */ 3);
+  ASSERT_TRUE(loaded_pmi1 != nullptr);
+  ASSERT_TRUE(*loaded_pmi1 == pmi);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi2 =
+      loaded_info.GetMethod("dex_location4", /* checksum */ 4, /* method_idx */ 3);
+  ASSERT_TRUE(loaded_pmi2 != nullptr);
+  ASSERT_TRUE(*loaded_pmi2 == pmi);
 }
 
 TEST_F(ProfileCompilationInfoTest, MegamorphicInlineCaches) {
@@ -550,12 +583,11 @@
 
   ASSERT_TRUE(loaded_info.Equals(saved_info));
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
-                                    /* checksum */ 1,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi1));
-  ASSERT_TRUE(loaded_pmi1 == pmi_extra);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+      loaded_info.GetMethod("dex_location1", /* checksum */ 1, /* method_idx */ 3);
+
+  ASSERT_TRUE(loaded_pmi1 != nullptr);
+  ASSERT_TRUE(*loaded_pmi1 == pmi_extra);
 }
 
 TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCaches) {
@@ -602,12 +634,10 @@
 
   ASSERT_TRUE(loaded_info.Equals(saved_info));
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
-                                    /* checksum */ 1,
-                                    /* method_idx */ 3,
-                                    &loaded_pmi1));
-  ASSERT_TRUE(loaded_pmi1 == pmi_extra);
+  std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+      loaded_info.GetMethod("dex_location1", /* checksum */ 1, /* method_idx */ 3);
+  ASSERT_TRUE(loaded_pmi1 != nullptr);
+  ASSERT_TRUE(*loaded_pmi1 == pmi_extra);
 }
 
 TEST_F(ProfileCompilationInfoTest, SaveArtMethodsWithInlineCaches) {
@@ -636,16 +666,17 @@
   {
     ScopedObjectAccess soa(self);
     for (ArtMethod* m : main_methods) {
-      ASSERT_TRUE(info.ContainsMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
+      ASSERT_TRUE(
+          info.ContainsHotMethod(MethodReference(m->GetDexFile(), m->GetDexMethodIndex())));
       const ProfileMethodInfo& pmi = profile_methods_map.find(m)->second;
-      ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi;
-      ASSERT_TRUE(info.GetMethod(m->GetDexFile()->GetLocation(),
-                                 m->GetDexFile()->GetLocationChecksum(),
-                                 m->GetDexMethodIndex(),
-                                 &offline_pmi));
+      std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> offline_pmi =
+          info.GetMethod(m->GetDexFile()->GetLocation(),
+                         m->GetDexFile()->GetLocationChecksum(),
+                         m->GetDexMethodIndex());
+      ASSERT_TRUE(offline_pmi != nullptr);
       ProfileCompilationInfo::OfflineProfileMethodInfo converted_pmi =
           ConvertProfileMethodInfo(pmi);
-      ASSERT_EQ(converted_pmi, offline_pmi);
+      ASSERT_EQ(converted_pmi, *offline_pmi);
     }
   }
 }
@@ -671,24 +702,26 @@
   ProfileCompilationInfo info;
   ProfileCompilationInfo info_reindexed;
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-  pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2);
+  ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
+  pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2, kMaxMethodIds);
   for (uint16_t dex_pc = 1; dex_pc < 5; dex_pc++) {
-    ProfileCompilationInfo::DexPcData dex_pc_data;
+    ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
     dex_pc_data.AddClass(0, dex::TypeIndex(0));
     dex_pc_data.AddClass(1, dex::TypeIndex(1));
-    pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    ic_map->Put(dex_pc, dex_pc_data);
   }
 
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi_reindexed;
-  pmi_reindexed.dex_references.emplace_back("dex_location2", /* checksum */ 2);
-  pmi_reindexed.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  ProfileCompilationInfo::InlineCacheMap* ic_map_reindexed = CreateInlineCacheMap();
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi_reindexed(ic_map_reindexed);
+  pmi_reindexed.dex_references.emplace_back("dex_location2", /* checksum */ 2, kMaxMethodIds);
+  pmi_reindexed.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
   for (uint16_t dex_pc = 1; dex_pc < 5; dex_pc++) {
-    ProfileCompilationInfo::DexPcData dex_pc_data;
+    ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
     dex_pc_data.AddClass(1, dex::TypeIndex(0));
     dex_pc_data.AddClass(0, dex::TypeIndex(1));
-    pmi_reindexed.inline_caches.Put(dex_pc, dex_pc_data);
+    ic_map_reindexed->Put(dex_pc, dex_pc_data);
   }
 
   // Profile 1 and Profile 2 get the same methods but in different order.
@@ -705,23 +738,20 @@
       "dex_location1", /* checksum */ 1, method_idx, pmi_reindexed, &info_reindexed));
   }
 
-  ProfileCompilationInfo info_backup = info;
+  ProfileCompilationInfo info_backup;
+  info_backup.MergeWith(info);
   ASSERT_TRUE(info.MergeWith(info_reindexed));
   // Merging should have no effect as we're adding the exact same stuff.
   ASSERT_TRUE(info.Equals(info_backup));
   for (uint16_t method_idx = 0; method_idx < 10; method_idx++) {
-    ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
-    ASSERT_TRUE(info.GetMethod("dex_location1",
-                                      /* checksum */ 1,
-                                      /* method_idx */ method_idx,
-                                      &loaded_pmi1));
-    ASSERT_TRUE(loaded_pmi1 == pmi);
-    ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi2;
-    ASSERT_TRUE(info.GetMethod("dex_location2",
-                                      /* checksum */ 2,
-                                      /* method_idx */ method_idx,
-                                      &loaded_pmi2));
-    ASSERT_TRUE(loaded_pmi2 == pmi);
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi1 =
+        info.GetMethod("dex_location1", /* checksum */ 1, method_idx);
+    ASSERT_TRUE(loaded_pmi1 != nullptr);
+    ASSERT_TRUE(*loaded_pmi1 == pmi);
+    std::unique_ptr<ProfileCompilationInfo::OfflineProfileMethodInfo> loaded_pmi2 =
+        info.GetMethod("dex_location2", /* checksum */ 2, method_idx);
+    ASSERT_TRUE(loaded_pmi2 != nullptr);
+    ASSERT_TRUE(*loaded_pmi2 == pmi);
   }
 }
 
@@ -739,11 +769,12 @@
 
 TEST_F(ProfileCompilationInfoTest, MegamorphicInlineCachesMerge) {
   // Create a megamorphic inline cache.
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-  ProfileCompilationInfo::DexPcData dex_pc_data;
+  ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
+  ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
   dex_pc_data.SetIsMegamorphic();
-  pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
+  ic_map->Put(/*dex_pc*/ 0, dex_pc_data);
 
   ProfileCompilationInfo info_megamorphic;
   ASSERT_TRUE(AddMethod("dex_location1",
@@ -768,11 +799,12 @@
 
 TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCachesMerge) {
   // Create an inline cache with missing types
-  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
-  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-  ProfileCompilationInfo::DexPcData dex_pc_data;
+  ProfileCompilationInfo::InlineCacheMap* ic_map = CreateInlineCacheMap();
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi(ic_map);
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1, kMaxMethodIds);
+  ProfileCompilationInfo::DexPcData dex_pc_data(arena_.get());
   dex_pc_data.SetIsMissingTypes();
-  pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
+  ic_map->Put(/*dex_pc*/ 0, dex_pc_data);
 
   ProfileCompilationInfo info_megamorphic;
   ASSERT_TRUE(AddMethod("dex_location1",
@@ -817,4 +849,48 @@
   // This should fail since the test_info already contains data and the load would overwrite it.
   ASSERT_FALSE(test_info.Load(GetFd(profile)));
 }
+
+TEST_F(ProfileCompilationInfoTest, SampledMethodsTest) {
+  ProfileCompilationInfo test_info;
+  static constexpr size_t kNumMethods = 1000;
+  static constexpr size_t kChecksum1 = 1234;
+  static constexpr size_t kChecksum2 = 4321;
+  static const std::string kDex1 = "dex1";
+  static const std::string kDex2 = "dex2";
+  test_info.AddSampledMethod(true, kDex1, kChecksum1, 1, kNumMethods);
+  test_info.AddSampledMethod(true, kDex1, kChecksum1, 5, kNumMethods);
+  test_info.AddSampledMethod(false, kDex2, kChecksum2, 1, kNumMethods);
+  test_info.AddSampledMethod(false, kDex2, kChecksum2, 5, kNumMethods);
+  auto run_test = [](const ProfileCompilationInfo& info) {
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 0));
+    EXPECT_TRUE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 1));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 3));
+    EXPECT_TRUE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 5));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex1, kChecksum1, 6));
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex2, kChecksum2, 1));  // Added with first arg false.
+    EXPECT_FALSE(info.IsStartupOrHotMethod(kDex2, kChecksum2, 5));  // Added with first arg false.
+  };
+  run_test(test_info);
+
+  // Save the profile.
+  ScratchFile profile;
+  ASSERT_TRUE(test_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+
+  // Load the profile and make sure we can read the data and it matches what we expect.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+  run_test(loaded_info);
+
+  // Test that the bitmap gets merged properly.
+  EXPECT_FALSE(test_info.IsStartupOrHotMethod(kDex1, kChecksum1, 11));
+  {
+    ProfileCompilationInfo merge_info;
+    merge_info.AddSampledMethod(true, kDex1, kChecksum1, 11, kNumMethods);
+    test_info.MergeWith(merge_info);
+  }
+  EXPECT_TRUE(test_info.IsStartupOrHotMethod(kDex1, kChecksum1, 11));
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 2dba9b7..edce9cd 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -25,12 +25,16 @@
 
 #include "art_method-inl.h"
 #include "base/enums.h"
+#include "base/scoped_arena_containers.h"
+#include "base/stl_util.h"
 #include "base/systrace.h"
 #include "base/time_utils.h"
 #include "compiler_filter.h"
+#include "dex_reference_collection.h"
 #include "gc/collector_type.h"
 #include "gc/gc_cause.h"
 #include "gc/scoped_gc_critical_section.h"
+#include "jit/profile_compilation_info-inl.h"
 #include "oat_file_manager.h"
 #include "scoped_thread_state_change-inl.h"
 
@@ -64,6 +68,12 @@
   AddTrackedLocations(output_filename, code_paths);
 }
 
+ProfileSaver::~ProfileSaver() {
+  for (auto& it : profile_cache_) {
+    delete it.second;
+  }
+}
+
 void ProfileSaver::Run() {
   Thread* self = Thread::Current();
 
@@ -173,96 +183,144 @@
   }
 }
 
+using MethodReferenceCollection = DexReferenceCollection<uint16_t, ScopedArenaAllocatorAdapter>;
+using TypeReferenceCollection = DexReferenceCollection<dex::TypeIndex,
+                                                       ScopedArenaAllocatorAdapter>;
+
 // Get resolved methods that have a profile info or more than kStartupMethodSamples samples.
 // Excludes native methods and classes in the boot image.
-class GetMethodsVisitor : public ClassVisitor {
+class GetClassesAndMethodsVisitor : public ClassVisitor {
  public:
-  GetMethodsVisitor(std::vector<MethodReference>* methods, uint32_t startup_method_samples)
-    : methods_(methods),
-      startup_method_samples_(startup_method_samples) {}
+  GetClassesAndMethodsVisitor(MethodReferenceCollection* hot_methods,
+                              MethodReferenceCollection* sampled_methods,
+                              TypeReferenceCollection* resolved_classes,
+                              uint32_t hot_method_sample_threshold)
+    : hot_methods_(hot_methods),
+      sampled_methods_(sampled_methods),
+      resolved_classes_(resolved_classes),
+      hot_method_sample_threshold_(hot_method_sample_threshold) {}
 
   virtual bool operator()(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
+    if (klass->IsProxyClass() ||
+        klass->IsArrayClass() ||
+        !klass->IsResolved() ||
+        klass->IsErroneousResolved() ||
+        klass->GetClassLoader() == nullptr) {
       return true;
     }
+    DCHECK(klass->GetDexCache() != nullptr) << klass->PrettyClass();
+    resolved_classes_->AddReference(&klass->GetDexFile(), klass->GetDexTypeIndex());
     for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
       if (!method.IsNative()) {
-        if (method.GetCounter() >= startup_method_samples_ ||
-            method.GetProfilingInfo(kRuntimePointerSize) != nullptr) {
-          // Have samples, add to profile.
-          const DexFile* dex_file =
-              method.GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetDexFile();
-          methods_->push_back(MethodReference(dex_file, method.GetDexMethodIndex()));
+        DCHECK(!method.IsProxyMethod());
+        const uint16_t counter = method.GetCounter();
+        // Mark startup methods as hot if they have at least hot_method_sample_threshold_ samples.
+        // This means they will get compiled by the compiler driver.
+        if (method.GetProfilingInfo(kRuntimePointerSize) != nullptr ||
+            (method.GetAccessFlags() & kAccPreviouslyWarm) != 0 ||
+            counter >= hot_method_sample_threshold_) {
+          hot_methods_->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
+        } else if (counter != 0) {
+          sampled_methods_->AddReference(method.GetDexFile(), method.GetDexMethodIndex());
         }
+      } else {
+        CHECK_EQ(method.GetCounter(), 0u);
       }
     }
     return true;
   }
 
  private:
-  std::vector<MethodReference>* const methods_;
-  uint32_t startup_method_samples_;
+  MethodReferenceCollection* const hot_methods_;
+  MethodReferenceCollection* const sampled_methods_;
+  TypeReferenceCollection* const resolved_classes_;
+  uint32_t hot_method_sample_threshold_;
 };
 
 void ProfileSaver::FetchAndCacheResolvedClassesAndMethods() {
   ScopedTrace trace(__PRETTY_FUNCTION__);
+  const uint64_t start_time = NanoTime();
 
   // Resolve any new registered locations.
   ResolveTrackedLocations();
 
   Thread* const self = Thread::Current();
-  std::vector<MethodReference> methods;
-  std::set<DexCacheResolvedClasses> resolved_classes;
+  Runtime* const runtime = Runtime::Current();
+  ArenaStack stack(runtime->GetArenaPool());
+  ScopedArenaAllocator allocator(&stack);
+  MethodReferenceCollection hot_methods(allocator.Adapter(), allocator.Adapter());
+  MethodReferenceCollection startup_methods(allocator.Adapter(), allocator.Adapter());
+  TypeReferenceCollection resolved_classes(allocator.Adapter(), allocator.Adapter());
+  const bool is_low_ram = Runtime::Current()->GetHeap()->IsLowMemoryMode();
+  const size_t hot_threshold = options_.GetHotStartupMethodSamples(is_low_ram);
   {
     ScopedObjectAccess soa(self);
     gc::ScopedGCCriticalSection sgcs(self,
                                      gc::kGcCauseProfileSaver,
                                      gc::kCollectorTypeCriticalSection);
-
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    resolved_classes = class_linker->GetResolvedClasses(/*ignore boot classes*/ true);
-
     {
       ScopedTrace trace2("Get hot methods");
-      GetMethodsVisitor visitor(&methods, options_.GetStartupMethodSamples());
-      class_linker->VisitClasses(&visitor);
-      VLOG(profiler) << "Methods with samples greater than "
-                     << options_.GetStartupMethodSamples() << " = " << methods.size();
+      GetClassesAndMethodsVisitor visitor(&hot_methods,
+                                          &startup_methods,
+                                          &resolved_classes,
+                                          hot_threshold);
+      runtime->GetClassLinker()->VisitClasses(&visitor);
     }
   }
+
   MutexLock mu(self, *Locks::profiler_lock_);
   uint64_t total_number_of_profile_entries_cached = 0;
 
   for (const auto& it : tracked_dex_base_locations_) {
     std::set<DexCacheResolvedClasses> resolved_classes_for_location;
     const std::string& filename = it.first;
-    const std::set<std::string>& locations = it.second;
-    std::vector<ProfileMethodInfo> profile_methods_for_location;
-    for (const MethodReference& ref : methods) {
-      if (locations.find(ref.dex_file->GetBaseLocation()) != locations.end()) {
-        profile_methods_for_location.emplace_back(ref.dex_file, ref.dex_method_index);
-      }
-    }
-    for (const DexCacheResolvedClasses& classes : resolved_classes) {
-      if (locations.find(classes.GetBaseLocation()) != locations.end()) {
-        VLOG(profiler) << "Added " << classes.GetClasses().size() << " classes for location "
-                       << classes.GetBaseLocation() << " (" << classes.GetDexLocation() << ")";
-        resolved_classes_for_location.insert(classes);
-      } else {
-        VLOG(profiler) << "Location not found " << classes.GetBaseLocation()
-                       << " (" << classes.GetDexLocation() << ")";
-      }
-    }
-    auto info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+    auto info_it = profile_cache_.Put(
+        filename,
+        new ProfileCompilationInfo(Runtime::Current()->GetArenaPool()));
+    ProfileCompilationInfo* cached_info = info_it->second;
 
-    ProfileCompilationInfo* cached_info = &(info_it->second);
-    cached_info->AddMethodsAndClasses(profile_methods_for_location,
-                                      resolved_classes_for_location);
+    const std::set<std::string>& locations = it.second;
+    for (const auto& pair : hot_methods.GetMap()) {
+      const DexFile* const dex_file = pair.first;
+      if (locations.find(dex_file->GetBaseLocation()) != locations.end()) {
+        cached_info->AddSampledMethodsForDex(/*startup*/ true,
+                                             dex_file,
+                                             pair.second.begin(),
+                                             pair.second.end());
+        // Adding hot methods is a bit slow, TODO: optimize.
+        cached_info->AddHotMethodsForDex(dex_file, pair.second.begin(), pair.second.end());
+      }
+    }
+    for (const auto& pair : startup_methods.GetMap()) {
+      const DexFile* const dex_file = pair.first;
+      if (locations.find(dex_file->GetBaseLocation()) != locations.end()) {
+        cached_info->AddSampledMethodsForDex(/*startup*/ true,
+                                             dex_file,
+                                             pair.second.begin(),
+                                             pair.second.end());
+      }
+    }
+    for (const auto& pair : resolved_classes.GetMap()) {
+      const DexFile* const dex_file = pair.first;
+      if (locations.find(dex_file->GetBaseLocation()) != locations.end()) {
+        const TypeReferenceCollection::IndexVector& classes = pair.second;
+        VLOG(profiler) << "Added " << classes.size() << " classes for location "
+                       << dex_file->GetBaseLocation()
+                       << " (" << dex_file->GetLocation() << ")";
+        cached_info->AddClassesForDex(dex_file, classes.begin(), classes.end());
+      } else {
+        VLOG(profiler) << "Location not found " << dex_file->GetBaseLocation()
+                       << " (" << dex_file->GetLocation() << ")";
+      }
+    }
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
       max_number_of_profile_entries_cached_,
       total_number_of_profile_entries_cached);
+  VLOG(profiler) << "Profile saver recorded " << hot_methods.NumReferences() << " hot methods and "
+                 << startup_methods.NumReferences() << " startup methods with threshold "
+                 << hot_threshold << " in " << PrettyDuration(NanoTime() - start_time);
 }
 
 bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
@@ -279,7 +337,6 @@
   }
 
   bool profile_file_saved = false;
-  uint64_t total_number_of_profile_entries_cached = 0;
   if (number_of_new_methods != nullptr) {
     *number_of_new_methods = 0;
   }
@@ -300,60 +357,69 @@
       jit_code_cache_->GetProfiledMethods(locations, profile_methods);
       total_number_of_code_cache_queries_++;
     }
-    ProfileCompilationInfo info;
-    if (!info.Load(filename, /*clear_if_invalid*/ true)) {
-      LOG(WARNING) << "Could not forcefully load profile " << filename;
-      continue;
-    }
-    uint64_t last_save_number_of_methods = info.GetNumberOfMethods();
-    uint64_t last_save_number_of_classes = info.GetNumberOfResolvedClasses();
+    {
+      ProfileCompilationInfo info(Runtime::Current()->GetArenaPool());
+      if (!info.Load(filename, /*clear_if_invalid*/ true)) {
+        LOG(WARNING) << "Could not forcefully load profile " << filename;
+        continue;
+      }
+      uint64_t last_save_number_of_methods = info.GetNumberOfMethods();
+      uint64_t last_save_number_of_classes = info.GetNumberOfResolvedClasses();
 
-    info.AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
-    auto profile_cache_it = profile_cache_.find(filename);
-    if (profile_cache_it != profile_cache_.end()) {
-      info.MergeWith(profile_cache_it->second);
-    }
-
-    int64_t delta_number_of_methods = info.GetNumberOfMethods() - last_save_number_of_methods;
-    int64_t delta_number_of_classes = info.GetNumberOfResolvedClasses() - last_save_number_of_classes;
-
-    if (!force_save &&
-        delta_number_of_methods < options_.GetMinMethodsToSave() &&
-        delta_number_of_classes < options_.GetMinClassesToSave()) {
-      VLOG(profiler) << "Not enough information to save to: " << filename
-          << " Number of methods: " << delta_number_of_methods
-          << " Number of classes: " << delta_number_of_classes;
-      total_number_of_skipped_writes_++;
-      continue;
-    }
-    if (number_of_new_methods != nullptr) {
-      *number_of_new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods),
-                                        *number_of_new_methods);
-    }
-    uint64_t bytes_written;
-    // Force the save. In case the profile data is corrupted or the the profile
-    // has the wrong version this will "fix" the file to the correct format.
-    if (info.Save(filename, &bytes_written)) {
-      // We managed to save the profile. Clear the cache stored during startup.
+      info.AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
+      auto profile_cache_it = profile_cache_.find(filename);
       if (profile_cache_it != profile_cache_.end()) {
-        profile_cache_.erase(profile_cache_it);
-        total_number_of_profile_entries_cached = 0;
+        info.MergeWith(*(profile_cache_it->second));
       }
-      if (bytes_written > 0) {
-        total_number_of_writes_++;
-        total_bytes_written_ += bytes_written;
-        profile_file_saved = true;
-      } else {
-        // At this point we could still have avoided the write.
-        // We load and merge the data from the file lazily at its first ever
-        // save attempt. So, whatever we are trying to save could already be
-        // in the file.
+
+      int64_t delta_number_of_methods =
+          info.GetNumberOfMethods() - last_save_number_of_methods;
+      int64_t delta_number_of_classes =
+          info.GetNumberOfResolvedClasses() - last_save_number_of_classes;
+
+      if (!force_save &&
+          delta_number_of_methods < options_.GetMinMethodsToSave() &&
+          delta_number_of_classes < options_.GetMinClassesToSave()) {
+        VLOG(profiler) << "Not enough information to save to: " << filename
+                       << " Number of methods: " << delta_number_of_methods
+                       << " Number of classes: " << delta_number_of_classes;
         total_number_of_skipped_writes_++;
+        continue;
       }
-    } else {
-      LOG(WARNING) << "Could not save profiling info to " << filename;
-      total_number_of_failed_writes_++;
+      if (number_of_new_methods != nullptr) {
+        *number_of_new_methods =
+            std::max(static_cast<uint16_t>(delta_number_of_methods),
+                     *number_of_new_methods);
+      }
+      uint64_t bytes_written;
+      // Force the save. In case the profile data is corrupted or the profile
+      // has the wrong version this will "fix" the file to the correct format.
+      if (info.Save(filename, &bytes_written)) {
+        // We managed to save the profile. Clear the cache stored during startup.
+        if (profile_cache_it != profile_cache_.end()) {
+          ProfileCompilationInfo *cached_info = profile_cache_it->second;
+          profile_cache_.erase(profile_cache_it);
+          delete cached_info;
+        }
+        if (bytes_written > 0) {
+          total_number_of_writes_++;
+          total_bytes_written_ += bytes_written;
+          profile_file_saved = true;
+        } else {
+          // At this point we could still have avoided the write.
+          // We load and merge the data from the file lazily at its first ever
+          // save attempt. So, whatever we are trying to save could already be
+          // in the file.
+          total_number_of_skipped_writes_++;
+        }
+      } else {
+        LOG(WARNING) << "Could not save profiling info to " << filename;
+        total_number_of_failed_writes_++;
+      }
     }
+    // Trim the maps to madvise the pages used for profile info.
+    // It is unlikely we will need them again in the near future.
+    Runtime::Current()->GetArenaPool()->TrimMaps();
   }
 
   return profile_file_saved;
@@ -579,11 +645,11 @@
                                  uint16_t method_idx) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
-    ProfileCompilationInfo info;
+    ProfileCompilationInfo info(Runtime::Current()->GetArenaPool());
     if (!info.Load(profile, /*clear_if_invalid*/false)) {
       return false;
     }
-    return info.ContainsMethod(MethodReference(dex_file, method_idx));
+    return info.ContainsHotMethod(MethodReference(dex_file, method_idx));
   }
   return false;
 }
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 60c9cc6..01d72fe 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -65,6 +65,7 @@
                const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
                const std::vector<std::string>& code_paths);
+  ~ProfileSaver();
 
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* RunProfileSaverThread(void* arg)
@@ -131,7 +132,7 @@
   // we don't hammer the disk to save them right away.
   // The size of this cache is usually very small and tops
   // to just a few hundreds entries in the ProfileCompilationInfo objects.
-  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
+  SafeMap<std::string, ProfileCompilationInfo*> profile_cache_;
 
   // Save period condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
index 07aeb66..44550f4 100644
--- a/runtime/jit/profile_saver_options.h
+++ b/runtime/jit/profile_saver_options.h
@@ -22,18 +22,20 @@
  public:
   static constexpr uint32_t kMinSavePeriodMs = 40 * 1000;  // 40 seconds
   static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000;  // 5 seconds
-  // Minimum number of JIT samples during launch to include a method into the profile.
-  static constexpr uint32_t kStartupMethodSamples = 1;
+  // Minimum number of JIT samples during launch to mark a method as hot in the profile.
+  static constexpr uint32_t kHotStartupMethodSamples = 1;
+  static constexpr uint32_t kHotStartupMethodSamplesLowRam = 256;
   static constexpr uint32_t kMinMethodsToSave = 10;
   static constexpr uint32_t kMinClassesToSave = 10;
   static constexpr uint32_t kMinNotificationBeforeWake = 10;
   static constexpr uint32_t kMaxNotificationBeforeWake = 50;
+  static constexpr uint32_t kHotStartupMethodSamplesNotSet = std::numeric_limits<uint32_t>::max();
 
   ProfileSaverOptions() :
     enabled_(false),
     min_save_period_ms_(kMinSavePeriodMs),
     save_resolved_classes_delay_ms_(kSaveResolvedClassesDelayMs),
-    startup_method_samples_(kStartupMethodSamples),
+    hot_startup_method_samples_(kHotStartupMethodSamplesNotSet),
     min_methods_to_save_(kMinMethodsToSave),
     min_classes_to_save_(kMinClassesToSave),
     min_notification_before_wake_(kMinNotificationBeforeWake),
@@ -44,7 +46,7 @@
       bool enabled,
       uint32_t min_save_period_ms,
       uint32_t save_resolved_classes_delay_ms,
-      uint32_t startup_method_samples,
+      uint32_t hot_startup_method_samples,
       uint32_t min_methods_to_save,
       uint32_t min_classes_to_save,
       uint32_t min_notification_before_wake,
@@ -53,7 +55,7 @@
     enabled_(enabled),
     min_save_period_ms_(min_save_period_ms),
     save_resolved_classes_delay_ms_(save_resolved_classes_delay_ms),
-    startup_method_samples_(startup_method_samples),
+    hot_startup_method_samples_(hot_startup_method_samples),
     min_methods_to_save_(min_methods_to_save),
     min_classes_to_save_(min_classes_to_save),
     min_notification_before_wake_(min_notification_before_wake),
@@ -73,8 +75,12 @@
   uint32_t GetSaveResolvedClassesDelayMs() const {
     return save_resolved_classes_delay_ms_;
   }
-  uint32_t GetStartupMethodSamples() const {
-    return startup_method_samples_;
+  uint32_t GetHotStartupMethodSamples(bool is_low_ram) const {
+    uint32_t ret = hot_startup_method_samples_;
+    if (ret == kHotStartupMethodSamplesNotSet) {
+      ret = is_low_ram ? kHotStartupMethodSamplesLowRam : kHotStartupMethodSamples;
+    }
+    return ret;
   }
   uint32_t GetMinMethodsToSave() const {
     return min_methods_to_save_;
@@ -96,7 +102,7 @@
     os << "enabled_" << pso.enabled_
         << ", min_save_period_ms_" << pso.min_save_period_ms_
         << ", save_resolved_classes_delay_ms_" << pso.save_resolved_classes_delay_ms_
-        << ", startup_method_samples_" << pso.startup_method_samples_
+        << ", hot_startup_method_samples_" << pso.hot_startup_method_samples_
         << ", min_methods_to_save_" << pso.min_methods_to_save_
         << ", min_classes_to_save_" << pso.min_classes_to_save_
         << ", min_notification_before_wake_" << pso.min_notification_before_wake_
@@ -107,7 +113,9 @@
   bool enabled_;
   uint32_t min_save_period_ms_;
   uint32_t save_resolved_classes_delay_ms_;
-  uint32_t startup_method_samples_;
+  // Do not access hot_startup_method_samples_ directly for reading since it may be set to the
+  // placeholder default.
+  uint32_t hot_startup_method_samples_;
   uint32_t min_methods_to_save_;
   uint32_t min_classes_to_save_;
   uint32_t min_notification_before_wake_;
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index d6881aa..788fa1f 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -29,11 +29,11 @@
 
 namespace jit {
 class JitCodeCache;
-}
+}  // namespace jit
 
 namespace mirror {
 class Class;
-}
+}  // namespace mirror
 
 // Structure to store the classes seen at runtime for a specific instruction.
 // Once the classes_ array is full, we consider the INVOKE to be megamorphic.
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index 0148a1c..3ff94f9 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -28,7 +28,7 @@
 #include "lock_word.h"
 #include "mirror/object-inl.h"
 #include "nth_caller_visitor.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
@@ -123,8 +123,8 @@
   monitors.Dump(os);
 }
 
-void JNIEnvExt::PushFrame(int capacity ATTRIBUTE_UNUSED) {
-  // TODO: take 'capacity' into account.
+void JNIEnvExt::PushFrame(int capacity) {
+  DCHECK_GE(locals.FreeCapacity(), static_cast<size_t>(capacity));
   stacked_local_ref_cookies.push_back(local_ref_cookie);
   local_ref_cookie = locals.GetSegmentState();
 }
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 60e4295..af933ae 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -22,7 +22,6 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "indirect_reference_table.h"
-#include "object_callbacks.h"
 #include "obj_ptr.h"
 #include "reference_table.h"
 
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 2626eef..6be0953 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -106,10 +106,9 @@
 static void ReportInvalidJNINativeMethod(const ScopedObjectAccess& soa,
                                          ObjPtr<mirror::Class> c,
                                          const char* kind,
-                                         jint idx,
-                                         bool return_errors)
+                                         jint idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
+  LOG(ERROR)
       << "Failed to register native method in " << c->PrettyDescriptor()
       << " in " << c->GetDexCache()->GetLocation()->ToModifiedUtf8()
       << ": " << kind << " is null at index " << idx;
@@ -2145,13 +2144,10 @@
                                                                      buf);
   }
 
-  static jint RegisterNatives(JNIEnv* env, jclass java_class, const JNINativeMethod* methods,
+  static jint RegisterNatives(JNIEnv* env,
+                              jclass java_class,
+                              const JNINativeMethod* methods,
                               jint method_count) {
-    return RegisterNativeMethods(env, java_class, methods, method_count, true);
-  }
-
-  static jint RegisterNativeMethods(JNIEnv* env, jclass java_class, const JNINativeMethod* methods,
-                                    jint method_count, bool return_errors) {
     if (UNLIKELY(method_count < 0)) {
       JavaVmExtFromEnv(env)->JniAbortF("RegisterNatives", "negative method count: %d",
                                        method_count);
@@ -2172,13 +2168,13 @@
       const char* sig = methods[i].signature;
       const void* fnPtr = methods[i].fnPtr;
       if (UNLIKELY(name == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c.Get(), "method name", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "method name", i);
         return JNI_ERR;
       } else if (UNLIKELY(sig == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c.Get(), "method signature", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "method signature", i);
         return JNI_ERR;
       } else if (UNLIKELY(fnPtr == nullptr)) {
-        ReportInvalidJNINativeMethod(soa, c.Get(), "native function", i, return_errors);
+        ReportInvalidJNINativeMethod(soa, c.Get(), "native function", i);
         return JNI_ERR;
       }
       bool is_fast = false;
@@ -2244,19 +2240,15 @@
       }
 
       if (m == nullptr) {
-        c->DumpClass(
-            LOG_STREAM(return_errors
-                           ? ::android::base::ERROR
-                           : ::android::base::FATAL_WITHOUT_ABORT),
-            mirror::Class::kDumpClassFullDetail);
-        LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
+        c->DumpClass(LOG_STREAM(ERROR), mirror::Class::kDumpClassFullDetail);
+        LOG(ERROR)
             << "Failed to register native method "
             << c->PrettyDescriptor() << "." << name << sig << " in "
             << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
         ThrowNoSuchMethodError(soa, c.Get(), name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
-        LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
+        LOG(ERROR)
             << "Failed to register non-native method "
             << c->PrettyDescriptor() << "." << name << sig
             << " as native";
@@ -2407,18 +2399,18 @@
   static jint EnsureLocalCapacityInternal(ScopedObjectAccess& soa, jint desired_capacity,
                                           const char* caller)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    // TODO: we should try to expand the table if necessary.
-    if (desired_capacity < 0 || desired_capacity > static_cast<jint>(kLocalsInitial)) {
+    if (desired_capacity < 0) {
       LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
       return JNI_ERR;
     }
-    // TODO: this isn't quite right, since "capacity" includes holes.
-    const size_t capacity = soa.Env()->locals.Capacity();
-    bool okay = (static_cast<jint>(kLocalsInitial - capacity) >= desired_capacity);
-    if (!okay) {
-      soa.Self()->ThrowOutOfMemoryError(caller);
+
+    std::string error_msg;
+    if (!soa.Env()->locals.EnsureFreeCapacity(static_cast<size_t>(desired_capacity), &error_msg)) {
+      std::string caller_error = android::base::StringPrintf("%s: %s", caller, error_msg.c_str());
+      soa.Self()->ThrowOutOfMemoryError(caller_error.c_str());
+      return JNI_ERR;
     }
-    return okay ? JNI_OK : JNI_ERR;
+    return JNI_OK;
   }
 
   template<typename JniT, typename ArtT>
@@ -3051,15 +3043,6 @@
   return reinterpret_cast<JNINativeInterface*>(&gJniSleepForeverStub);
 }
 
-void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
-                           jint method_count) {
-  ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
-  if (c.get() == nullptr) {
-    LOG(FATAL) << "Couldn't find class: " << jni_class_name;
-  }
-  JNI::RegisterNativeMethods(env, c.get(), methods, method_count, false);
-}
-
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs) {
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index 580a42b..2c90b3b 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -19,13 +19,9 @@
 
 #include <jni.h>
 #include <iosfwd>
-#include "nativehelper/jni_macros.h"
 
 #include "base/macros.h"
 
-#define REGISTER_NATIVE_METHODS(jni_class_name) \
-  RegisterNativeMethods(env, jni_class_name, gMethods, arraysize(gMethods))
-
 namespace art {
 
 class ArtField;
@@ -34,11 +30,6 @@
 const JNINativeInterface* GetJniNativeInterface();
 const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
-// Similar to RegisterNatives except its passed a descriptor for a class name and failures are
-// fatal.
-void RegisterNativeMethods(JNIEnv* env, const char* jni_class_name, const JNINativeMethod* methods,
-                           jint method_count);
-
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
 
 namespace jni {
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 08d1eeb..e1e4f9c 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1908,9 +1908,6 @@
 
   // Negative capacities are not allowed.
   ASSERT_EQ(JNI_ERR, env_->PushLocalFrame(-1));
-
-  // And it's okay to have an upper limit. Ours is currently 512.
-  ASSERT_EQ(JNI_ERR, env_->PushLocalFrame(8192));
 }
 
 TEST_F(JniInternalTest, PushLocalFrame_PopLocalFrame) {
@@ -1962,6 +1959,28 @@
   check_jni_abort_catcher.Check("use of deleted local reference");
 }
 
+TEST_F(JniInternalTest, PushLocalFrame_LimitAndOverflow) {
+  // Requesting the largest possible capacity must fail and leave an exception pending.
+  ASSERT_NE(JNI_OK, env_->PushLocalFrame(std::numeric_limits<jint>::max()));
+  ASSERT_TRUE(env_->ExceptionCheck());
+  env_->ExceptionClear();  // Clear it so the following JNI calls start without one pending.
+
+  // On 32-bit, also check overflow: a huge request on top of a live frame must fail.
+#ifndef __LP64__
+  ASSERT_EQ(JNI_OK, env_->PushLocalFrame(10));
+  ASSERT_NE(JNI_OK, env_->PushLocalFrame(std::numeric_limits<jint>::max() - 10));
+  ASSERT_TRUE(env_->ExceptionCheck());
+  env_->ExceptionClear();
+  EXPECT_EQ(env_->PopLocalFrame(nullptr), nullptr);  // Pops the 10-entry frame pushed above.
+#endif
+}
+
+TEST_F(JniInternalTest, PushLocalFrame_b62223672) {
+  // The 512 entry limit has been lifted, so a request for 1024 entries must now succeed.
+  ASSERT_EQ(JNI_OK, env_->PushLocalFrame(1024));
+  EXPECT_EQ(env_->PopLocalFrame(nullptr), nullptr);  // Balance the successful push.
+}
+
 TEST_F(JniInternalTest, NewGlobalRef_nullptr) {
   EXPECT_EQ(env_->NewGlobalRef(nullptr), nullptr);
 }
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
index e9db9b8..3f01fc3 100644
--- a/runtime/linear_alloc.cc
+++ b/runtime/linear_alloc.cc
@@ -16,7 +16,7 @@
 
 #include "linear_alloc.h"
 
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/managed_stack-inl.h b/runtime/managed_stack-inl.h
new file mode 100644
index 0000000..f3f31cf
--- /dev/null
+++ b/runtime/managed_stack-inl.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MANAGED_STACK_INL_H_
+#define ART_RUNTIME_MANAGED_STACK_INL_H_
+
+#include "managed_stack.h"
+
+#include <cstring>
+#include <stdint.h>
+#include <string>
+
+#include "stack.h"
+
+namespace art {
+
+inline ShadowFrame* ManagedStack::PushShadowFrame(ShadowFrame* new_top_frame) {
+  DCHECK(top_quick_frame_ == nullptr);  // No quick frame may be on top of this fragment.
+  ShadowFrame* const previous_top = top_shadow_frame_;
+  top_shadow_frame_ = new_top_frame;
+  new_top_frame->SetLink(previous_top);  // Chain the former top beneath the new frame.
+  return previous_top;
+}
+
+inline ShadowFrame* ManagedStack::PopShadowFrame() {
+  DCHECK(top_quick_frame_ == nullptr);
+  CHECK(top_shadow_frame_ != nullptr);  // Popping requires a frame to be present.
+  ShadowFrame* const popped = top_shadow_frame_;
+  top_shadow_frame_ = popped->GetLink();  // The frame linked below becomes the new top.
+  return popped;
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_MANAGED_STACK_INL_H_
diff --git a/runtime/managed_stack.cc b/runtime/managed_stack.cc
new file mode 100644
index 0000000..be609c3
--- /dev/null
+++ b/runtime/managed_stack.cc
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_stack-inl.h"
+
+#include "android-base/stringprintf.h"
+
+#include "art_method.h"
+#include "mirror/object.h"
+#include "stack_reference.h"
+
+namespace art {
+
+size_t ManagedStack::NumJniShadowFrameReferences() const {
+  size_t num_refs = 0;
+  for (const ManagedStack* fragment = this; fragment != nullptr;
+       fragment = fragment->GetLink()) {
+    for (ShadowFrame* frame = fragment->top_shadow_frame_;
+         frame != nullptr;
+         frame = frame->GetLink()) {
+      if (frame->GetMethod()->IsNative()) {
+        // A native method's shadow frame holds only references, so every vreg is counted.
+        num_refs += frame->NumberOfVRegs();
+      }
+    }
+  }
+  return num_refs;
+}
+
+bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const {
+  for (const ManagedStack* fragment = this; fragment != nullptr;
+       fragment = fragment->GetLink()) {
+    for (ShadowFrame* frame = fragment->top_shadow_frame_;
+         frame != nullptr;
+         frame = frame->GetLink()) {
+      if (frame->Contains(shadow_frame_entry)) {
+        return true;  // Found in one of the linked shadow frames; stop searching.
+      }
+    }
+  }
+  return false;  // No shadow frame in any linked fragment contains the entry.
+}
+
+}  // namespace art
diff --git a/runtime/managed_stack.h b/runtime/managed_stack.h
new file mode 100644
index 0000000..8337f96
--- /dev/null
+++ b/runtime/managed_stack.h
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MANAGED_STACK_H_
+#define ART_RUNTIME_MANAGED_STACK_H_
+
+#include <cstring>
+#include <stdint.h>
+#include <string>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+
+namespace art {
+
+namespace mirror {
+class Object;
+}  // namespace mirror
+
+class ArtMethod;
+class ShadowFrame;
+template <typename T> class StackReference;
+
+// The managed stack is used to record fragments of managed code stacks. Managed code stacks
+// may either be shadow frames or lists of frames using fixed frame sizes. Transition records are
+// necessary for transitions between code using different frame layouts and transitions into native
+// code.
+class PACKED(4) ManagedStack {
+ public:
+  ManagedStack()
+      : top_quick_frame_(nullptr), link_(nullptr), top_shadow_frame_(nullptr) {}
+
+  void PushManagedStackFragment(ManagedStack* fragment) {
+    // Save the current top state (including its link_) into the caller-provided fragment.
+    memcpy(fragment, this, sizeof(ManagedStack));
+    // Reset this object, which now represents a new, empty top fragment.
+    memset(this, 0, sizeof(ManagedStack));
+    // Chain the saved state beneath the new top.
+    link_ = fragment;
+  }
+
+  void PopManagedStackFragment(const ManagedStack& fragment) {
+    DCHECK(&fragment == link_);  // Only the fragment directly below the top may be popped.
+    // Restore the saved state into this object; this also restores the earlier link_.
+    memcpy(this, &fragment, sizeof(ManagedStack));
+  }
+
+  ManagedStack* GetLink() const {
+    return link_;
+  }
+
+  ArtMethod** GetTopQuickFrame() const {
+    return top_quick_frame_;
+  }
+
+  void SetTopQuickFrame(ArtMethod** top) {
+    DCHECK(top_shadow_frame_ == nullptr);  // A shadow frame must not be on top already.
+    top_quick_frame_ = top;
+  }
+
+  static size_t TopQuickFrameOffset() {  // Offset of top_quick_frame_ within ManagedStack.
+    return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_);
+  }
+
+  ALWAYS_INLINE ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame);
+  ALWAYS_INLINE ShadowFrame* PopShadowFrame();
+
+  ShadowFrame* GetTopShadowFrame() const {
+    return top_shadow_frame_;
+  }
+
+  void SetTopShadowFrame(ShadowFrame* top) {
+    DCHECK(top_quick_frame_ == nullptr);  // A quick frame must not be on top already.
+    top_shadow_frame_ = top;
+  }
+
+  static size_t TopShadowFrameOffset() {  // Offset of top_shadow_frame_ within ManagedStack.
+    return OFFSETOF_MEMBER(ManagedStack, top_shadow_frame_);
+  }
+
+  size_t NumJniShadowFrameReferences() const REQUIRES_SHARED(Locks::mutator_lock_);
+
+  bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
+
+ private:
+  ArtMethod** top_quick_frame_;    // Top quick frame; DCHECKed null when a shadow frame is set.
+  ManagedStack* link_;             // Fragment holding the state saved by the last push, or null.
+  ShadowFrame* top_shadow_frame_;  // Top shadow frame; DCHECKed null when a quick frame is set.
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_MANAGED_STACK_H_
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 6c39361..c847942 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -23,6 +23,7 @@
 #include <sys/resource.h>
 #endif
 
+#include <map>
 #include <memory>
 #include <sstream>
 
@@ -32,6 +33,7 @@
 #include "cutils/ashmem.h"
 
 #include "base/allocator.h"
+#include "base/bit_utils.h"
 #include "base/memory_tool.h"
 #include "globals.h"
 #include "utils.h"
@@ -46,6 +48,10 @@
 using android::base::StringPrintf;
 using android::base::unique_fd;
 
+template<class Key, class T, AllocatorTag kTag, class Compare = std::less<Key>>
+using AllocationTrackingMultiMap =
+    std::multimap<Key, T, Compare, TrackingAllocator<std::pair<const Key, T>, kTag>>;
+
 using Maps = AllocationTrackingMultiMap<void*, MemMap*, kAllocatorTagMaps>;
 
 // All the non-empty MemMaps. Use a multimap as we do a reserve-and-divide (eg ElfMap::Load()).
@@ -187,7 +193,7 @@
     *error_msg = StringPrintf("Failed to build process map");
     return false;
   }
-  ScopedBacktraceMapIteratorLock(map.get());
+  ScopedBacktraceMapIteratorLock lock(map.get());
   for (BacktraceMap::const_iterator it = map->begin(); it != map->end(); ++it) {
     if ((begin >= it->start && begin < it->end)      // start of new within old
         || (end > it->start && end < it->end)        // end of new within old
@@ -952,6 +958,9 @@
 }
 
 void ZeroAndReleasePages(void* address, size_t length) {
+  if (length == 0) {
+    return;
+  }
   uint8_t* const mem_begin = reinterpret_cast<uint8_t*>(address);
   uint8_t* const mem_end = mem_begin + length;
   uint8_t* const page_begin = AlignUp(mem_begin, kPageSize);
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index aa306ac..5f027b1 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -16,6 +16,8 @@
 
 #include "mem_map.h"
 
+#include <sys/mman.h>
+
 #include <memory>
 
 #include "common_runtime_test.h"
diff --git a/runtime/method_bss_mapping.h b/runtime/method_bss_mapping.h
new file mode 100644
index 0000000..1476f93
--- /dev/null
+++ b/runtime/method_bss_mapping.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_METHOD_BSS_MAPPING_H_
+#define ART_RUNTIME_METHOD_BSS_MAPPING_H_
+
+#include "base/bit_utils.h"
+#include "base/length_prefixed_array.h"
+
+namespace art {
+
+// MethodBssMappingEntry describes a mapping of up to 17 method indexes to their offsets
+// in the .bss. The highest index and its associated .bss offset are stored in plain form
+// as `method_index` and `bss_offset`, respectively, while the additional indexes can be
+// stored in compressed form if their associated .bss entries are consecutive and in the
+// method index order. Each of the 16 bits of the `index_mask` corresponds to one of the
+// previous 16 method indexes and indicates whether there is a .bss entry for that index.
+// Bit `16 - d` of `index_mask` stands for method index `method_index - d`, 1 <= d <= 16.
+struct MethodBssMappingEntry {
+  bool CoversIndex(uint32_t method_idx) const {
+    uint32_t diff = method_index - method_idx;  // Unsigned: wraps above 16 if method_idx is larger.
+    return (diff == 0) || (diff <= 16 && ((index_mask >> (16u - diff)) & 1u) != 0);
+  }
+
+  uint32_t GetBssOffset(uint32_t method_idx, size_t entry_size) const {
+    DCHECK(CoversIndex(method_idx));  // Callers must only ask about covered indexes.
+    uint32_t diff = method_index - method_idx;
+    if (diff == 0) {
+      return bss_offset;
+    } else {  // Step back one entry per covered index in [method_idx, method_index).
+      return bss_offset - POPCOUNT(index_mask >> (16u - diff)) * entry_size;
+    }
+  }
+
+  uint16_t method_index;  // Highest method index described by this entry.
+  uint16_t index_mask;    // Presence bits for the 16 indexes below `method_index`.
+  uint32_t bss_offset;    // .bss offset associated with `method_index`.
+};
+
+using MethodBssMapping = LengthPrefixedArray<MethodBssMappingEntry>;
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_METHOD_BSS_MAPPING_H_
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
index 54d45b1..090bac1 100644
--- a/runtime/method_handles.cc
+++ b/runtime/method_handles.cc
@@ -514,7 +514,15 @@
     }
   }
 
-  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
+  bool use_interpreter_entrypoint = ClassLinker::ShouldUseInterpreterEntrypoint(
+      called_method, called_method->GetEntryPointFromQuickCompiledCode());
+  PerformCall(self,
+              code_item,
+              shadow_frame.GetMethod(),
+              first_dest_reg,
+              new_shadow_frame,
+              result,
+              use_interpreter_entrypoint);
   if (self->IsExceptionPending()) {
     return false;
   }
@@ -602,12 +610,15 @@
   new_shadow_frame->SetVRegReference(0, receiver.Get());
   new_shadow_frame->SetVRegReference(1, sf.Get());
 
+  bool use_interpreter_entrypoint = ClassLinker::ShouldUseInterpreterEntrypoint(
+      called_method, called_method->GetEntryPointFromQuickCompiledCode());
   PerformCall(self,
               code_item,
               shadow_frame.GetMethod(),
               0 /* first destination register */,
               new_shadow_frame,
-              result);
+              result,
+              use_interpreter_entrypoint);
   if (self->IsExceptionPending()) {
     return false;
   }
@@ -1091,7 +1102,15 @@
                                          num_input_regs);
   self->EndAssertNoThreadSuspension(old_cause);
 
-  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
+  bool use_interpreter_entrypoint = ClassLinker::ShouldUseInterpreterEntrypoint(
+      called_method, called_method->GetEntryPointFromQuickCompiledCode());
+  PerformCall(self,
+              code_item,
+              shadow_frame.GetMethod(),
+              first_dest_reg,
+              new_shadow_frame,
+              result,
+              use_interpreter_entrypoint);
   if (self->IsExceptionPending()) {
     return false;
   }
diff --git a/runtime/method_handles.h b/runtime/method_handles.h
index 5bea0ab..e02e620 100644
--- a/runtime/method_handles.h
+++ b/runtime/method_handles.h
@@ -23,13 +23,14 @@
 #include "handle.h"
 #include "jvalue.h"
 #include "mirror/class.h"
+#include "stack.h"
 
 namespace art {
 
 namespace mirror {
   class MethodHandle;
   class MethodType;
-}  // mirror
+}  // namespace mirror
 
 class ShadowFrame;
 
diff --git a/runtime/method_reference.h b/runtime/method_reference.h
index 0b0afe6..3948ed5 100644
--- a/runtime/method_reference.h
+++ b/runtime/method_reference.h
@@ -44,6 +44,56 @@
   }
 };
 
+// Compare the actual referenced method signatures. Used for method reference deduplication.
+struct MethodReferenceValueComparator {
+  bool operator()(MethodReference mr1, MethodReference mr2) const {
+    if (mr1.dex_file == mr2.dex_file) {  // Same dex file: comparing the indexes is enough.
+      DCHECK_EQ(mr1.dex_method_index < mr2.dex_method_index, SlowCompare(mr1, mr2));
+      return mr1.dex_method_index < mr2.dex_method_index;
+    } else {  // Different dex files: compare the referenced strings instead.
+      return SlowCompare(mr1, mr2);
+    }
+  }
+
+  bool SlowCompare(MethodReference mr1, MethodReference mr2) const {
+    // The order is the same as for method ids in a single dex file.
+    // Compare the class descriptors first.
+    const DexFile::MethodId& mid1 = mr1.dex_file->GetMethodId(mr1.dex_method_index);
+    const DexFile::MethodId& mid2 = mr2.dex_file->GetMethodId(mr2.dex_method_index);
+    int descriptor_diff = strcmp(mr1.dex_file->StringByTypeIdx(mid1.class_idx_),
+                                 mr2.dex_file->StringByTypeIdx(mid2.class_idx_));
+    if (descriptor_diff != 0) {
+      return descriptor_diff < 0;
+    }
+    // Compare names second.
+    int name_diff = strcmp(mr1.dex_file->GetMethodName(mid1), mr2.dex_file->GetMethodName(mid2));
+    if (name_diff != 0) {
+      return name_diff < 0;
+    }
+    // And then compare proto ids, starting with return type comparison.
+    const DexFile::ProtoId& prid1 = mr1.dex_file->GetProtoId(mid1.proto_idx_);
+    const DexFile::ProtoId& prid2 = mr2.dex_file->GetProtoId(mid2.proto_idx_);
+    int return_type_diff = strcmp(mr1.dex_file->StringByTypeIdx(prid1.return_type_idx_),
+                                  mr2.dex_file->StringByTypeIdx(prid2.return_type_idx_));
+    if (return_type_diff != 0) {
+      return return_type_diff < 0;
+    }
+    // And finishing with lexicographical parameter comparison.
+    const DexFile::TypeList* params1 = mr1.dex_file->GetProtoParameters(prid1);
+    size_t param1_size = (params1 != nullptr) ? params1->Size() : 0u;  // Null == empty.
+    const DexFile::TypeList* params2 = mr2.dex_file->GetProtoParameters(prid2);
+    size_t param2_size = (params2 != nullptr) ? params2->Size() : 0u;  // Null == empty.
+    for (size_t i = 0, num = std::min(param1_size, param2_size); i != num; ++i) {
+      int param_diff = strcmp(mr1.dex_file->StringByTypeIdx(params1->GetTypeItem(i).type_idx_),
+                              mr2.dex_file->StringByTypeIdx(params2->GetTypeItem(i).type_idx_));
+      if (param_diff != 0) {
+        return param_diff < 0;
+      }
+    }
+    return param1_size < param2_size;  // Equal common prefix: fewer parameters sorts first.
+  }
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_METHOD_REFERENCE_H_
diff --git a/runtime/mirror/accessible_object.h b/runtime/mirror/accessible_object.h
index 2581ac2..a217193 100644
--- a/runtime/mirror/accessible_object.h
+++ b/runtime/mirror/accessible_object.h
@@ -20,7 +20,6 @@
 #include "class.h"
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread.h"
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 51d9d24..99565c6 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -22,7 +22,6 @@
 #include "gc/allocator_type.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
@@ -189,6 +188,16 @@
   DISALLOW_IMPLICIT_CONSTRUCTORS(PrimitiveArray);
 };
 
+// Declare the different primitive arrays. Instantiations will be in array.cc.
+extern template class PrimitiveArray<uint8_t>;   // BooleanArray
+extern template class PrimitiveArray<int8_t>;    // ByteArray
+extern template class PrimitiveArray<uint16_t>;  // CharArray
+extern template class PrimitiveArray<double>;    // DoubleArray
+extern template class PrimitiveArray<float>;     // FloatArray
+extern template class PrimitiveArray<int32_t>;   // IntArray
+extern template class PrimitiveArray<int64_t>;   // LongArray
+extern template class PrimitiveArray<int16_t>;   // ShortArray
+
 // Either an IntArray or a LongArray.
 class PointerArray : public Array {
  public:
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 5122b37..c8d4557 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -23,13 +23,14 @@
 #include "art_method.h"
 #include "base/array_slice.h"
 #include "base/length_prefixed_array.h"
-#include "class_linker-inl.h"
+#include "class_linker.h"
 #include "class_loader.h"
 #include "common_throws.h"
+#include "dex_cache.h"
 #include "dex_file-inl.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
-#include "object_array-inl.h"
+#include "object_array.h"
 #include "object-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 06ee3d3..e4b5320 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -1143,9 +1143,7 @@
 dex::TypeIndex Class::FindTypeIndexInOtherDexFile(const DexFile& dex_file) {
   std::string temp;
   const DexFile::TypeId* type_id = dex_file.FindTypeId(GetDescriptor(&temp));
-  return (type_id == nullptr)
-      ? dex::TypeIndex(DexFile::kDexNoIndex)
-      : dex_file.GetIndexForTypeId(*type_id);
+  return (type_id == nullptr) ? dex::TypeIndex() : dex_file.GetIndexForTypeId(*type_id);
 }
 
 template <PointerSize kPointerSize, bool kTransactionActive>
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index dfb2788..913ab79 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_H_
 #define ART_RUNTIME_MIRROR_CLASS_H_
 
+#include "base/bit_utils.h"
 #include "base/enums.h"
 #include "base/iteration_range.h"
 #include "dex_file.h"
@@ -29,7 +30,6 @@
 #include "modifiers.h"
 #include "object.h"
 #include "object_array.h"
-#include "object_callbacks.h"
 #include "primitive.h"
 #include "read_barrier_option.h"
 #include "stride_iterator.h"
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index 708665d..75a3800 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -23,7 +23,6 @@
 #include "gc_root.h"
 #include "object.h"
 #include "object_array.h"
-#include "object_callbacks.h"
 #include "string.h"
 
 namespace art {
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index f5ecdae..39c8ee0 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -19,9 +19,7 @@
 
 #include "class_loader.h"
 
-#include "base/mutex-inl.h"
 #include "class_table-inl.h"
-#include "obj_ptr-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index a62a460..381d96b 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -17,12 +17,16 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_LOADER_H_
 #define ART_RUNTIME_MIRROR_CLASS_LOADER_H_
 
+#include "base/mutex.h"
 #include "object.h"
+#include "object_reference.h"
+#include "obj_ptr.h"
 
 namespace art {
 
 struct ClassLoaderOffsets;
 class ClassTable;
+class LinearAlloc;
 
 namespace mirror {
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index c95d92e..96e3475 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -23,6 +23,7 @@
 #include "gc/heap.h"
 #include "globals.h"
 #include "linear_alloc.h"
+#include "oat_file.h"
 #include "object.h"
 #include "object-inl.h"
 #include "object_array-inl.h"
diff --git a/runtime/mirror/executable.h b/runtime/mirror/executable.h
index 6c465f6..8a28f66 100644
--- a/runtime/mirror/executable.h
+++ b/runtime/mirror/executable.h
@@ -20,7 +20,6 @@
 #include "accessible_object.h"
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 
 namespace art {
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 2496989..d33df5c 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -21,7 +21,6 @@
 
 #include "art_field-inl.h"
 #include "mirror/dex_cache-inl.h"
-#include "runtime-inl.h"
 
 namespace art {
 
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index 222d709..40186a6 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -22,7 +22,6 @@
 #include "gc_root.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 
 namespace art {
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index baed5f1..95f829d 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -26,8 +26,7 @@
 #include "class-inl.h"
 #include "class_flags.h"
 #include "class_linker.h"
-#include "class_loader-inl.h"
-#include "dex_cache-inl.h"
+#include "dex_cache.h"
 #include "lock_word-inl.h"
 #include "monitor.h"
 #include "object_array-inl.h"
@@ -899,6 +898,36 @@
   return success;
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldWeakReleaseObjectWithoutWriteBarrier(
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
+  if (kCheckTransaction) {  // Debug check that the template flag matches the runtime state.
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {  // The value that would be stored.
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {  // The value expected to be in the field.
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {  // Log old_value with the runtime before the CAS.
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+  // CAS the 32-bit reference in place; no write barrier is issued by this function.
+  bool success = atomic_addr->CompareExchangeWeakRelease(old_ref.reference_,
+                                                         new_ref.reference_);
+  return success;  // NOTE(review): weak CAS presumably may fail spuriously; confirm.
+}
+
 template<bool kIsStatic,
          VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption,
diff --git a/runtime/mirror/object-readbarrier-inl.h b/runtime/mirror/object-readbarrier-inl.h
index 58e7c20..69365af 100644
--- a/runtime/mirror/object-readbarrier-inl.h
+++ b/runtime/mirror/object-readbarrier-inl.h
@@ -221,6 +221,36 @@
   return success;
 }
 
+template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
+inline bool Object::CasFieldStrongReleaseObjectWithoutWriteBarrier(
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
+  if (kCheckTransaction) {  // Debug check that the template flag matches the runtime state.
+    DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
+  }
+  if (kVerifyFlags & kVerifyThis) {
+    VerifyObject(this);
+  }
+  if (kVerifyFlags & kVerifyWrites) {  // The value that would be stored.
+    VerifyObject(new_value);
+  }
+  if (kVerifyFlags & kVerifyReads) {  // The value expected to be in the field.
+    VerifyObject(old_value);
+  }
+  if (kTransactionActive) {  // Log old_value with the runtime before the CAS.
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
+  }
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
+  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
+  Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
+  // CAS the 32-bit reference in place; no write barrier is issued by this function.
+  bool success = atomic_addr->CompareExchangeStrongRelease(old_ref.reference_,
+                                                           new_ref.reference_);
+  return success;  // True iff the field was updated to new_value (per the CAS result).
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/object-refvisitor-inl.h b/runtime/mirror/object-refvisitor-inl.h
index 49ab7c2..f5ab4dd 100644
--- a/runtime/mirror/object-refvisitor-inl.h
+++ b/runtime/mirror/object-refvisitor-inl.h
@@ -19,7 +19,9 @@
 
 #include "object-inl.h"
 
+#include "class_loader-inl.h"
 #include "class-refvisitor-inl.h"
+#include "dex_cache-inl.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 35a1b73..9cf4252 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -350,10 +350,25 @@
   template<bool kTransactionActive,
            bool kCheckTransaction = true,
            VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakReleaseObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                    ObjPtr<Object> old_value,
+                                                    ObjPtr<Object> new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
                                                       ObjPtr<Object> old_value,
                                                       ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongReleaseObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                      ObjPtr<Object> old_value,
+                                                      ObjPtr<Object> new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   HeapReference<Object>* GetFieldObjectReferenceAddr(MemberOffset field_offset);
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index d7527d5..6230ae9 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -582,7 +582,7 @@
 
   // Primitive types are only assignable to themselves
   const char* prims = "ZBCSIJFD";
-  Class* prim_types[strlen(prims)];
+  std::vector<Class*> prim_types(strlen(prims));
   for (size_t i = 0; i < strlen(prims); i++) {
     prim_types[i] = class_linker_->FindPrimitiveClass(prims[i]);
   }
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index a449b41..84e5494 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -19,7 +19,9 @@
 
 #include "reference.h"
 
+#include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
+#include "runtime.h"
 
 namespace art {
 namespace mirror {
@@ -47,6 +49,12 @@
   return SetFieldObjectVolatile<kTransactionActive>(ZombieOffset(), zombie);
 }
 
+template<ReadBarrierOption kReadBarrierOption>
+inline Class* Reference::GetJavaLangRefReference() {
+  DCHECK(!java_lang_ref_Reference_.IsNull());  // The class root must be set before it is read.
+  return java_lang_ref_Reference_.Read<kReadBarrierOption>();
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index f2fa589..b10c294 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -18,14 +18,13 @@
 #define ART_RUNTIME_MIRROR_REFERENCE_H_
 
 #include "base/enums.h"
+#include "base/macros.h"
+#include "base/mutex.h"
 #include "class.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
 #include "object.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
-#include "runtime.h"
-#include "thread.h"
 
 namespace art {
 
@@ -100,10 +99,7 @@
   }
 
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  static Class* GetJavaLangRefReference() REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(!java_lang_ref_Reference_.IsNull());
-    return java_lang_ref_Reference_.Read<kReadBarrierOption>();
-  }
+  static ALWAYS_INLINE Class* GetJavaLangRefReference() REQUIRES_SHARED(Locks::mutator_lock_);
   static void SetClass(ObjPtr<Class> klass);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index c00cf91..53de821 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -19,6 +19,7 @@
 #include "class.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "object-inl.h"
 #include "handle_scope-inl.h"
 #include "string.h"
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index d32d8dc..87e8a1f 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -19,7 +19,6 @@
 
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index 57b20a1..7560639 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -26,7 +26,6 @@
 #include "common_throws.h"
 #include "gc/heap-inl.h"
 #include "globals.h"
-#include "intern_table.h"
 #include "runtime.h"
 #include "thread.h"
 #include "utf.h"
@@ -161,10 +160,6 @@
   const int32_t offset_;
 };
 
-inline ObjPtr<String> String::Intern() {
-  return Runtime::Current()->GetInternTable()->InternWeak(this);
-}
-
 inline uint16_t String::CharAt(int32_t index) {
   int32_t count = GetLength();
   if (UNLIKELY((index < 0) || (index >= count))) {
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index de0e75b..82ff6dd 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -18,8 +18,11 @@
 
 #include "arch/memcmp16.h"
 #include "array.h"
+#include "base/array_ref.h"
+#include "base/stl_util.h"
 #include "class-inl.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "handle_scope-inl.h"
 #include "intern_table.h"
 #include "object-inl.h"
@@ -418,5 +421,9 @@
   return PrettyDescriptor(ToModifiedUtf8().c_str());
 }
 
+ObjPtr<String> String::Intern() {
+  return Runtime::Current()->GetInternTable()->InternWeak(this);
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index b59bbfb..7fbe8bd 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -20,7 +20,6 @@
 #include "gc_root.h"
 #include "gc/allocator_type.h"
 #include "object.h"
-#include "object_callbacks.h"
 
 namespace art {
 
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index e50409f..7027410 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -26,7 +26,9 @@
 #include "object-inl.h"
 #include "object_array.h"
 #include "object_array-inl.h"
+#include "object_callbacks.h"
 #include "stack_trace_element.h"
+#include "string.h"
 #include "utils.h"
 #include "well_known_classes.h"
 
@@ -169,5 +171,17 @@
   java_lang_Throwable_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
+Object* Throwable::GetStackState() {
+  return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
+}
+
+Object* Throwable::GetStackTrace() {
+  return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
+}
+
+String* Throwable::GetDetailMessage() {
+  return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 0a4ab6f..fb45228 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -19,23 +19,22 @@
 
 #include "gc_root.h"
 #include "object.h"
-#include "object_callbacks.h"
-#include "string.h"
 
 namespace art {
 
+class RootVisitor;
 struct ThrowableOffsets;
 
 namespace mirror {
 
+class String;
+
 // C++ mirror of java.lang.Throwable
 class MANAGED Throwable : public Object {
  public:
   void SetDetailMessage(ObjPtr<String> new_detail_message) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
-  }
+  String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_);
 
   std::string Dump() REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -59,12 +58,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  Object* GetStackState() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
-  }
-  Object* GetStackTrace() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObjectVolatile<Object>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_));
-  }
+  Object* GetStackState() REQUIRES_SHARED(Locks::mutator_lock_);
+  Object* GetStackTrace() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
   HeapReference<Object> backtrace_;  // Note this is Java volatile:
diff --git a/runtime/modifiers.h b/runtime/modifiers.h
index 461f870..68ab4a4 100644
--- a/runtime/modifiers.h
+++ b/runtime/modifiers.h
@@ -60,16 +60,20 @@
 static constexpr uint32_t kAccCopied =                0x00100000;  // method (runtime)
 static constexpr uint32_t kAccMiranda =               0x00200000;  // method (dex only)
 static constexpr uint32_t kAccDefault =               0x00400000;  // method (runtime)
+
+// Set by the JIT when clearing profiling infos to denote that a method was previously warm.
+static constexpr uint32_t kAccPreviouslyWarm =        0x00800000;  // method (runtime)
+
 // This is set by the class linker during LinkInterfaceMethods. Prior to that point we do not know
 // if any particular method needs to be a default conflict. Used to figure out at runtime if
 // invoking this method will throw an exception.
-static constexpr uint32_t kAccDefaultConflict =       0x00800000;  // method (runtime)
+static constexpr uint32_t kAccDefaultConflict =       0x01000000;  // method (runtime)
 
 // Set by the verifier for a method we do not want the compiler to compile.
-static constexpr uint32_t kAccCompileDontBother =     0x01000000;  // method (runtime)
+static constexpr uint32_t kAccCompileDontBother =     0x02000000;  // method (runtime)
 
 // Set by the verifier for a method that could not be verified to follow structured locking.
-static constexpr uint32_t kAccMustCountLocks =        0x02000000;  // method (runtime)
+static constexpr uint32_t kAccMustCountLocks =        0x04000000;  // method (runtime)
 
 // Set by the class linker for a method that has only one implementation for a
 // virtual call.
@@ -85,8 +89,8 @@
 // class/ancestor overrides finalize()
 static constexpr uint32_t kAccClassIsFinalizable        = 0x80000000;
 
-static constexpr uint32_t kAccFlagsNotUsedByIntrinsic   = 0x007FFFFF;
-static constexpr uint32_t kAccMaxIntrinsic              = 0xFF;
+static constexpr uint32_t kAccFlagsNotUsedByIntrinsic   = 0x00FFFFFF;
+static constexpr uint32_t kAccMaxIntrinsic              = 0x7F;
 
 // Valid (meaningful) bits for a field.
 static constexpr uint32_t kAccValidFieldFlags = kAccPublic | kAccPrivate | kAccProtected |
@@ -96,7 +100,7 @@
 static constexpr uint32_t kAccValidMethodFlags = kAccPublic | kAccPrivate | kAccProtected |
     kAccStatic | kAccFinal | kAccSynchronized | kAccBridge | kAccVarargs | kAccNative |
     kAccAbstract | kAccStrict | kAccSynthetic | kAccMiranda | kAccConstructor |
-    kAccDeclaredSynchronized;
+    kAccDeclaredSynchronized | kAccPreviouslyWarm;
 
 // Valid (meaningful) bits for a class (not interface).
 // Note 1. These are positive bits. Other bits may have to be zero.
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index bb33047..a617818 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -31,7 +31,9 @@
 #include "lock_word-inl.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "object_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "verifier/method_verifier.h"
@@ -437,17 +439,11 @@
                     << " in " << ArtMethod::PrettyMethod(m) << " for "
                     << PrettyDuration(MsToNs(wait_ms));
               }
-              const char* owners_filename;
-              int32_t owners_line_number;
-              TranslateLocation(owners_method,
-                                owners_dex_pc,
-                                &owners_filename,
-                                &owners_line_number);
               LogContentionEvent(self,
                                  wait_ms,
                                  sample_percent,
-                                 owners_filename,
-                                 owners_line_number);
+                                 owners_method,
+                                 owners_dex_pc);
             }
           }
         }
diff --git a/runtime/monitor.h b/runtime/monitor.h
index e80d31c..96c5a5b 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -30,13 +30,13 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "lock_word.h"
-#include "object_callbacks.h"
 #include "read_barrier_option.h"
 #include "thread_state.h"
 
 namespace art {
 
 class ArtMethod;
+class IsMarkedVisitor;
 class LockWord;
 template<class T> class Handle;
 class StackVisitor;
@@ -181,8 +181,11 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       NO_THREAD_SAFETY_ANALYSIS;  // For m->Install(self)
 
-  void LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
-                          const char* owner_filename, int32_t owner_line_number)
+  void LogContentionEvent(Thread* self,
+                          uint32_t wait_ms,
+                          uint32_t sample_percent,
+                          ArtMethod* owner_method,
+                          uint32_t owner_dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static void FailedUnlock(mirror::Object* obj,
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 1dd60f8..74623da 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -15,96 +15,94 @@
  */
 
 #include "monitor.h"
-#include "thread.h"
 
 #include <fcntl.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 
 #include <log/log.h>
+#include <log/log_event_list.h>
+
+#include "art_method.h"
+#include "thread.h"
 
 #define EVENT_LOG_TAG_dvm_lock_sample 20003
 
 namespace art {
 
-static void Set4LE(uint8_t* buf, uint32_t val) {
-  *buf++ = (uint8_t)(val);
-  *buf++ = (uint8_t)(val >> 8);
-  *buf++ = (uint8_t)(val >> 16);
-  *buf = (uint8_t)(val >> 24);
-}
+void Monitor::LogContentionEvent(Thread* self,
+                                 uint32_t wait_ms,
+                                 uint32_t sample_percent,
+                                 ArtMethod* owner_method,
+                                 uint32_t owner_dex_pc) {
+  android_log_event_list ctx(EVENT_LOG_TAG_dvm_lock_sample);
 
-static char* EventLogWriteInt(char* dst, int value) {
-  *dst++ = EVENT_TYPE_INT;
-  Set4LE(reinterpret_cast<uint8_t*>(dst), value);
-  return dst + 4;
-}
-
-static char* EventLogWriteString(char* dst, const char* value, size_t len) {
-  *dst++ = EVENT_TYPE_STRING;
-  len = len < 32 ? len : 32;
-  Set4LE(reinterpret_cast<uint8_t*>(dst), len);
-  dst += 4;
-  memcpy(dst, value, len);
-  return dst + len;
-}
-
-void Monitor::LogContentionEvent(Thread* self, uint32_t wait_ms, uint32_t sample_percent,
-                                 const char* owner_filename, int32_t owner_line_number) {
-  // Emit the event list length, 1 byte.
-  char eventBuffer[174];
-  char* cp = eventBuffer;
-  *cp++ = 9;
+  const char* owner_filename;
+  int32_t owner_line_number;
+  TranslateLocation(owner_method, owner_dex_pc, &owner_filename, &owner_line_number);
 
   // Emit the process name, <= 37 bytes.
-  int fd = open("/proc/self/cmdline", O_RDONLY);
-  char procName[33];
-  memset(procName, 0, sizeof(procName));
-  read(fd, procName, sizeof(procName) - 1);
-  close(fd);
-  size_t len = strlen(procName);
-  cp = EventLogWriteString(cp, procName, len);
+  // Best effort: procName stays empty when /proc/self/cmdline cannot be opened.
+  char procName[33] = {};
+  int fd = open("/proc/self/cmdline", O_RDONLY);
+  if (fd >= 0) {
+    read(fd, procName, sizeof(procName) - 1);  // Leave the final byte as the NUL terminator.
+    close(fd);
+  }
+  ctx << procName;
 
-  // Emit the sensitive thread ("main thread") status, 5 bytes.
-  cp = EventLogWriteInt(cp, Thread::IsSensitiveThread());
+  // Emit the sensitive thread ("main thread") status. By tradition this is encoded like a C++
+  // bool (1 = sensitive, 0 = not sensitive); the values are spelled out explicitly here.
+  constexpr uint32_t kIsSensitive = 1u;
+  constexpr uint32_t kIsNotSensitive = 0u;
+  ctx << (Thread::IsSensitiveThread() ? kIsSensitive : kIsNotSensitive);
 
-  // Emit self thread name string, <= 37 bytes.
-  std::string thread_name;
-  self->GetThreadName(thread_name);
-  cp = EventLogWriteString(cp, thread_name.c_str(), thread_name.size());
+  // Emit self thread name string.
+  {
+    std::string thread_name;
+    self->GetThreadName(thread_name);
+    ctx << thread_name;
+  }
 
-  // Emit the wait time, 5 bytes.
-  cp = EventLogWriteInt(cp, wait_ms);
+  // Emit the wait time.
+  ctx << wait_ms;
 
-  // Emit the source code file name, <= 37 bytes.
-  uint32_t pc;
-  ArtMethod* m = self->GetCurrentMethod(&pc);
-  const char* filename;
-  int32_t line_number;
-  TranslateLocation(m, pc, &filename, &line_number);
-  cp = EventLogWriteString(cp, filename, strlen(filename));
+  const char* filename = nullptr;
+  {
+    uint32_t pc;
+    ArtMethod* m = self->GetCurrentMethod(&pc);
+    int32_t line_number;
+    TranslateLocation(m, pc, &filename, &line_number);
 
-  // Emit the source code line number, 5 bytes.
-  cp = EventLogWriteInt(cp, line_number);
+    // Emit the source code file name.
+    ctx << filename;
 
-  // Emit the lock owner source code file name, <= 37 bytes.
+    // Emit the source code line number.
+    ctx << line_number;
+
+    // Emit the method name.
+    ctx << ArtMethod::PrettyMethod(m);
+  }
+
+  // Emit the lock owner source code file name.
   if (owner_filename == nullptr) {
     owner_filename = "";
   } else if (strcmp(filename, owner_filename) == 0) {
     // Common case, so save on log space.
     owner_filename = "-";
   }
-  cp = EventLogWriteString(cp, owner_filename, strlen(owner_filename));
+  ctx << owner_filename;
 
-  // Emit the source code line number, 5 bytes.
-  cp = EventLogWriteInt(cp, owner_line_number);
+  // Emit the lock owner source code line number.
+  ctx << owner_line_number;
 
-  // Emit the sample percentage, 5 bytes.
-  cp = EventLogWriteInt(cp, sample_percent);
+  // Emit the owner method name.
+  ctx << ArtMethod::PrettyMethod(owner_method);
 
-  CHECK_LE((size_t)(cp - eventBuffer), sizeof(eventBuffer));
-  android_btWriteLog(EVENT_LOG_TAG_dvm_lock_sample, EVENT_TYPE_LIST, eventBuffer,
-                     (size_t)(cp - eventBuffer));
+  // Emit the sample percentage.
+  ctx << sample_percent;
+
+  ctx << LOG_ID_EVENTS;
 }
 
 }  // namespace art
diff --git a/runtime/monitor_linux.cc b/runtime/monitor_linux.cc
index 1c77ac0..6678661 100644
--- a/runtime/monitor_linux.cc
+++ b/runtime/monitor_linux.cc
@@ -18,7 +18,7 @@
 
 namespace art {
 
-void Monitor::LogContentionEvent(Thread*, uint32_t, uint32_t, const char*, int32_t) {
+void Monitor::LogContentionEvent(Thread*, uint32_t, uint32_t, ArtMethod*, uint32_t) {
 }
 
 }  // namespace art
diff --git a/runtime/monitor_pool.cc b/runtime/monitor_pool.cc
index 0f4e238..48e9a6b 100644
--- a/runtime/monitor_pool.cc
+++ b/runtime/monitor_pool.cc
@@ -18,7 +18,7 @@
 
 #include "base/logging.h"
 #include "base/mutex-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "monitor.h"
 
 namespace art {
diff --git a/runtime/monitor_pool_test.cc b/runtime/monitor_pool_test.cc
index a111c6c..5463877 100644
--- a/runtime/monitor_pool_test.cc
+++ b/runtime/monitor_pool_test.cc
@@ -18,7 +18,7 @@
 
 #include "common_runtime_test.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 77554e8..ad00966 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -19,6 +19,7 @@
 #include <sstream>
 
 #include "android-base/stringprintf.h"
+#include "nativehelper/jni_macros.h"
 
 #include "base/logging.h"
 #include "base/stl_util.h"
@@ -30,6 +31,7 @@
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "native_util.h"
 #include "oat_file.h"
 #include "oat_file_assistant.h"
 #include "oat_file_manager.h"
@@ -657,7 +659,7 @@
   return oat_file != nullptr;
 }
 
-static jstring DexFile_getDexFileOutputPath(JNIEnv* env,
+static jobjectArray DexFile_getDexFileOutputPaths(JNIEnv* env,
                                             jclass,
                                             jstring javaFilename,
                                             jstring javaInstructionSet) {
@@ -689,7 +691,33 @@
     return nullptr;
   }
 
-  return env->NewStringUTF(best_oat_file->GetLocation().c_str());
+  // Return the vdex and oat locations of the selected oat file: vdex first, then oat.
+  std::string oat_filename = best_oat_file->GetLocation();
+  std::string vdex_filename = GetVdexFilename(oat_filename);
+
+  // NewStringUTF returns null with an exception pending on failure; let the caller see it.
+  ScopedLocalRef<jstring> jvdexFilename(env, env->NewStringUTF(vdex_filename.c_str()));
+  if (jvdexFilename.get() == nullptr) {
+    return nullptr;
+  }
+  ScopedLocalRef<jstring> joatFilename(env, env->NewStringUTF(oat_filename.c_str()));
+  if (joatFilename.get() == nullptr) {
+    return nullptr;
+  }
+
+  // Create the two-element result array: [0] = vdex path, [1] = oat path.
+  jobjectArray result = env->NewObjectArray(2,
+                                            WellKnownClasses::java_lang_String,
+                                            nullptr);
+  if (result == nullptr) {
+    // Allocation failed; never store into a null array while an exception is pending.
+    DCHECK(env->ExceptionCheck());
+    return nullptr;
+  }
+  env->SetObjectArrayElement(result, 0, jvdexFilename.get());
+  env->SetObjectArrayElement(result, 1, joatFilename.get());
+
+  return result;
 }
 
 static JNINativeMethod gMethods[] = {
@@ -726,8 +754,8 @@
   NATIVE_METHOD(DexFile, isBackedByOatFile, "(Ljava/lang/Object;)Z"),
   NATIVE_METHOD(DexFile, getDexFileStatus,
                 "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;"),
-  NATIVE_METHOD(DexFile, getDexFileOutputPath,
-                "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;")
+  NATIVE_METHOD(DexFile, getDexFileOutputPaths,
+                "(Ljava/lang/String;Ljava/lang/String;)[Ljava/lang/String;")
 };
 
 void register_dalvik_system_DexFile(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 5c4e242..e1eae21 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -21,6 +21,8 @@
 
 #include <sstream>
 
+#include "nativehelper/jni_macros.h"
+
 #include "base/histogram-inl.h"
 #include "base/time_utils.h"
 #include "class_linker.h"
@@ -37,6 +39,7 @@
 #include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "scoped_fast_native_object_access-inl.h"
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index ff4d931..fed9c1c 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -30,6 +30,7 @@
 #pragma GCC diagnostic pop
 
 #include "android-base/stringprintf.h"
+#include "nativehelper/jni_macros.h"
 
 #include "art_method-inl.h"
 #include "arch/instruction_set.h"
@@ -51,6 +52,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "runtime.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 0dfafa4..e86e64e 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -16,6 +16,8 @@
 
 #include "dalvik_system_VMStack.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "art_method-inl.h"
 #include "gc/task_processor.h"
 #include "jni_internal.h"
@@ -23,6 +25,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread_list.h"
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 0515ec6..31aeba0 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -19,6 +19,7 @@
 #include <stdlib.h>
 
 #include "android-base/stringprintf.h"
+#include "nativehelper/jni_macros.h"
 
 #include "arch/instruction_set.h"
 #include "art_method-inl.h"
@@ -27,10 +28,12 @@
 #include "jit/jit.h"
 #include "jni_internal.h"
 #include "JNIHelp.h"
+#include "native_util.h"
 #include "non_debuggable_classes.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
-#include "thread-inl.h"
+#include "stack.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "trace.h"
 
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 4f99947..d3377be 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -18,6 +18,8 @@
 
 #include <iostream>
 
+#include "nativehelper/jni_macros.h"
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/enums.h"
@@ -34,6 +36,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "native_util.h"
 #include "obj_ptr-inl.h"
 #include "reflection.h"
 #include "scoped_thread_state_change-inl.h"
diff --git a/runtime/native/java_lang_Object.cc b/runtime/native/java_lang_Object.cc
index fb4f99a..d52bf04 100644
--- a/runtime/native/java_lang_Object.cc
+++ b/runtime/native/java_lang_Object.cc
@@ -16,8 +16,11 @@
 
 #include "java_lang_Object.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index bf33bf2..ac0d633 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -16,12 +16,15 @@
 
 #include "java_lang_String.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "common_throws.h"
 #include "jni_internal.h"
 #include "mirror/array.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "mirror/string-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
index ec3c7c2..9c2e918 100644
--- a/runtime/native/java_lang_StringFactory.cc
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -16,10 +16,13 @@
 
 #include "java_lang_StringFactory.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "common_throws.h"
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index 2cabce8..0e5d740 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -16,6 +16,8 @@
 
 #include "java_lang_System.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "common_throws.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
@@ -24,6 +26,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_Thread.cc b/runtime/native/java_lang_Thread.cc
index 346bd30..e4d1705 100644
--- a/runtime/native/java_lang_Thread.cc
+++ b/runtime/native/java_lang_Thread.cc
@@ -16,10 +16,13 @@
 
 #include "java_lang_Thread.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "common_throws.h"
 #include "jni_internal.h"
 #include "monitor.h"
 #include "mirror/object.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
diff --git a/runtime/native/java_lang_Throwable.cc b/runtime/native/java_lang_Throwable.cc
index 654b8a8..03b7f9d 100644
--- a/runtime/native/java_lang_Throwable.cc
+++ b/runtime/native/java_lang_Throwable.cc
@@ -16,7 +16,10 @@
 
 #include "java_lang_Throwable.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "thread.h"
 
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index a9ba33e..fc50d55 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -16,12 +16,16 @@
 
 #include "java_lang_VMClassLoader.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "class_linker.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "obj_ptr.h"
 #include "scoped_fast_native_object_access-inl.h"
+#include "ScopedLocalRef.h"
 #include "ScopedUtfChars.h"
 #include "well_known_classes.h"
 #include "zip_archive.h"
@@ -122,16 +126,24 @@
 static jobjectArray VMClassLoader_getBootClassPathEntries(JNIEnv* env, jclass) {
   const std::vector<const DexFile*>& path =
       Runtime::Current()->GetClassLinker()->GetBootClassPath();
-  jclass stringClass = env->FindClass("java/lang/String");
-  jobjectArray array = env->NewObjectArray(path.size(), stringClass, nullptr);
+  jobjectArray array =
+      env->NewObjectArray(path.size(), WellKnownClasses::java_lang_String, nullptr);
+  if (array == nullptr) {
+    DCHECK(env->ExceptionCheck());
+    return nullptr;
+  }
   for (size_t i = 0; i < path.size(); ++i) {
     const DexFile* dex_file = path[i];
 
     // For multidex locations, e.g., x.jar:classes2.dex, we want to look into x.jar.
     const std::string& location(dex_file->GetBaseLocation());
 
-    jstring javaPath = env->NewStringUTF(location.c_str());
-    env->SetObjectArrayElement(array, i, javaPath);
+    ScopedLocalRef<jstring> javaPath(env, env->NewStringUTF(location.c_str()));
+    if (javaPath.get() == nullptr) {
+      DCHECK(env->ExceptionCheck());
+      return nullptr;
+    }
+    env->SetObjectArrayElement(array, i, javaPath.get());
   }
   return array;
 }
diff --git a/runtime/native/java_lang_Void.cc b/runtime/native/java_lang_Void.cc
index e2b4b82..af83dd1 100644
--- a/runtime/native/java_lang_Void.cc
+++ b/runtime/native/java_lang_Void.cc
@@ -16,8 +16,11 @@
 
 #include "java_lang_Void.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "class_linker-inl.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "runtime.h"
 #include "scoped_fast_native_object_access-inl.h"
 
diff --git a/runtime/native/java_lang_invoke_MethodHandleImpl.cc b/runtime/native/java_lang_invoke_MethodHandleImpl.cc
index 9113841..2e3b4d4 100644
--- a/runtime/native/java_lang_invoke_MethodHandleImpl.cc
+++ b/runtime/native/java_lang_invoke_MethodHandleImpl.cc
@@ -16,12 +16,15 @@
 
 #include "java_lang_invoke_MethodHandleImpl.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "art_method.h"
 #include "handle_scope-inl.h"
 #include "jni_internal.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
 #include "mirror/method_handle_impl.h"
+#include "native_util.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 
diff --git a/runtime/native/java_lang_ref_FinalizerReference.cc b/runtime/native/java_lang_ref_FinalizerReference.cc
index afedc5e..72af5f7 100644
--- a/runtime/native/java_lang_ref_FinalizerReference.cc
+++ b/runtime/native/java_lang_ref_FinalizerReference.cc
@@ -16,11 +16,14 @@
 
 #include "java_lang_ref_FinalizerReference.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "gc/heap.h"
 #include "gc/reference_processor.h"
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_ref_Reference.cc b/runtime/native/java_lang_ref_Reference.cc
index b1cb2f2..524a18c 100644
--- a/runtime/native/java_lang_ref_Reference.cc
+++ b/runtime/native/java_lang_ref_Reference.cc
@@ -16,11 +16,14 @@
 
 #include "java_lang_ref_Reference.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "gc/heap.h"
 #include "gc/reference_processor.h"
 #include "jni_internal.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
 namespace art {
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 54c2109..5be3171 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -16,14 +16,17 @@
 
 #include "java_lang_reflect_Array.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "class_linker-inl.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
+#include "handle_scope-inl.h"
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
-#include "handle_scope-inl.h"
 
 namespace art {
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index fb78046..242e87a 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -16,6 +16,8 @@
 
 #include "java_lang_reflect_Constructor.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "class_linker.h"
@@ -25,6 +27,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index 8f226ce..2aad12d 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -17,6 +17,7 @@
 #include "java_lang_reflect_Executable.h"
 
 #include "android-base/stringprintf.h"
+#include "nativehelper/jni_macros.h"
 
 #include "art_method-inl.h"
 #include "dex_file_annotations.h"
@@ -26,6 +27,7 @@
 #include "mirror/method.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 0fb3903..f19004d 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -17,6 +17,7 @@
 #include "java_lang_reflect_Field.h"
 
 #include "android-base/stringprintf.h"
+#include "nativehelper/jni_macros.h"
 
 #include "art_field-inl.h"
 #include "class_linker.h"
@@ -27,6 +28,7 @@
 #include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "mirror/field.h"
+#include "native_util.h"
 #include "reflection-inl.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "utils.h"
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index 6f0130e..cbbb6a8 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -16,6 +16,8 @@
 
 #include "java_lang_reflect_Method.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "class_linker.h"
@@ -25,6 +27,7 @@
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "native_util.h"
 #include "reflection.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "well_known_classes.h"
diff --git a/runtime/native/java_lang_reflect_Parameter.cc b/runtime/native/java_lang_reflect_Parameter.cc
index 37aa16c..c4ab5d6 100644
--- a/runtime/native/java_lang_reflect_Parameter.cc
+++ b/runtime/native/java_lang_reflect_Parameter.cc
@@ -17,12 +17,14 @@
 #include "java_lang_reflect_Parameter.h"
 
 #include "android-base/stringprintf.h"
+#include "nativehelper/jni_macros.h"
 
 #include "art_method-inl.h"
 #include "common_throws.h"
 #include "dex_file-inl.h"
 #include "dex_file_annotations.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "utils.h"
 
diff --git a/runtime/native/java_lang_reflect_Proxy.cc b/runtime/native/java_lang_reflect_Proxy.cc
index 0279b5f..691ed28 100644
--- a/runtime/native/java_lang_reflect_Proxy.cc
+++ b/runtime/native/java_lang_reflect_Proxy.cc
@@ -16,11 +16,14 @@
 
 #include "java_lang_reflect_Proxy.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "class_linker.h"
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/object_array.h"
 #include "mirror/string.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "verify_object.h"
 
diff --git a/runtime/native/java_util_concurrent_atomic_AtomicLong.cc b/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
index 4d2ea67..bd4b0fe 100644
--- a/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
+++ b/runtime/native/java_util_concurrent_atomic_AtomicLong.cc
@@ -16,9 +16,12 @@
 
 #include "java_util_concurrent_atomic_AtomicLong.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "arch/instruction_set.h"
 #include "atomic.h"
 #include "jni_internal.h"
+#include "native_util.h"
 
 namespace art {
 
diff --git a/runtime/native/libcore_util_CharsetUtils.cc b/runtime/native/libcore_util_CharsetUtils.cc
index 4138ccc..38634e6 100644
--- a/runtime/native/libcore_util_CharsetUtils.cc
+++ b/runtime/native/libcore_util_CharsetUtils.cc
@@ -14,15 +14,20 @@
  * limitations under the License.
  */
 
+#include "libcore_util_CharsetUtils.h"
+
+#include <string.h>
+
+#include "nativehelper/jni_macros.h"
+
 #include "jni_internal.h"
 #include "mirror/string.h"
 #include "mirror/string-inl.h"
-#include "native/libcore_util_CharsetUtils.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedPrimitiveArray.h"
 #include "unicode/utf16.h"
 
-#include <string.h>
 
 namespace art {
 
diff --git a/runtime/native/native_util.h b/runtime/native/native_util.h
new file mode 100644
index 0000000..98384e0
--- /dev/null
+++ b/runtime/native/native_util.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_NATIVE_NATIVE_UTIL_H_
+#define ART_RUNTIME_NATIVE_NATIVE_UTIL_H_
+
+#include <jni.h>
+
+#include "android-base/logging.h"
+#include "base/macros.h"
+#include "ScopedLocalRef.h"
+
+namespace art {
+
+ALWAYS_INLINE inline void RegisterNativeMethodsInternal(JNIEnv* env,
+                                                        const char* jni_class_name,
+                                                        const JNINativeMethod* methods,
+                                                        jint method_count) {
+  ScopedLocalRef<jclass> c(env, env->FindClass(jni_class_name));
+  if (c.get() == nullptr) {  // Class lookup failed; nothing to register against.
+    LOG(FATAL) << "Couldn't find class: " << jni_class_name;
+  }
+  jint jni_result = env->RegisterNatives(c.get(), methods, method_count);
+  CHECK_EQ(JNI_OK, jni_result);  // Registration of all `method_count` entries must succeed.
+}
+// Expands at the call site: requires `env` and a `gMethods` array to be in scope there.
+#define REGISTER_NATIVE_METHODS(jni_class_name) \
+  RegisterNativeMethodsInternal(env, (jni_class_name), gMethods, arraysize(gMethods))
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_NATIVE_NATIVE_UTIL_H_
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
index 5809708..925b909 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmServer.cc
@@ -16,9 +16,12 @@
 
 #include "org_apache_harmony_dalvik_ddmc_DdmServer.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "base/logging.h"
 #include "debugger.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedPrimitiveArray.h"
 
diff --git a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
index 69ef59e..0a254ac 100644
--- a/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
+++ b/runtime/native/org_apache_harmony_dalvik_ddmc_DdmVmInternal.cc
@@ -16,10 +16,14 @@
 
 #include "org_apache_harmony_dalvik_ddmc_DdmVmInternal.h"
 
+#include "nativehelper/jni_macros.h"
+
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "debugger.h"
+#include "gc/heap.h"
 #include "jni_internal.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedLocalRef.h"
 #include "ScopedPrimitiveArray.h"
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index cc5a41a..e78c9da 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -15,19 +15,23 @@
  */
 
 #include "sun_misc_Unsafe.h"
+
+#include <atomic>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "nativehelper/jni_macros.h"
+
 #include "common_throws.h"
 #include "gc/accounting/card_table-inl.h"
 #include "jni_internal.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "native_util.h"
 #include "scoped_fast_native_object_access-inl.h"
 
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <atomic>
-
 namespace art {
 
 static jboolean Unsafe_compareAndSwapInt(JNIEnv* env, jobject, jobject javaObj, jlong offset,
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index cbc5024..cbff0bb 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -45,7 +45,7 @@
 #include "base/unix_file/fd_file.h"
 #include "oat_quick_method_header.h"
 #include "os.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 #endif
diff --git a/runtime/non_debuggable_classes.cc b/runtime/non_debuggable_classes.cc
index 829ea65..9cc7e60 100644
--- a/runtime/non_debuggable_classes.cc
+++ b/runtime/non_debuggable_classes.cc
@@ -21,7 +21,7 @@
 #include "mirror/class-inl.h"
 #include "obj_ptr-inl.h"
 #include "ScopedLocalRef.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/oat.cc b/runtime/oat.cc
index d14b399..21e20e9 100644
--- a/runtime/oat.cc
+++ b/runtime/oat.cc
@@ -23,6 +23,7 @@
 
 #include "arch/instruction_set_features.h"
 #include "base/bit_utils.h"
+#include "base/strlcpy.h"
 
 namespace art {
 
@@ -71,6 +72,7 @@
       instruction_set_(instruction_set),
       instruction_set_features_bitmap_(instruction_set_features->AsBitmap()),
       dex_file_count_(dex_file_count),
+      oat_dex_files_offset_(0),
       executable_offset_(0),
       interpreter_to_interpreter_bridge_offset_(0),
       interpreter_to_compiled_code_bridge_offset_(0),
@@ -203,6 +205,20 @@
   return instruction_set_features_bitmap_;
 }
 
+uint32_t OatHeader::GetOatDexFilesOffset() const {
+  DCHECK(IsValid());
+  DCHECK_GT(oat_dex_files_offset_, sizeof(OatHeader));  // Also trips if still unset (0).
+  return oat_dex_files_offset_;
+}
+
+void OatHeader::SetOatDexFilesOffset(uint32_t oat_dex_files_offset) {
+  DCHECK_GT(oat_dex_files_offset, sizeof(OatHeader));  // Records must lie past the header.
+  DCHECK(IsValid());
+  DCHECK_EQ(oat_dex_files_offset_, 0u);  // Expected to be set at most once; starts as 0.
+
+  oat_dex_files_offset_ = oat_dex_files_offset;
+}
+
 uint32_t OatHeader::GetExecutableOffset() const {
   DCHECK(IsValid());
   DCHECK_ALIGNED(executable_offset_, kPageSize);
@@ -505,9 +521,9 @@
     SafeMap<std::string, std::string>::const_iterator it = key_value_store->begin();
     SafeMap<std::string, std::string>::const_iterator end = key_value_store->end();
     for ( ; it != end; ++it) {
-      strcpy(data_ptr, it->first.c_str());
+      strlcpy(data_ptr, it->first.c_str(), it->first.length() + 1);
       data_ptr += it->first.length() + 1;
-      strcpy(data_ptr, it->second.c_str());
+      strlcpy(data_ptr, it->second.c_str(), it->second.length() + 1);
       data_ptr += it->second.length() + 1;
     }
   }
diff --git a/runtime/oat.h b/runtime/oat.h
index b7c715c..521cc40 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '2', '5', '\0' };  // ARM Baker narrow thunks.
+  static constexpr uint8_t kOatVersion[] = { '1', '2', '7', '\0' };  // .bss ArtMethod* section.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
@@ -64,6 +64,8 @@
     DCHECK(IsValid());
     return dex_file_count_;
   }
+  uint32_t GetOatDexFilesOffset() const;
+  void SetOatDexFilesOffset(uint32_t oat_dex_files_offset);
   uint32_t GetExecutableOffset() const;
   void SetExecutableOffset(uint32_t executable_offset);
 
@@ -135,6 +137,7 @@
   InstructionSet instruction_set_;
   uint32_t instruction_set_features_bitmap_;
   uint32_t dex_file_count_;
+  uint32_t oat_dex_files_offset_;
   uint32_t executable_offset_;
   uint32_t interpreter_to_interpreter_bridge_offset_;
   uint32_t interpreter_to_compiled_code_bridge_offset_;
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index 9affeb0..888de45 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -246,6 +246,9 @@
     }
     // Readjust to be non-inclusive upper bound.
     bss_end_ += sizeof(uint32_t);
+    // Find bss methods if present.
+    bss_methods_ =
+        const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbssmethods", &symbol_error_msg));
     // Find bss roots if present.
     bss_roots_ = const_cast<uint8_t*>(FindDynamicSymbolAddress("oatbssroots", &symbol_error_msg));
   }
@@ -311,51 +314,63 @@
                               cause.c_str());
     return false;
   }
-  const uint8_t* oat = Begin();
-  oat += sizeof(OatHeader);
-  if (oat > End()) {
-    *error_msg = StringPrintf("In oat file '%s' found truncated OatHeader", GetLocation().c_str());
-    return false;
-  }
-
-  oat += GetOatHeader().GetKeyValueStoreSize();
-  if (oat > End()) {
-    *error_msg = StringPrintf("In oat file '%s' found truncated variable-size data: "
-                                  "%p + %zu + %u <= %p",
+  PointerSize pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
+  size_t key_value_store_size =
+      (Size() >= sizeof(OatHeader)) ? GetOatHeader().GetKeyValueStoreSize() : 0u;
+  if (Size() < sizeof(OatHeader) + key_value_store_size) {
+    *error_msg = StringPrintf("In oat file '%s' found truncated OatHeader, "
+                                  "size = %zu < %zu + %zu",
                               GetLocation().c_str(),
-                              Begin(),
+                              Size(),
                               sizeof(OatHeader),
-                              GetOatHeader().GetKeyValueStoreSize(),
-                              End());
+                              key_value_store_size);
     return false;
   }
 
-  if (!IsAligned<alignof(GcRoot<mirror::Object>)>(bss_begin_) ||
-      !IsAligned<alignof(GcRoot<mirror::Object>)>(bss_roots_) ||
+  size_t oat_dex_files_offset = GetOatHeader().GetOatDexFilesOffset();
+  if (oat_dex_files_offset < GetOatHeader().GetHeaderSize() || oat_dex_files_offset > Size()) {
+    *error_msg = StringPrintf("In oat file '%s' found invalid oat dex files offset: "
+                                  "%zu is not in [%zu, %zu]",
+                              GetLocation().c_str(),
+                              oat_dex_files_offset,
+                              GetOatHeader().GetHeaderSize(),
+                              Size());
+    return false;
+  }
+  const uint8_t* oat = Begin() + oat_dex_files_offset;  // Jump to the OatDexFile records.
+
+  DCHECK_GE(static_cast<size_t>(pointer_size), alignof(GcRoot<mirror::Object>));
+  if (!IsAligned<kPageSize>(bss_begin_) ||
+      !IsAlignedParam(bss_methods_, static_cast<size_t>(pointer_size)) ||
+      !IsAlignedParam(bss_roots_, static_cast<size_t>(pointer_size)) ||
       !IsAligned<alignof(GcRoot<mirror::Object>)>(bss_end_)) {
     *error_msg = StringPrintf("In oat file '%s' found unaligned bss symbol(s): "
-                                  "begin = %p, roots = %p, end = %p",
+                                  "begin = %p, methods_ = %p, roots = %p, end = %p",
                               GetLocation().c_str(),
                               bss_begin_,
+                              bss_methods_,
                               bss_roots_,
                               bss_end_);
     return false;
   }
 
-  if (bss_roots_ != nullptr && (bss_roots_ < bss_begin_ || bss_roots_ > bss_end_)) {
-    *error_msg = StringPrintf("In oat file '%s' found bss roots outside .bss: "
-                                  "%p is outside range [%p, %p]",
+  if ((bss_methods_ != nullptr && (bss_methods_ < bss_begin_ || bss_methods_ > bss_end_)) ||
+      (bss_roots_ != nullptr && (bss_roots_ < bss_begin_ || bss_roots_ > bss_end_)) ||
+      (bss_methods_ != nullptr && bss_roots_ != nullptr && bss_methods_ > bss_roots_)) {
+    *error_msg = StringPrintf("In oat file '%s' found bss symbol(s) outside .bss or unordered: "
+                                  "begin = %p, methods_ = %p, roots = %p, end = %p",
                               GetLocation().c_str(),
-                              bss_roots_,
                               bss_begin_,
+                              bss_methods_,
+                              bss_roots_,
                               bss_end_);
     return false;
   }
 
-  PointerSize pointer_size = GetInstructionSetPointerSize(GetOatHeader().GetInstructionSet());
-  uint8_t* dex_cache_arrays = (bss_begin_ == bss_roots_) ? nullptr : bss_begin_;
+  uint8_t* after_arrays = (bss_methods_ != nullptr) ? bss_methods_ : bss_roots_;  // May be null.
+  uint8_t* dex_cache_arrays = (bss_begin_ == after_arrays) ? nullptr : bss_begin_;
   uint8_t* dex_cache_arrays_end =
-      (bss_begin_ == bss_roots_) ? nullptr : (bss_roots_ != nullptr) ? bss_roots_ : bss_end_;
+      (bss_begin_ == after_arrays) ? nullptr : (after_arrays != nullptr) ? after_arrays : bss_end_;
   DCHECK_EQ(dex_cache_arrays != nullptr, dex_cache_arrays_end != nullptr);
   uint32_t dex_file_count = GetOatHeader().GetDexFileCount();
   oat_dex_files_storage_.reserve(dex_file_count);
@@ -529,6 +544,55 @@
       return false;
     }
 
+    uint32_t method_bss_mapping_offset;  // Offset of the mapping from Begin(); validated below.
+    if (UNLIKELY(!ReadOatDexFileData(*this, &oat, &method_bss_mapping_offset))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' truncated "
+                                    "after method bss mapping offset",
+                                GetLocation().c_str(),
+                                i,  // Printed with %zu, matching the mapping error below.
+                                dex_file_location.c_str());
+      return false;
+    }
+    const bool readable_method_bss_mapping_size =  // True if size() can be read safely.
+        method_bss_mapping_offset != 0u &&  // 0 means this dex file has no mapping.
+        method_bss_mapping_offset <= Size() &&
+        IsAligned<alignof(MethodBssMapping)>(method_bss_mapping_offset) &&
+        Size() - method_bss_mapping_offset >= MethodBssMapping::ComputeSize(0);
+    const MethodBssMapping* method_bss_mapping = readable_method_bss_mapping_size
+        ? reinterpret_cast<const MethodBssMapping*>(Begin() + method_bss_mapping_offset)
+        : nullptr;
+    if (method_bss_mapping_offset != 0u &&  // A present mapping must be readable and non-empty.
+        (UNLIKELY(method_bss_mapping == nullptr) ||
+            UNLIKELY(method_bss_mapping->size() == 0u) ||
+            UNLIKELY(Size() - method_bss_mapping_offset <
+                     MethodBssMapping::ComputeSize(method_bss_mapping->size())))) {
+      *error_msg = StringPrintf("In oat file '%s' found OatDexFile #%zu for '%s' with unaligned or "
+                                    "truncated method bss mapping, offset %u of %zu, length %zu",
+                                GetLocation().c_str(),
+                                i,
+                                dex_file_location.c_str(),
+                                method_bss_mapping_offset,
+                                Size(),
+                                method_bss_mapping != nullptr ? method_bss_mapping->size() : 0u);
+      return false;
+    }
+    if (kIsDebugBuild && method_bss_mapping != nullptr) {  // Debug-only entry checks.
+      const MethodBssMappingEntry* prev_entry = nullptr;
+      for (const MethodBssMappingEntry& entry : *method_bss_mapping) {
+        CHECK_ALIGNED_PARAM(entry.bss_offset, static_cast<size_t>(pointer_size));
+        CHECK_LT(entry.bss_offset, BssSize());
+        CHECK_LE(POPCOUNT(entry.index_mask) * static_cast<size_t>(pointer_size),  entry.bss_offset);
+        size_t index_mask_span = (entry.index_mask != 0u) ? 16u - CTZ(entry.index_mask) : 0u;
+        CHECK_LE(index_mask_span, entry.method_index);
+        if (prev_entry != nullptr) {  // Entries must be strictly ordered by method index.
+          CHECK_LT(prev_entry->method_index, entry.method_index - index_mask_span);
+        }
+        prev_entry = &entry;
+      }
+      CHECK_LT(prev_entry->method_index,  // Non-null: an empty mapping returned false above.
+               reinterpret_cast<const DexFile::Header*>(dex_file_pointer)->method_ids_size_);
+    }
+
     uint8_t* current_dex_cache_arrays = nullptr;
     if (dex_cache_arrays != nullptr) {
       // All DexCache types except for CallSite have their instance counts in the
@@ -569,6 +633,7 @@
                                               dex_file_checksum,
                                               dex_file_pointer,
                                               lookup_table_data,
+                                              method_bss_mapping,
                                               class_offsets_pointer,
                                               current_dex_cache_arrays);
     oat_dex_files_storage_.push_back(oat_dex_file);
@@ -1064,7 +1129,7 @@
   CHECK(!oat_filename.empty()) << oat_location;
   CheckLocation(oat_location);
 
-  std::string vdex_filename = ReplaceFileExtension(oat_filename, "vdex");
+  std::string vdex_filename = GetVdexFilename(oat_filename);
 
   // Check that the files even exist, fast-fail.
   if (kIsVdexEnabled && !OS::FileExists(vdex_filename.c_str())) {
@@ -1158,6 +1223,7 @@
       end_(nullptr),
       bss_begin_(nullptr),
       bss_end_(nullptr),
+      bss_methods_(nullptr),
       bss_roots_(nullptr),
       is_executable_(is_executable),
       secondary_lookup_lock_("OatFile secondary lookup lock", kOatFileSecondaryLookupLock) {
@@ -1198,6 +1264,17 @@
   return kIsVdexEnabled ? vdex_->End() : End();
 }
 
+ArrayRef<ArtMethod*> OatFile::GetBssMethods() const {
+  if (bss_methods_ == nullptr) {
+    return ArrayRef<ArtMethod*>();  // No ArtMethod* entries in .bss.
+  }
+  // The entries end where the GC roots begin, or at the end of .bss if there are no roots.
+  uint8_t* const limit = (bss_roots_ != nullptr) ? bss_roots_ : bss_end_;
+  ArtMethod** const first = reinterpret_cast<ArtMethod**>(bss_methods_);
+  ArtMethod** const last = reinterpret_cast<ArtMethod**>(limit);
+  return ArrayRef<ArtMethod*>(first, last - first);
+}
+
 ArrayRef<GcRoot<mirror::Object>> OatFile::GetBssGcRoots() const {
   if (bss_roots_ != nullptr) {
     auto* roots = reinterpret_cast<GcRoot<mirror::Object>*>(bss_roots_);
@@ -1283,6 +1360,7 @@
                                 uint32_t dex_file_location_checksum,
                                 const uint8_t* dex_file_pointer,
                                 const uint8_t* lookup_table_data,
+                                const MethodBssMapping* method_bss_mapping_data,
                                 const uint32_t* oat_class_offsets_pointer,
                                 uint8_t* dex_cache_arrays)
     : oat_file_(oat_file),
@@ -1291,6 +1369,7 @@
       dex_file_location_checksum_(dex_file_location_checksum),
       dex_file_pointer_(dex_file_pointer),
       lookup_table_data_(lookup_table_data),
+      method_bss_mapping_(method_bss_mapping_data),
       oat_class_offsets_pointer_(oat_class_offsets_pointer),
       dex_cache_arrays_(dex_cache_arrays) {
   // Initialize TypeLookupTable.
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index 06c76b5..66ed44f 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -26,6 +26,7 @@
 #include "base/stringpiece.h"
 #include "compiler_filter.h"
 #include "dex_file.h"
+#include "method_bss_mapping.h"
 #include "mirror/class.h"
 #include "oat.h"
 #include "os.h"
@@ -39,9 +40,10 @@
 class ElfFile;
 template <class MirrorType> class GcRoot;
 class MemMap;
-class OatMethodOffsets;
-class OatHeader;
 class OatDexFile;
+class OatHeader;
+class OatMethodOffsets;
+class OatQuickMethodHeader;
 class VdexFile;
 
 namespace gc {
@@ -256,8 +258,14 @@
     return BssEnd() - BssBegin();
   }
 
+  size_t BssMethodsOffset() const {
+    // Note: Used only by the symbolizer, which needs a valid .bss offset even without methods.
+    return (bss_methods_ != nullptr) ? bss_methods_ - BssBegin() : BssRootsOffset();
+  }
+
   size_t BssRootsOffset() const {
-    return bss_roots_ - BssBegin();
+    // Note: Used only by the symbolizer, which needs a valid .bss offset even without roots.
+    return (bss_roots_ != nullptr) ? bss_roots_ - BssBegin() : BssSize();
   }
 
   size_t DexSize() const {
@@ -273,6 +281,7 @@
   const uint8_t* DexBegin() const;
   const uint8_t* DexEnd() const;
 
+  ArrayRef<ArtMethod*> GetBssMethods() const;
   ArrayRef<GcRoot<mirror::Object>> GetBssGcRoots() const;
 
   // Returns the absolute dex location for the encoded relative dex location.
@@ -324,6 +333,9 @@
   // Pointer to the end of the .bss section, if present, otherwise null.
   uint8_t* bss_end_;
 
+  // Pointer to the beginning of the ArtMethod*s in .bss section, if present, otherwise null.
+  uint8_t* bss_methods_;
+
   // Pointer to the beginning of the GC roots in .bss section, if present, otherwise null.
   uint8_t* bss_roots_;
 
@@ -421,6 +433,10 @@
     return lookup_table_data_;
   }
 
+  const MethodBssMapping* GetMethodBssMapping() const {
+    return method_bss_mapping_;
+  }
+
   const uint8_t* GetDexFilePointer() const {
     return dex_file_pointer_;
   }
@@ -447,6 +463,7 @@
              uint32_t dex_file_checksum,
              const uint8_t* dex_file_pointer,
              const uint8_t* lookup_table_data,
+             const MethodBssMapping* method_bss_mapping,
              const uint32_t* oat_class_offsets_pointer,
              uint8_t* dex_cache_arrays);
 
@@ -457,7 +474,8 @@
   const std::string canonical_dex_file_location_;
   const uint32_t dex_file_location_checksum_ = 0u;
   const uint8_t* const dex_file_pointer_ = nullptr;
-  const uint8_t* lookup_table_data_ = nullptr;
+  const uint8_t* const lookup_table_data_ = nullptr;
+  const MethodBssMapping* const method_bss_mapping_ = nullptr;
   const uint32_t* const oat_class_offsets_pointer_ = 0u;
   uint8_t* const dex_cache_arrays_ = nullptr;
   mutable std::unique_ptr<TypeLookupTable> lookup_table_;
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 603bbbf..2e2e8c3 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -24,6 +24,7 @@
 #include "android-base/strings.h"
 
 #include "base/logging.h"
+#include "base/stl_util.h"
 #include "compiler_filter.h"
 #include "class_linker.h"
 #include "exec_utils.h"
@@ -239,7 +240,7 @@
     case kDex2OatForBootImage:
     case kDex2OatForRelocation:
     case kDex2OatForFilter:
-      return GenerateOatFileNoChecks(info, error_msg);
+      return GenerateOatFileNoChecks(info, target, error_msg);
   }
   UNREACHABLE();
 }
@@ -614,7 +615,7 @@
 }
 
 OatFileAssistant::ResultOfAttemptToUpdate OatFileAssistant::GenerateOatFileNoChecks(
-      OatFileAssistant::OatFileInfo& info, std::string* error_msg) {
+      OatFileAssistant::OatFileInfo& info, CompilerFilter::Filter filter, std::string* error_msg) {
   CHECK(error_msg != nullptr);
 
   Runtime* runtime = Runtime::Current();
@@ -630,7 +631,7 @@
     return kUpdateNotAttempted;
   }
   const std::string& oat_file_name = *info.Filename();
-  const std::string& vdex_file_name = ReplaceFileExtension(oat_file_name, "vdex");
+  const std::string& vdex_file_name = GetVdexFilename(oat_file_name);
 
   // dex2oat ignores missing dex files and doesn't report an error.
   // Check explicitly here so we can detect the error properly.
@@ -689,6 +690,7 @@
   args.push_back("--output-vdex-fd=" + std::to_string(vdex_file->Fd()));
   args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
   args.push_back("--oat-location=" + oat_file_name);
+  args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
 
   if (!Dex2Oat(args, error_msg)) {
     // Manually delete the oat and vdex files. This ensures there is no garbage
@@ -962,7 +964,7 @@
     if (file == nullptr) {
       // Check to see if there is a vdex file we can make use of.
       std::string error_msg;
-      std::string vdex_filename = ReplaceFileExtension(filename_, "vdex");
+      std::string vdex_filename = GetVdexFilename(filename_);
       std::unique_ptr<VdexFile> vdex = VdexFile::Open(vdex_filename,
                                                       /*writeable*/false,
                                                       /*low_4gb*/false,
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 7e2385e..03d9ca3 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -366,14 +366,16 @@
   };
 
   // Generate the oat file for the given info from the dex file using the
-  // current runtime compiler options.
+  // current runtime compiler options and the specified filter.
   // This does not check the current status before attempting to generate the
   // oat file.
   //
   // If the result is not kUpdateSucceeded, the value of error_msg will be set
   // to a string describing why there was a failure or the update was not
   // attempted. error_msg must not be null.
-  ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info, std::string* error_msg);
+  ResultOfAttemptToUpdate GenerateOatFileNoChecks(OatFileInfo& info,
+                                                  CompilerFilter::Filter target,
+                                                  std::string* error_msg);
 
   // Return info for the best oat file.
   OatFileInfo& GetBestInfo();
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index b2b86ee..3619129 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -28,7 +28,7 @@
 #include "oat_file_manager.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -1232,6 +1232,25 @@
   }
 }
 
+// Verify that when no compiler filter is passed the default one from OatFileAssistant is used.
+TEST_F(OatFileAssistantTest, DefaultMakeUpToDateFilter) {
+  std::string dex_location = GetScratchDir() + "/TestDex.jar";
+  Copy(GetDexSrc1(), dex_location);
+
+  OatFileAssistant oat_file_assistant(dex_location.c_str(), kRuntimeISA, false);
+
+  const CompilerFilter::Filter default_filter =
+      OatFileAssistant::kDefaultCompilerFilterForDexLoading;
+  std::string error_msg;
+  EXPECT_EQ(OatFileAssistant::kUpdateSucceeded,
+            oat_file_assistant.MakeUpToDate(false, &error_msg)) << error_msg;
+  EXPECT_EQ(-OatFileAssistant::kNoDexOptNeeded,
+            oat_file_assistant.GetDexOptNeeded(default_filter));
+  std::unique_ptr<OatFile> oat_file = oat_file_assistant.GetBestOatFile();
+  ASSERT_NE(nullptr, oat_file.get());  // Fatal on purpose: dereferenced on the next line.
+  EXPECT_EQ(default_filter, oat_file->GetCompilerFilter());
+}
+
 // TODO: More Tests:
 //  * Test class linker falls back to unquickened dex for DexNoOat
 //  * Test class linker falls back to unquickened dex for MultiDexNoOat
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index c1cf800..630945a 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -29,6 +29,7 @@
 #include "base/systrace.h"
 #include "class_linker.h"
 #include "dex_file-inl.h"
+#include "dex_file_tracking_registrar.h"
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
@@ -38,7 +39,7 @@
 #include "oat_file_assistant.h"
 #include "obj_ptr-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
@@ -737,6 +738,11 @@
             // Successfully added image space to heap, release the map so that it does not get
             // freed.
             image_space.release();
+
+            // Register for tracking.
+            for (const auto& dex_file : dex_files) {
+              dex::tracking::RegisterDexFile(dex_file.get());
+            }
           } else {
             LOG(INFO) << "Failed to add image file " << temp_error_msg;
             dex_files.clear();
@@ -756,6 +762,11 @@
     if (!added_image_space) {
       DCHECK(dex_files.empty());
       dex_files = oat_file_assistant.LoadDexFiles(*source_oat_file, dex_location);
+
+      // Register for tracking.
+      for (const auto& dex_file : dex_files) {
+        dex::tracking::RegisterDexFile(dex_file.get());
+      }
     }
     if (dex_files.empty()) {
       error_msgs->push_back("Failed to open dex files from " + source_oat_file->GetLocation());
diff --git a/runtime/obj_ptr-inl.h b/runtime/obj_ptr-inl.h
index f2921da..f1e3b50 100644
--- a/runtime/obj_ptr-inl.h
+++ b/runtime/obj_ptr-inl.h
@@ -17,8 +17,9 @@
 #ifndef ART_RUNTIME_OBJ_PTR_INL_H_
 #define ART_RUNTIME_OBJ_PTR_INL_H_
 
+#include "base/bit_utils.h"
 #include "obj_ptr.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 9be486e..3ec5b32 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -46,7 +46,7 @@
 #include "object_tagging.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_class.h"
 #include "ti_dump.h"
@@ -1731,6 +1731,7 @@
 }
 
 extern "C" bool ArtPlugin_Deinitialize() {
+  gEventHandler.Shutdown();
   PhaseUtil::Unregister();
   ThreadUtil::Unregister();
   ClassUtil::Unregister();
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index 2a2aa4c..af85fb0 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -41,6 +41,7 @@
 #include "base/casts.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "base/strlcpy.h"
 #include "events.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
@@ -187,7 +188,7 @@
   size_t len = strlen(src) + 1;
   JvmtiUniquePtr<char[]> ret = AllocJvmtiUniquePtr<char[]>(env, len, error);
   if (ret != nullptr) {
-    strcpy(ret.get(), src);
+    strlcpy(ret.get(), src, len);
   }
   return ret;
 }
@@ -217,8 +218,8 @@
     .can_redefine_any_class                          = 0,
     .can_get_current_thread_cpu_time                 = 0,
     .can_get_thread_cpu_time                         = 0,
-    .can_generate_method_entry_events                = 0,
-    .can_generate_method_exit_events                 = 0,
+    .can_generate_method_entry_events                = 1,
+    .can_generate_method_exit_events                 = 1,
     .can_generate_all_class_hook_events              = 0,
     .can_generate_compiled_method_load_events        = 0,
     .can_generate_monitor_events                     = 0,
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
index 57abf31..cb7e6a9 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -20,6 +20,7 @@
 #include <array>
 
 #include "events.h"
+#include "ScopedLocalRef.h"
 
 #include "art_jvmti.h"
 
@@ -135,6 +136,8 @@
       continue;
     }
     if (ShouldDispatch<kEvent>(env, thread)) {
+      ScopedLocalRef<jthrowable> thr(jnienv, jnienv->ExceptionOccurred());
+      jnienv->ExceptionClear();
       jint new_len = 0;
       unsigned char* new_data = nullptr;
       auto callback = impl::GetCallback<kEvent>(env);
@@ -148,6 +151,9 @@
                current_class_data,
                &new_len,
                &new_data);
+      if (thr.get() != nullptr && !jnienv->ExceptionCheck()) {
+        jnienv->Throw(thr.get());
+      }
       if (new_data != nullptr && new_data != current_class_data) {
         // Destroy the data the last transformer made. We skip this if the previous state was the
         // initial one since we don't know here which jvmtiEnv allocated it.
@@ -180,6 +186,25 @@
   }
 }
 
+// Dispatch overload for events that take a JNIEnv*. A pending exception is stashed and cleared
+// before each environment's callback, since callbacks may throw. Per the JVMTI specification an
+// exception thrown by an event (including an earlier one) replaces the stashed exception.
+// TODO: Add the overwritten exceptions to the suppressed list of the newest exception.
+template <ArtJvmtiEvent kEvent, typename ...Args>
+inline void EventHandler::DispatchEvent(art::Thread* thread, JNIEnv* jnienv, Args... args) const {
+  for (ArtJvmTiEnv* env : envs) {
+    if (env == nullptr) {
+      continue;
+    }
+    ScopedLocalRef<jthrowable> stashed(jnienv, jnienv->ExceptionOccurred());
+    jnienv->ExceptionClear();
+    DispatchEvent<kEvent, JNIEnv*, Args...>(env, thread, jnienv, args...);
+    if (stashed.get() != nullptr && !jnienv->ExceptionCheck()) {
+      jnienv->Throw(stashed.get());
+    }
+  }
+}
+
 template <ArtJvmtiEvent kEvent, typename ...Args>
 inline void EventHandler::DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const {
   using FnType = void(jvmtiEnv*, Args...);
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
index 0ec92b7..90bc122 100644
--- a/runtime/openjdkjvmti/events.cc
+++ b/runtime/openjdkjvmti/events.cc
@@ -32,19 +32,24 @@
 #include "events-inl.h"
 
 #include "art_jvmti.h"
+#include "art_method-inl.h"
 #include "base/logging.h"
 #include "gc/allocation_listener.h"
 #include "gc/gc_pause_listener.h"
 #include "gc/heap.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "handle_scope-inl.h"
 #include "instrumentation.h"
 #include "jni_env_ext-inl.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "runtime.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
+#include "thread_list.h"
+#include "ti_phase.h"
 
 namespace openjdkjvmti {
 
@@ -294,6 +299,222 @@
   }
 }
 
+template<typename RefType>  // RefType: the JNI reference type to hand back.
+static RefType AddLocalRef(art::JNIEnvExt* e, art::mirror::Object* obj)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  return (obj != nullptr) ? e->AddLocalReference<RefType>(obj) : nullptr;
+}
+
+class JvmtiMethodTraceListener FINAL : public art::instrumentation::InstrumentationListener {
+ public:
+  explicit JvmtiMethodTraceListener(EventHandler* handler) : event_handler_(handler) {}
+
+  template<ArtJvmtiEvent kEvent, typename ...Args>
+  void RunEventCallback(art::Thread* self, art::JNIEnvExt* jnienv, Args... args)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    ScopedLocalRef<jthread> thread_jni(jnienv, AddLocalRef<jthread>(jnienv, self->GetPeer()));
+    // Give the callback its own JNI frame (100 is simply a generous size); it is popped below.
+    jnienv->PushFrame(100);
+    {
+      // Dispatch with the thread in kNative (presumably required to run agent code).
+      art::ScopedThreadSuspension sts(self, art::ThreadState::kNative);
+      event_handler_->DispatchEvent<kEvent>(self,
+                                            static_cast<JNIEnv*>(jnienv),
+                                            thread_jni.get(),
+                                            args...);
+    }
+    jnienv->PopFrame();
+  }
+
+  // Method-entry call-back: dispatches kMethodEntry for non-runtime methods if enabled anywhere.
+  void MethodEntered(art::Thread* self,
+                     art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                     art::ArtMethod* method,
+                     uint32_t dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodEntry)) {
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      RunEventCallback<ArtJvmtiEvent::kMethodEntry>(self,
+                                                    jnienv,
+                                                    art::jni::EncodeArtMethod(method));
+    }
+  }
+
+  // Method-exit call-back for reference returns: the value is passed as a JNI local ref (val.l).
+  void MethodExited(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    art::Handle<art::mirror::Object> return_value)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      DCHECK_EQ(method->GetReturnTypePrimitive(), art::Primitive::kPrimNot)
+          << method->PrettyMethod();
+      DCHECK(!self->IsExceptionPending());
+      jvalue val;
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      ScopedLocalRef<jobject> return_jobj(jnienv, AddLocalRef<jobject>(jnienv, return_value.Get()));
+      val.l = return_jobj.get();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_FALSE),
+          val);
+    }
+  }
+
+  // Method-exit call-back for non-reference returns: the raw value is passed through val.j.
+  void MethodExited(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    const art::JValue& return_value)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      DCHECK_NE(method->GetReturnTypePrimitive(), art::Primitive::kPrimNot)
+          << method->PrettyMethod();
+      DCHECK(!self->IsExceptionPending());
+      jvalue val;
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      // jlong is the largest jvalue member, so copying the 64-bit payload carries over any
+      // primitive return value.
+      val.j = return_value.GetJ();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_FALSE),
+          val);
+    }
+  }
+
+  // Call-back for a method popped by an exception throw, reported as kMethodExit. A method causes
+  // either a MethodExited or a MethodUnwind call-back when its activation is removed.
+  void MethodUnwind(art::Thread* self,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    if (!method->IsRuntimeMethod() &&
+        event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kMethodExit)) {
+      jvalue val;
+      // The unwound frame has no return value; store all-ones so val is not left uninitialized.
+      val.j = static_cast<jlong>(-1);
+      art::JNIEnvExt* jnienv = self->GetJniEnv();
+      art::StackHandleScope<1> hs(self);
+      art::Handle<art::mirror::Throwable> old_exception(hs.NewHandle(self->GetException()));
+      CHECK(!old_exception.IsNull());
+      self->ClearException();
+      RunEventCallback<ArtJvmtiEvent::kMethodExit>(
+          self,
+          jnienv,
+          art::jni::EncodeArtMethod(method),
+          /*was_popped_by_exception*/ static_cast<jboolean>(JNI_TRUE),
+          val);
+      // Match RI behavior: if the callback threw, the original exception is thrown away.
+      if (LIKELY(!self->IsExceptionPending())) {
+        self->SetException(old_exception.Get());
+      }
+    }
+  }
+
+  // Call-back for when the dex pc moves in a method. No JVMTI event is associated with this yet,
+  // so it does nothing.
+  void DexPcMoved(art::Thread* self ATTRIBUTE_UNUSED,
+                  art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                  art::ArtMethod* method ATTRIBUTE_UNUSED,
+                  uint32_t new_dex_pc ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we read from a field. Currently does nothing.
+  void FieldRead(art::Thread* self ATTRIBUTE_UNUSED,
+                 art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                 art::ArtMethod* method ATTRIBUTE_UNUSED,
+                 uint32_t dex_pc ATTRIBUTE_UNUSED,
+                 art::ArtField* field ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we write into a field. Currently does nothing.
+  void FieldWritten(art::Thread* self ATTRIBUTE_UNUSED,
+                    art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                    art::ArtMethod* method ATTRIBUTE_UNUSED,
+                    uint32_t dex_pc ATTRIBUTE_UNUSED,
+                    art::ArtField* field ATTRIBUTE_UNUSED,
+                    const art::JValue& field_value ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back when an exception is caught. Currently does nothing.
+  void ExceptionCaught(art::Thread* self ATTRIBUTE_UNUSED,
+                       art::Handle<art::mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we execute a branch. Currently does nothing.
+  void Branch(art::Thread* self ATTRIBUTE_UNUSED,
+              art::ArtMethod* method ATTRIBUTE_UNUSED,
+              uint32_t dex_pc ATTRIBUTE_UNUSED,
+              int32_t dex_pc_offset ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+  // Call-back for when we get an invokevirtual or an invokeinterface. Currently does nothing.
+  void InvokeVirtualOrInterface(art::Thread* self ATTRIBUTE_UNUSED,
+                                art::Handle<art::mirror::Object> this_object ATTRIBUTE_UNUSED,
+                                art::ArtMethod* caller ATTRIBUTE_UNUSED,
+                                uint32_t dex_pc ATTRIBUTE_UNUSED,
+                                art::ArtMethod* callee ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) OVERRIDE {
+    return;
+  }
+
+ private:
+  EventHandler* const event_handler_;
+};
+
+// Maps a method-tracing JVMTI event to its instrumentation event mask; aborts on other events.
+static uint32_t GetInstrumentationEventsFor(ArtJvmtiEvent event) {
+  using Instr = art::instrumentation::Instrumentation;
+  if (event == ArtJvmtiEvent::kMethodEntry) {
+    return Instr::kMethodEntered;
+  }
+  if (event == ArtJvmtiEvent::kMethodExit) {
+    return Instr::kMethodExited | Instr::kMethodUnwind;
+  }
+  LOG(FATAL) << "Unknown event " << static_cast<int32_t>(event);
+  return 0;
+}
+
+// Installs or removes |listener| for the instrumentation events that back |event|.
+static void SetupMethodTraceListener(JvmtiMethodTraceListener* listener,
+                                     ArtJvmtiEvent event, bool enable) {
+  const uint32_t event_mask = GetInstrumentationEventsFor(event);
+  auto* const instrumentation = art::Runtime::Current()->GetInstrumentation();
+  art::gc::ScopedGCCriticalSection gc_section(art::Thread::Current(),
+                                              art::gc::kGcCauseInstrumentation,
+                                              art::gc::kCollectorTypeInstrumentation);
+  art::ScopedSuspendAll suspend_all("jvmti method tracing installation");
+  if (!enable) {
+    instrumentation->RemoveListener(listener, event_mask);
+    return;
+  }
+  if (!instrumentation->AreAllMethodsDeoptimized()) {
+    instrumentation->EnableMethodTracing("jvmti-tracing", /*needs_interpreter*/true);
+  }
+  instrumentation->AddListener(listener, event_mask);
+}
+
 // Handle special work for the given event type, if necessary.
 void EventHandler::HandleEventType(ArtJvmtiEvent event, bool enable) {
   switch (event) {
@@ -306,6 +527,11 @@
       SetupGcPauseTracking(gc_pause_listener_.get(), event, enable);
       return;
 
+    case ArtJvmtiEvent::kMethodEntry:
+    case ArtJvmtiEvent::kMethodExit:
+      SetupMethodTraceListener(method_trace_listener_.get(), event, enable);
+      return;
+
     default:
       break;
   }
@@ -419,9 +645,21 @@
   return ERR(NONE);
 }
 
+// Detaches method_trace_listener_ from instrumentation while holding a ScopedSuspendAll.
+void EventHandler::Shutdown() {
+  art::gc::ScopedGCCriticalSection gc_section(art::Thread::Current(),
+                                              art::gc::kGcCauseInstrumentation,
+                                              art::gc::kCollectorTypeInstrumentation);
+  art::ScopedSuspendAll suspend_all("jvmti method tracing uninstallation");
+  // The all-ones mask removes the listener from every event it may have been added for.
+  art::instrumentation::Instrumentation* instr = art::Runtime::Current()->GetInstrumentation();
+  instr->RemoveListener(method_trace_listener_.get(), ~0);
+}
+
 EventHandler::EventHandler() {
   alloc_listener_.reset(new JvmtiAllocationListener(this));
   gc_pause_listener_.reset(new JvmtiGcPauseListener(this));
+  method_trace_listener_.reset(new JvmtiMethodTraceListener(this));
 }
 
 EventHandler::~EventHandler() {
diff --git a/runtime/openjdkjvmti/events.h b/runtime/openjdkjvmti/events.h
index b9e3cf0..5f37dcf 100644
--- a/runtime/openjdkjvmti/events.h
+++ b/runtime/openjdkjvmti/events.h
@@ -29,6 +29,7 @@
 struct ArtJvmTiEnv;
 class JvmtiAllocationListener;
 class JvmtiGcPauseListener;
+class JvmtiMethodTraceListener;
 
 // an enum for ArtEvents. This differs from the JVMTI events only in that we distinguish between
 // retransformation capable and incapable loading
@@ -137,6 +138,9 @@
   EventHandler();
   ~EventHandler();
 
+  // Clean up the event handler; removes the method-trace listener from the instrumentation.
+  void Shutdown();
+
   // Register an env. It is assumed that this happens on env creation, that is, no events are
   // enabled, yet.
   void RegisterArtJvmTiEnv(ArtJvmTiEnv* env);
@@ -160,6 +164,12 @@
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
   inline void DispatchEvent(art::Thread* thread, Args... args) const;
+  // Dispatch event to all registered environments stashing exceptions as needed. This works since
+  // JNIEnv* is always the second argument if it is passed to an event. Needed since C++ does not
+  // allow partial template function specialization.
+  template <ArtJvmtiEvent kEvent, typename ...Args>
+  ALWAYS_INLINE
+  void DispatchEvent(art::Thread* thread, JNIEnv* jnienv, Args... args) const;
   // Dispatch event to the given environment, only.
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
@@ -211,6 +221,7 @@
 
   std::unique_ptr<JvmtiAllocationListener> alloc_listener_;
   std::unique_ptr<JvmtiGcPauseListener> gc_pause_listener_;
+  std::unique_ptr<JvmtiMethodTraceListener> method_trace_listener_;
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
index a6fd247..01c24b1 100644
--- a/runtime/openjdkjvmti/jvmti_weak_table.h
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -41,7 +41,7 @@
 #include "globals.h"
 #include "jvmti.h"
 #include "mirror/object.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
@@ -59,19 +59,19 @@
 
   // Remove the mapping for the given object, returning whether such a mapping existed (and the old
   // value).
-  bool Remove(art::mirror::Object* obj, /* out */ T* tag)
+  ALWAYS_INLINE bool Remove(art::mirror::Object* obj, /* out */ T* tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
-  bool RemoveLocked(art::mirror::Object* obj, /* out */ T* tag)
+  ALWAYS_INLINE bool RemoveLocked(art::mirror::Object* obj, /* out */ T* tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
 
   // Set the mapping for the given object. Returns true if this overwrites an already existing
   // mapping.
-  virtual bool Set(art::mirror::Object* obj, T tag)
+  ALWAYS_INLINE virtual bool Set(art::mirror::Object* obj, T tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
-  virtual bool SetLocked(art::mirror::Object* obj, T tag)
+  ALWAYS_INLINE virtual bool SetLocked(art::mirror::Object* obj, T tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
 
@@ -97,11 +97,12 @@
   }
 
   // Sweep the container. DO NOT CALL MANUALLY.
-  void Sweep(art::IsMarkedVisitor* visitor)
+  ALWAYS_INLINE void Sweep(art::IsMarkedVisitor* visitor)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
 
   // Return all objects that have a value mapping in tags.
+  ALWAYS_INLINE
   jvmtiError GetTaggedObjects(jvmtiEnv* jvmti_env,
                               jint tag_count,
                               const T* tags,
@@ -112,11 +113,11 @@
       REQUIRES(!allow_disallow_lock_);
 
   // Locking functions, to allow coarse-grained locking and amortization.
-  void Lock() ACQUIRE(allow_disallow_lock_);
-  void Unlock() RELEASE(allow_disallow_lock_);
-  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+  ALWAYS_INLINE  void Lock() ACQUIRE(allow_disallow_lock_);
+  ALWAYS_INLINE void Unlock() RELEASE(allow_disallow_lock_);
+  ALWAYS_INLINE void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
 
-  art::mirror::Object* Find(T tag)
+  ALWAYS_INLINE art::mirror::Object* Find(T tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
 
@@ -129,10 +130,12 @@
   virtual void HandleNullSweep(T tag ATTRIBUTE_UNUSED) {}
 
  private:
+  ALWAYS_INLINE
   bool SetLocked(art::Thread* self, art::mirror::Object* obj, T tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
 
+  ALWAYS_INLINE
   bool RemoveLocked(art::Thread* self, art::mirror::Object* obj, /* out */ T* tag)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
@@ -160,12 +163,14 @@
 
   // Slow-path for GetTag. We didn't find the object, but we might be storing from-pointers and
   // are asked to retrieve with a to-pointer.
+  ALWAYS_INLINE
   bool GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, /* out */ T* result)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
 
   // Update the table by doing read barriers on each element, ensuring that to-space pointers
   // are stored.
+  ALWAYS_INLINE
   void UpdateTableWithReadBarrier()
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index e0af6e8..ed54cd1 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -63,7 +63,7 @@
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_class_loader.h"
 #include "ti_phase.h"
@@ -103,7 +103,8 @@
     return nullptr;
   }
   uint32_t checksum = reinterpret_cast<const art::DexFile::Header*>(map->Begin())->checksum_;
-  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(map->GetName(),
+  std::string map_name = map->GetName();
+  std::unique_ptr<const art::DexFile> dex_file(art::DexFile::Open(map_name,
                                                                   checksum,
                                                                   std::move(map),
                                                                   /*verify*/true,
@@ -129,6 +130,25 @@
   return dex_file;
 }
 
+// Deleter that free()s its pointer, acting like jvmtiEnv->Deallocate so asan is not tripped up.
+// TODO: Use the actual jvmtiEnv->Allocate/Deallocate everywhere once we know which env to use.
+// Copyable from const and non-const sources; operator() ignores null and casts away const.
+template <typename T>
+class FakeJvmtiDeleter {
+ public:
+  FakeJvmtiDeleter() {}
+
+  FakeJvmtiDeleter(const FakeJvmtiDeleter&) = default;
+  FakeJvmtiDeleter(FakeJvmtiDeleter&&) = default;
+  FakeJvmtiDeleter& operator=(const FakeJvmtiDeleter&) = default;
+
+  template <typename U> void operator()(const U* ptr) const {
+    if (ptr != nullptr) {
+      free(const_cast<U*>(ptr));
+    }
+  }
+};
+
 struct ClassCallback : public art::ClassLoadCallback {
   void ClassPreDefine(const char* descriptor,
                       art::Handle<art::mirror::Class> klass,
@@ -173,7 +193,8 @@
     // Call all Non-retransformable agents.
     jint post_no_redefine_len = 0;
     unsigned char* post_no_redefine_dex_data = nullptr;
-    std::unique_ptr<const unsigned char> post_no_redefine_unique_ptr(nullptr);
+    std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>
+        post_no_redefine_unique_ptr(nullptr, FakeJvmtiDeleter<const unsigned char>());
     event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookNonRetransformable>(
         self,
         static_cast<JNIEnv*>(env),
@@ -190,13 +211,16 @@
       post_no_redefine_dex_data = const_cast<unsigned char*>(dex_file_copy->Begin());
       post_no_redefine_len = dex_file_copy->Size();
     } else {
-      post_no_redefine_unique_ptr = std::unique_ptr<const unsigned char>(post_no_redefine_dex_data);
+      post_no_redefine_unique_ptr =
+          std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>(
+              post_no_redefine_dex_data, FakeJvmtiDeleter<const unsigned char>());
       DCHECK_GT(post_no_redefine_len, 0);
     }
     // Call all retransformable agents.
     jint final_len = 0;
     unsigned char* final_dex_data = nullptr;
-    std::unique_ptr<const unsigned char> final_dex_unique_ptr(nullptr);
+    std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>
+        final_dex_unique_ptr(nullptr, FakeJvmtiDeleter<const unsigned char>());
     event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
         self,
         static_cast<JNIEnv*>(env),
@@ -213,7 +237,9 @@
       final_dex_data = post_no_redefine_dex_data;
       final_len = post_no_redefine_len;
     } else {
-      final_dex_unique_ptr = std::unique_ptr<const unsigned char>(final_dex_data);
+      final_dex_unique_ptr =
+          std::unique_ptr<const unsigned char, FakeJvmtiDeleter<const unsigned char>>(
+              final_dex_data, FakeJvmtiDeleter<const unsigned char>());
       DCHECK_GT(final_len, 0);
     }
 
diff --git a/runtime/openjdkjvmti/ti_dump.cc b/runtime/openjdkjvmti/ti_dump.cc
index d9e3ef1..7a1e53f 100644
--- a/runtime/openjdkjvmti/ti_dump.cc
+++ b/runtime/openjdkjvmti/ti_dump.cc
@@ -39,7 +39,7 @@
 #include "events-inl.h"
 #include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace openjdkjvmti {
diff --git a/runtime/openjdkjvmti/ti_field.cc b/runtime/openjdkjvmti/ti_field.cc
index 1e5fbda..342d8be 100644
--- a/runtime/openjdkjvmti/ti_field.cc
+++ b/runtime/openjdkjvmti/ti_field.cc
@@ -39,7 +39,7 @@
 #include "mirror/object_array-inl.h"
 #include "modifiers.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index 99774c6..319b1c2 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -35,6 +35,7 @@
 #include "primitive.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
diff --git a/runtime/openjdkjvmti/ti_jni.cc b/runtime/openjdkjvmti/ti_jni.cc
index 88f0395..dd2dda1 100644
--- a/runtime/openjdkjvmti/ti_jni.cc
+++ b/runtime/openjdkjvmti/ti_jni.cc
@@ -38,7 +38,7 @@
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
index f7e5347..beb639e 100644
--- a/runtime/openjdkjvmti/ti_method.cc
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -42,7 +42,7 @@
 #include "runtime_callbacks.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_phase.h"
 
diff --git a/runtime/openjdkjvmti/ti_monitor.cc b/runtime/openjdkjvmti/ti_monitor.cc
index 645faea..61bf533 100644
--- a/runtime/openjdkjvmti/ti_monitor.cc
+++ b/runtime/openjdkjvmti/ti_monitor.cc
@@ -39,7 +39,7 @@
 #include "art_jvmti.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_object.cc b/runtime/openjdkjvmti/ti_object.cc
index bf84499..2506aca 100644
--- a/runtime/openjdkjvmti/ti_object.cc
+++ b/runtime/openjdkjvmti/ti_object.cc
@@ -34,7 +34,7 @@
 #include "art_jvmti.h"
 #include "mirror/object-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index 941cf7b..3c8bdc6 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -38,7 +38,7 @@
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "ti_thread.h"
 
diff --git a/runtime/openjdkjvmti/ti_properties.cc b/runtime/openjdkjvmti/ti_properties.cc
index 8ee5366..e399b48 100644
--- a/runtime/openjdkjvmti/ti_properties.cc
+++ b/runtime/openjdkjvmti/ti_properties.cc
@@ -40,7 +40,7 @@
 
 #include "art_jvmti.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "ti_phase.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index cca1486..b382a3e 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -48,6 +48,7 @@
 #include "gc/allocation_listener.h"
 #include "gc/heap.h"
 #include "instrumentation.h"
+#include "intern_table.h"
 #include "jdwp/jdwp.h"
 #include "jdwp/jdwp_constants.h"
 #include "jdwp/jdwp_event.h"
@@ -452,7 +453,30 @@
 
 art::mirror::DexCache* Redefiner::ClassRedefinition::CreateNewDexCache(
     art::Handle<art::mirror::ClassLoader> loader) {
-  return driver_->runtime_->GetClassLinker()->RegisterDexFile(*dex_file_, loader.Get()).Ptr();
+  art::Thread* const self = driver_->self_;
+  art::StackHandleScope<2> hs(self);
+  art::ClassLinker* const linker = driver_->runtime_->GetClassLinker();
+  // Allocate the DexCache object itself; a null handle means the allocation failed with OOME.
+  art::Handle<art::mirror::DexCache> cache(hs.NewHandle(
+      art::ObjPtr<art::mirror::DexCache>::DownCast(
+          linker->GetClassRoot(art::ClassLinker::kJavaLangDexCache)->AllocObject(self))));
+  if (cache.IsNull()) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  art::Handle<art::mirror::String> location(hs.NewHandle(
+      linker->GetInternTable()->InternStrong(dex_file_->GetLocation().c_str())));
+  if (location.IsNull()) {
+    self->AssertPendingOOMException();
+    return nullptr;
+  }
+  // Initialize under the writer-held dex_lock_; a null loader uses the runtime's LinearAlloc.
+  art::WriterMutexLock mu(self, *art::Locks::dex_lock_);
+  art::mirror::DexCache::InitializeDexCache(
+      self, cache.Get(), location.Get(), dex_file_.get(),
+      loader.IsNull() ? driver_->runtime_->GetLinearAlloc() : loader->GetAllocator(),
+      art::kRuntimePointerSize);
+  return cache.Get();
 }
 
 void Redefiner::RecordFailure(jvmtiError result,
@@ -602,8 +626,8 @@
     // Since direct methods have different flags than virtual ones (specifically direct methods must
     // have kAccPrivate or kAccStatic or kAccConstructor flags) we can tell if a method changes from
     // virtual to direct.
-    uint32_t new_flags = new_iter.GetMethodAccessFlags();
-    if (new_flags != (old_method->GetAccessFlags() & art::kAccValidMethodFlags)) {
+    uint32_t new_flags = new_iter.GetMethodAccessFlags() & ~art::kAccPreviouslyWarm;
+    if (new_flags != (old_method->GetAccessFlags() & (art::kAccValidMethodFlags ^ art::kAccPreviouslyWarm))) {
       RecordFailure(ERR(UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED),
                     StringPrintf("method '%s' (sig: %s) had different access flags",
                                  new_method_name,
@@ -1293,8 +1317,10 @@
 
   // At this point we can no longer fail without corrupting the runtime state.
   for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
+    art::ClassLinker* cl = runtime_->GetClassLinker();
+    cl->RegisterExistingDexCache(data.GetNewDexCache(), data.GetSourceClassLoader());
     if (data.GetSourceClassLoader() == nullptr) {
-      runtime_->GetClassLinker()->AppendToBootClassPath(self_, data.GetRedefinition().GetDexFile());
+      cl->AppendToBootClassPath(self_, data.GetRedefinition().GetDexFile());
     }
   }
   UnregisterAllBreakpoints();
diff --git a/runtime/openjdkjvmti/ti_search.cc b/runtime/openjdkjvmti/ti_search.cc
index ec139f2..6e0196e 100644
--- a/runtime/openjdkjvmti/ti_search.cc
+++ b/runtime/openjdkjvmti/ti_search.cc
@@ -49,7 +49,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "ti_phase.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
index 1ddf04f..22da2d2 100644
--- a/runtime/openjdkjvmti/ti_stack.cc
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -52,7 +52,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
 #include "stack.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "thread_pool.h"
 #include "well_known_classes.h"
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index 3dfa633..2cc2a26 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -49,7 +49,7 @@
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/openjdkjvmti/ti_thread.h b/runtime/openjdkjvmti/ti_thread.h
index c7f75d8..939aea7 100644
--- a/runtime/openjdkjvmti/ti_thread.h
+++ b/runtime/openjdkjvmti/ti_thread.h
@@ -37,7 +37,7 @@
 
 namespace art {
 class ArtField;
-}
+}  // namespace art
 
 namespace openjdkjvmti {
 
diff --git a/runtime/openjdkjvmti/ti_threadgroup.cc b/runtime/openjdkjvmti/ti_threadgroup.cc
index dd7be11..c0597ad 100644
--- a/runtime/openjdkjvmti/ti_threadgroup.cc
+++ b/runtime/openjdkjvmti/ti_threadgroup.cc
@@ -45,7 +45,7 @@
 #include "object_lock.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 #include "well_known_classes.h"
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index fc91efa..abb6f8c 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -238,9 +238,9 @@
       .Define("-Xlockprofthreshold:_")
           .WithType<unsigned int>()
           .IntoKey(M::LockProfThreshold)
-      .Define("-Xstacktracedir:_")
-          .WithType<std::string>()
-          .IntoKey(M::StackTraceDir)
+      .Define("-Xusetombstonedtraces")
+          .WithValue(true)
+          .IntoKey(M::UseTombstonedTraces)
       .Define("-Xstacktracefile:_")
           .WithType<std::string>()
           .IntoKey(M::StackTraceFile)
@@ -709,7 +709,7 @@
   UsageMessage(stream, "  -Xmethod-trace-file-size:integervalue\n");
   UsageMessage(stream, "  -Xps-min-save-period-ms:integervalue\n");
   UsageMessage(stream, "  -Xps-save-resolved-classes-delay-ms:integervalue\n");
-  UsageMessage(stream, "  -Xps-startup-method-samples:integervalue\n");
+  UsageMessage(stream, "  -Xps-hot-startup-method-samples:integervalue\n");
   UsageMessage(stream, "  -Xps-min-methods-to-save:integervalue\n");
   UsageMessage(stream, "  -Xps-min-classes-to-save:integervalue\n");
   UsageMessage(stream, "  -Xps-min-notification-before-wake:integervalue\n");
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index dbe7f5c..2d06e54 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -19,6 +19,7 @@
 
 #include "read_barrier.h"
 
+#include "gc/accounting/read_barrier_table.h"
 #include "gc/collector/concurrent_copying-inl.h"
 #include "gc/heap.h"
 #include "mirror/object_reference.h"
@@ -62,7 +63,7 @@
         // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
         // updates before us, but it's OK.
         if (kAlwaysUpdateField && ref != old_ref) {
-          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+          obj->CasFieldStrongReleaseObjectWithoutWriteBarrier<false, false>(
               offset, old_ref, ref);
         }
       }
@@ -80,7 +81,7 @@
         ref = reinterpret_cast<MirrorType*>(Mark(old_ref));
         // Update the field atomically. This may fail if mutator updates before us, but it's ok.
         if (ref != old_ref) {
-          obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
+          obj->CasFieldStrongReleaseObjectWithoutWriteBarrier<false, false>(
               offset, old_ref, ref);
         }
       }
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 8423e04..010c6f8 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -26,7 +26,6 @@
 #include "base/mutex.h"
 #include "gc_root.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 
 namespace art {
 namespace mirror {
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index e809ecf..260be8f 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -29,7 +29,7 @@
 #include "primitive.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index e16ef1d..532da2b 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -31,6 +31,7 @@
 #include "nth_caller_visitor.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack_reference.h"
+#include "ScopedLocalRef.h"
 #include "well_known_classes.h"
 
 namespace art {
@@ -668,7 +669,7 @@
   // Wrap any exception with "Ljava/lang/reflect/InvocationTargetException;" and return early.
   if (soa.Self()->IsExceptionPending()) {
     // If we get another exception when we are trying to wrap, then just use that instead.
-    jthrowable th = soa.Env()->ExceptionOccurred();
+    ScopedLocalRef<jthrowable> th(soa.Env(), soa.Env()->ExceptionOccurred());
     soa.Self()->ClearException();
     jclass exception_class = soa.Env()->FindClass("java/lang/reflect/InvocationTargetException");
     if (exception_class == nullptr) {
@@ -677,7 +678,7 @@
     }
     jmethodID mid = soa.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
     CHECK(mid != nullptr);
-    jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th);
+    jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th.get());
     if (exception_instance == nullptr) {
       soa.Self()->AssertPendingException();
       return nullptr;
diff --git a/runtime/runtime-inl.h b/runtime/runtime-inl.h
index 75c25dd..609f0d6 100644
--- a/runtime/runtime-inl.h
+++ b/runtime/runtime-inl.h
@@ -20,10 +20,9 @@
 #include "runtime.h"
 
 #include "art_method.h"
-#include "class_linker.h"
+#include "base/callee_save_type.h"
 #include "gc_root-inl.h"
 #include "obj_ptr-inl.h"
-#include "read_barrier-inl.h"
 
 namespace art {
 
@@ -43,15 +42,15 @@
   DCHECK_NE(method, GetImtConflictMethod());
   DCHECK_NE(method, GetResolutionMethod());
   // Don't use GetCalleeSaveMethod(), some tests don't set all callee save methods.
-  if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsAndArgs)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
-  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveAllCalleeSaves)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveAllCalleeSaves);
-  } else if (method == GetCalleeSaveMethodUnchecked(Runtime::kSaveRefsOnly)) {
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsOnly);
+  if (method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveRefsAndArgs)) {
+    return GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
+  } else if (method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveAllCalleeSaves)) {
+    return GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveAllCalleeSaves);
+  } else if (method == GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveRefsOnly)) {
+    return GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsOnly);
   } else {
-    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(Runtime::kSaveEverything));
-    return GetCalleeSaveMethodFrameInfo(Runtime::kSaveEverything);
+    DCHECK_EQ(method, GetCalleeSaveMethodUnchecked(CalleeSaveType::kSaveEverything));
+    return GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveEverything);
   }
 }
 
@@ -78,7 +77,7 @@
 
 inline ArtMethod* Runtime::GetCalleeSaveMethodUnchecked(CalleeSaveType type)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  return reinterpret_cast<ArtMethod*>(callee_save_methods_[type]);
+  return reinterpret_cast<ArtMethod*>(callee_save_methods_[static_cast<size_t>(type)]);
 }
 
 }  // namespace art
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 3697f21..c11e4bd 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -57,6 +57,7 @@
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "asm_support.h"
+#include "asm_support_check.h"
 #include "atomic.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
@@ -134,6 +135,7 @@
 #include "native_stack_dump.h"
 #include "oat_file.h"
 #include "oat_file_manager.h"
+#include "object_callbacks.h"
 #include "os.h"
 #include "parsed_options.h"
 #include "jit/profile_saver.h"
@@ -216,6 +218,7 @@
       intern_table_(nullptr),
       class_linker_(nullptr),
       signal_catcher_(nullptr),
+      use_tombstoned_traces_(false),
       java_vm_(nullptr),
       fault_message_lock_("Fault message lock"),
       fault_message_(""),
@@ -258,6 +261,9 @@
       process_state_(kProcessStateJankPerceptible),
       zygote_no_threads_(false),
       cha_(nullptr) {
+  static_assert(Runtime::kCalleeSaveSize ==
+                    static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType), "Unexpected size");
+
   CheckAsmSupportOffsetsAndSizes();
   std::fill(callee_save_methods_, callee_save_methods_ + arraysize(callee_save_methods_), 0u);
   interpreter::CheckInterpreterAsmConstants();
@@ -834,7 +840,7 @@
 
 void Runtime::StartSignalCatcher() {
   if (!is_zygote_) {
-    signal_catcher_ = new SignalCatcher(stack_trace_dir_, stack_trace_file_);
+    signal_catcher_ = new SignalCatcher(stack_trace_file_, use_tombstoned_traces_);
   }
 }
 
@@ -1017,6 +1023,30 @@
 
   MemMap::Init();
 
+  // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels.
+  // If we cannot reserve it, log a warning.
+  // Note: We allocate this first to have a good chance of grabbing the page. The address (0xebad..)
+  //       is out-of-the-way enough that it should not collide with boot image mapping.
+  // Note: Don't request an error message. That will lead to a maps dump in the case of failure,
+  //       leading to logspam.
+  {
+    constexpr uintptr_t kSentinelAddr =
+        RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize);
+    protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page",
+                                                     reinterpret_cast<uint8_t*>(kSentinelAddr),
+                                                     kPageSize,
+                                                     PROT_NONE,
+                                                     /* low_4g */ true,
+                                                     /* reuse */ false,
+                                                     /* error_msg */ nullptr));
+    if (protected_fault_page_ == nullptr) {
+      LOG(WARNING) << "Could not reserve sentinel fault page";
+    } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) {
+      LOG(WARNING) << "Could not reserve sentinel fault page at the right address.";
+      protected_fault_page_.reset();
+    }
+  }
+
   using Opt = RuntimeArgumentMap;
   VLOG(startup) << "Runtime::Init -verbose:startup enabled";
 
@@ -1045,7 +1075,11 @@
   abort_ = runtime_options.GetOrDefault(Opt::HookAbort);
 
   default_stack_size_ = runtime_options.GetOrDefault(Opt::StackSize);
-  stack_trace_dir_ = runtime_options.ReleaseOrDefault(Opt::StackTraceDir);
+  use_tombstoned_traces_ = runtime_options.GetOrDefault(Opt::UseTombstonedTraces);
+#if !defined(ART_TARGET_ANDROID)
+  CHECK(!use_tombstoned_traces_)
+      << "-Xusetombstonedtraces is only supported in an Android environment";
+#endif
   stack_trace_file_ = runtime_options.ReleaseOrDefault(Opt::StackTraceFile);
 
   compiler_executable_ = runtime_options.ReleaseOrDefault(Opt::Compiler);
@@ -1302,8 +1336,8 @@
 
     // TODO: Should we move the following to InitWithoutImage?
     SetInstructionSet(instruction_set_);
-    for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
-      Runtime::CalleeSaveType type = Runtime::CalleeSaveType(i);
+    for (uint32_t i = 0; i < kCalleeSaveSize; i++) {
+      CalleeSaveType type = CalleeSaveType(i);
       if (!HasCalleeSaveMethod(type)) {
         SetCalleeSaveMethod(CreateCalleeSaveMethod(), type);
       }
@@ -1401,27 +1435,6 @@
     callbacks_->NextRuntimePhase(RuntimePhaseCallback::RuntimePhase::kInitialAgents);
   }
 
-  // Try to reserve a dedicated fault page. This is allocated for clobbered registers and sentinels.
-  // If we cannot reserve it, log a warning.
-  // Note: This is allocated last so that the heap and other things have priority, if necessary.
-  {
-    constexpr uintptr_t kSentinelAddr =
-        RoundDown(static_cast<uintptr_t>(Context::kBadGprBase), kPageSize);
-    protected_fault_page_.reset(MemMap::MapAnonymous("Sentinel fault page",
-                                                     reinterpret_cast<uint8_t*>(kSentinelAddr),
-                                                     kPageSize,
-                                                     PROT_NONE,
-                                                     true,
-                                                     false,
-                                                     &error_msg));
-    if (protected_fault_page_ == nullptr) {
-      LOG(WARNING) << "Could not reserve sentinel fault page: " << error_msg;
-    } else if (reinterpret_cast<uintptr_t>(protected_fault_page_->Begin()) != kSentinelAddr) {
-      LOG(WARNING) << "Could not reserve sentinel fault page at the right address.";
-      protected_fault_page_.reset();
-    }
-  }
-
   VLOG(startup) << "Runtime::Init exiting";
 
   return true;
@@ -1789,7 +1802,7 @@
   if (imt_unimplemented_method_ != nullptr) {
     imt_unimplemented_method_->VisitRoots(buffered_visitor, pointer_size);
   }
-  for (size_t i = 0; i < kLastCalleeSaveType; ++i) {
+  for (uint32_t i = 0; i < kCalleeSaveSize; ++i) {
     auto* m = reinterpret_cast<ArtMethod*>(callee_save_methods_[i]);
     if (m != nullptr) {
       m->VisitRoots(buffered_visitor, pointer_size);
@@ -1965,32 +1978,32 @@
 void Runtime::SetInstructionSet(InstructionSet instruction_set) {
   instruction_set_ = instruction_set;
   if ((instruction_set_ == kThumb2) || (instruction_set_ == kArm)) {
-    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+    for (int i = 0; i != kCalleeSaveSize; ++i) {
       CalleeSaveType type = static_cast<CalleeSaveType>(i);
       callee_save_method_frame_infos_[i] = arm::ArmCalleeSaveMethodFrameInfo(type);
     }
   } else if (instruction_set_ == kMips) {
-    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+    for (int i = 0; i != kCalleeSaveSize; ++i) {
       CalleeSaveType type = static_cast<CalleeSaveType>(i);
       callee_save_method_frame_infos_[i] = mips::MipsCalleeSaveMethodFrameInfo(type);
     }
   } else if (instruction_set_ == kMips64) {
-    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+    for (int i = 0; i != kCalleeSaveSize; ++i) {
       CalleeSaveType type = static_cast<CalleeSaveType>(i);
       callee_save_method_frame_infos_[i] = mips64::Mips64CalleeSaveMethodFrameInfo(type);
     }
   } else if (instruction_set_ == kX86) {
-    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+    for (int i = 0; i != kCalleeSaveSize; ++i) {
       CalleeSaveType type = static_cast<CalleeSaveType>(i);
       callee_save_method_frame_infos_[i] = x86::X86CalleeSaveMethodFrameInfo(type);
     }
   } else if (instruction_set_ == kX86_64) {
-    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+    for (int i = 0; i != kCalleeSaveSize; ++i) {
       CalleeSaveType type = static_cast<CalleeSaveType>(i);
       callee_save_method_frame_infos_[i] = x86_64::X86_64CalleeSaveMethodFrameInfo(type);
     }
   } else if (instruction_set_ == kArm64) {
-    for (int i = 0; i != kLastCalleeSaveType; ++i) {
+    for (int i = 0; i != kCalleeSaveSize; ++i) {
       CalleeSaveType type = static_cast<CalleeSaveType>(i);
       callee_save_method_frame_infos_[i] = arm64::Arm64CalleeSaveMethodFrameInfo(type);
     }
@@ -2004,15 +2017,14 @@
 }
 
 void Runtime::SetCalleeSaveMethod(ArtMethod* method, CalleeSaveType type) {
-  DCHECK_LT(static_cast<int>(type), static_cast<int>(kLastCalleeSaveType));
+  DCHECK_LT(static_cast<uint32_t>(type), kCalleeSaveSize);
   CHECK(method != nullptr);
-  callee_save_methods_[type] = reinterpret_cast<uintptr_t>(method);
+  callee_save_methods_[static_cast<size_t>(type)] = reinterpret_cast<uintptr_t>(method);
 }
 
 void Runtime::ClearCalleeSaveMethods() {
-  for (size_t i = 0; i < static_cast<size_t>(kLastCalleeSaveType); ++i) {
-    CalleeSaveType type = static_cast<CalleeSaveType>(i);
-    callee_save_methods_[type] = reinterpret_cast<uintptr_t>(nullptr);
+  for (size_t i = 0; i < kCalleeSaveSize; ++i) {
+    callee_save_methods_[i] = reinterpret_cast<uintptr_t>(nullptr);
   }
 }
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 2e3b8d7..2505d87 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -34,10 +34,7 @@
 #include "experimental_flags.h"
 #include "gc_root.h"
 #include "instrumentation.h"
-#include "jobject_comparator.h"
-#include "method_reference.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "process_state.h"
 #include "quick/quick_method_frame_info.h"
@@ -74,11 +71,13 @@
 }  // namespace verifier
 class ArenaPool;
 class ArtMethod;
+enum class CalleeSaveType: uint32_t;
 class ClassHierarchyAnalysis;
 class ClassLinker;
 class CompilerCallbacks;
 class DexFile;
 class InternTable;
+class IsMarkedVisitor;
 class JavaVMExt;
 class LinearAlloc;
 class MemMap;
@@ -379,17 +378,8 @@
     imt_unimplemented_method_ = nullptr;
   }
 
-  // Returns a special method that describes all callee saves being spilled to the stack.
-  enum CalleeSaveType {
-    kSaveAllCalleeSaves,  // All callee-save registers.
-    kSaveRefsOnly,        // Only those callee-save registers that can hold references.
-    kSaveRefsAndArgs,     // References (see above) and arguments (usually caller-save registers).
-    kSaveEverything,      // All registers, including both callee-save and caller-save.
-    kLastCalleeSaveType   // Value used for iteration
-  };
-
   bool HasCalleeSaveMethod(CalleeSaveType type) const {
-    return callee_save_methods_[type] != 0u;
+    return callee_save_methods_[static_cast<size_t>(type)] != 0u;
   }
 
   ArtMethod* GetCalleeSaveMethod(CalleeSaveType type)
@@ -399,14 +389,14 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   QuickMethodFrameInfo GetCalleeSaveMethodFrameInfo(CalleeSaveType type) const {
-    return callee_save_method_frame_infos_[type];
+    return callee_save_method_frame_infos_[static_cast<size_t>(type)];
   }
 
   QuickMethodFrameInfo GetRuntimeMethodFrameInfo(ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static size_t GetCalleeSaveMethodOffset(CalleeSaveType type) {
-    return OFFSETOF_MEMBER(Runtime, callee_save_methods_[type]);
+    return OFFSETOF_MEMBER(Runtime, callee_save_methods_[static_cast<size_t>(type)]);
   }
 
   InstructionSet GetInstructionSet() const {
@@ -681,6 +671,14 @@
     deoptimization_counts_[static_cast<size_t>(kind)]++;
   }
 
+  uint32_t GetNumberOfDeoptimizations() const {
+    uint32_t result = 0;
+    for (size_t i = 0; i <= static_cast<size_t>(DeoptimizationKind::kLast); ++i) {
+      result += deoptimization_counts_[i];
+    }
+    return result;
+  }
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -718,8 +716,10 @@
   static constexpr int kProfileForground = 0;
   static constexpr int kProfileBackground = 1;
 
+  static constexpr uint32_t kCalleeSaveSize = 4u;
+
   // 64 bit so that we can share the same asm offsets for both 32 and 64 bits.
-  uint64_t callee_save_methods_[kLastCalleeSaveType];
+  uint64_t callee_save_methods_[kCalleeSaveSize];
   GcRoot<mirror::Throwable> pre_allocated_OutOfMemoryError_;
   GcRoot<mirror::Throwable> pre_allocated_NoClassDefFoundError_;
   ArtMethod* resolution_method_;
@@ -733,7 +733,7 @@
   GcRoot<mirror::Object> sentinel_;
 
   InstructionSet instruction_set_;
-  QuickMethodFrameInfo callee_save_method_frame_infos_[kLastCalleeSaveType];
+  QuickMethodFrameInfo callee_save_method_frame_infos_[kCalleeSaveSize];
 
   CompilerCallbacks* compiler_callbacks_;
   bool is_zygote_;
@@ -783,7 +783,13 @@
   ClassLinker* class_linker_;
 
   SignalCatcher* signal_catcher_;
-  std::string stack_trace_dir_;
+
+  // If true, the runtime will connect to tombstoned via a socket to
+  // request an open file descriptor to write its traces to.
+  bool use_tombstoned_traces_;
+
+  // Location to which traces must be written on SIGQUIT. Only used if
+  // use_tombstoned_traces_ == false.
   std::string stack_trace_file_;
 
   std::unique_ptr<JavaVMExt> java_vm_;
@@ -947,7 +953,6 @@
 
   DISALLOW_COPY_AND_ASSIGN(Runtime);
 };
-std::ostream& operator<<(std::ostream& os, const Runtime::CalleeSaveType& rhs);
 
 }  // namespace art
 
diff --git a/runtime/runtime_android.cc b/runtime/runtime_android.cc
index 495296c..4bd3b3a 100644
--- a/runtime/runtime_android.cc
+++ b/runtime/runtime_android.cc
@@ -27,7 +27,11 @@
 struct sigaction old_action;
 
 void HandleUnexpectedSignalAndroid(int signal_number, siginfo_t* info, void* raw_context) {
-  HandleUnexpectedSignalCommon(signal_number, info, raw_context, /* running_on_linux */ false);
+  HandleUnexpectedSignalCommon(signal_number,
+                               info,
+                               raw_context,
+                               /* handle_timeout_signal */ false,
+                               /* dump_on_stderr */ false);
 
   // Run the old signal handler.
   old_action.sa_sigaction(signal_number, info, raw_context);
diff --git a/runtime/runtime_common.cc b/runtime/runtime_common.cc
index 3690129..940e461 100644
--- a/runtime/runtime_common.cc
+++ b/runtime/runtime_common.cc
@@ -29,7 +29,8 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "native_stack_dump.h"
-#include "thread-inl.h"
+#include "runtime.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 namespace art {
@@ -370,10 +371,8 @@
 void HandleUnexpectedSignalCommon(int signal_number,
                                   siginfo_t* info,
                                   void* raw_context,
-                                  bool running_on_linux) {
-  bool handle_timeout_signal = running_on_linux;
-  bool dump_on_stderr = running_on_linux;
-
+                                  bool handle_timeout_signal,
+                                  bool dump_on_stderr) {
   static bool handling_unexpected_signal = false;
   if (handling_unexpected_signal) {
     LogHelper::LogLineLowStack(__FILE__,
@@ -393,39 +392,41 @@
   gAborting++;  // set before taking any locks
   MutexLock mu(Thread::Current(), *Locks::unexpected_signal_lock_);
 
-  bool has_address = (signal_number == SIGILL || signal_number == SIGBUS ||
-                      signal_number == SIGFPE || signal_number == SIGSEGV);
+  auto logger = [&](auto& stream) {
+    bool has_address = (signal_number == SIGILL || signal_number == SIGBUS ||
+                        signal_number == SIGFPE || signal_number == SIGSEGV);
+    OsInfo os_info;
+    const char* cmd_line = GetCmdLine();
+    if (cmd_line == nullptr) {
+      cmd_line = "<unset>";  // Because no-one called InitLogging.
+    }
+    pid_t tid = GetTid();
+    std::string thread_name(GetThreadName(tid));
+    UContext thread_context(raw_context);
+    Backtrace thread_backtrace(raw_context);
 
-  OsInfo os_info;
-  const char* cmd_line = GetCmdLine();
-  if (cmd_line == nullptr) {
-    cmd_line = "<unset>";  // Because no-one called InitLogging.
-  }
-  pid_t tid = GetTid();
-  std::string thread_name(GetThreadName(tid));
-  UContext thread_context(raw_context);
-  Backtrace thread_backtrace(raw_context);
+    stream << "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***" << std::endl
+           << StringPrintf("Fatal signal %d (%s), code %d (%s)",
+                             signal_number,
+                             GetSignalName(signal_number),
+                             info->si_code,
+                             GetSignalCodeName(signal_number, info->si_code))
+           << (has_address ? StringPrintf(" fault addr %p", info->si_addr) : "") << std::endl
+           << "OS: " << Dumpable<OsInfo>(os_info) << std::endl
+           << "Cmdline: " << cmd_line << std::endl
+           << "Thread: " << tid << " \"" << thread_name << "\"" << std::endl
+           << "Registers:\n" << Dumpable<UContext>(thread_context) << std::endl
+           << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace) << std::endl;
+    stream << std::flush;
+  };
 
-  std::ostringstream stream;
-  stream << "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"
-         << StringPrintf("Fatal signal %d (%s), code %d (%s)",
-                         signal_number,
-                         GetSignalName(signal_number),
-                         info->si_code,
-                         GetSignalCodeName(signal_number, info->si_code))
-         << (has_address ? StringPrintf(" fault addr %p", info->si_addr) : "") << '\n'
-         << "OS: " << Dumpable<OsInfo>(os_info) << '\n'
-         << "Cmdline: " << cmd_line << '\n'
-         << "Thread: " << tid << " \"" << thread_name << "\"" << '\n'
-         << "Registers:\n" << Dumpable<UContext>(thread_context) << '\n'
-         << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace) << '\n';
   if (dump_on_stderr) {
     // Note: We are using cerr directly instead of LOG macros to ensure even just partial output
     //       makes it out. That means we lose the "dalvikvm..." prefix, but that is acceptable
     //       considering this is an abort situation.
-    std::cerr << stream.str() << std::flush;
+    logger(std::cerr);
   } else {
-    LOG(FATAL_WITHOUT_ABORT) << stream.str() << std::flush;
+    logger(LOG_STREAM(FATAL_WITHOUT_ABORT));
   }
   if (kIsDebugBuild && signal_number == SIGSEGV) {
     PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
diff --git a/runtime/runtime_common.h b/runtime/runtime_common.h
index 832b6bb..06d6627 100644
--- a/runtime/runtime_common.h
+++ b/runtime/runtime_common.h
@@ -68,7 +68,8 @@
 void HandleUnexpectedSignalCommon(int signal_number,
                                   siginfo_t* info,
                                   void* raw_context,
-                                  bool running_on_linux);
+                                  bool handle_timeout_signal,
+                                  bool dump_on_stderr);
 
 void InitPlatformSignalHandlersCommon(void (*newact)(int, siginfo_t*, void*),
                                       struct sigaction* oldact,
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index ad61cf3..424dcf8 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -25,7 +25,13 @@
 namespace art {
 
 void HandleUnexpectedSignalLinux(int signal_number, siginfo_t* info, void* raw_context) {
-  HandleUnexpectedSignalCommon(signal_number, info, raw_context, /* running_on_linux */ true);
+  // Linux is mainly used for host testing. Under those conditions, react to the timeout signal,
+  // and dump to stderr to avoid missing output on double-faults.
+  HandleUnexpectedSignalCommon(signal_number,
+                               info,
+                               raw_context,
+                               /* handle_timeout_signal */ true,
+                               /* dump_on_stderr */ true);
 
   if (getenv("debug_db_uid") != nullptr || getenv("art_wait_for_gdb_on_crash") != nullptr) {
     pid_t tid = GetTid();
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 77132a8..cfc681f 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -100,7 +100,7 @@
 RUNTIME_OPTIONS_KEY (Unit,                ForceNativeBridge)
 RUNTIME_OPTIONS_KEY (LogVerbosity,        Verbose)
 RUNTIME_OPTIONS_KEY (unsigned int,        LockProfThreshold)
-RUNTIME_OPTIONS_KEY (std::string,         StackTraceDir)
+RUNTIME_OPTIONS_KEY (bool,                UseTombstonedTraces, false)
 RUNTIME_OPTIONS_KEY (std::string,         StackTraceFile)
 RUNTIME_OPTIONS_KEY (Unit,                MethodTrace)
 RUNTIME_OPTIONS_KEY (std::string,         MethodTraceFile,                "/data/misc/trace/method-trace-file.bin")
diff --git a/runtime/safe_map.h b/runtime/safe_map.h
index e638fdb..b54f587 100644
--- a/runtime/safe_map.h
+++ b/runtime/safe_map.h
@@ -46,6 +46,7 @@
 
   SafeMap() = default;
   SafeMap(const SafeMap&) = default;
+  SafeMap(SafeMap&&) = default;
   explicit SafeMap(const key_compare& cmp, const allocator_type& allocator = allocator_type())
     : map_(cmp, allocator) {
   }
@@ -151,6 +152,11 @@
     return map_ == rhs.map_;
   }
 
+  template <class... Args>
+  std::pair<iterator, bool> emplace(Args&&... args) {
+    return map_.emplace(std::forward<Args>(args)...);
+  }
+
  private:
   ::std::map<K, V, Comparator, Allocator> map_;
 };
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index ed6e349..aa96871 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -22,6 +22,7 @@
 #include "base/casts.h"
 #include "jni_env_ext-inl.h"
 #include "obj_ptr-inl.h"
+#include "runtime.h"
 #include "thread-inl.h"
 
 namespace art {
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index faea7b3..8c934d5 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -42,6 +42,10 @@
 #include "thread_list.h"
 #include "utils.h"
 
+#if defined(ART_TARGET_ANDROID)
+#include "tombstoned/tombstoned.h"
+#endif
+
 namespace art {
 
 static void DumpCmdLine(std::ostream& os) {
@@ -66,13 +70,19 @@
 #endif
 }
 
-SignalCatcher::SignalCatcher(const std::string& stack_trace_dir,
-                             const std::string& stack_trace_file)
-    : stack_trace_dir_(stack_trace_dir),
-      stack_trace_file_(stack_trace_file),
+SignalCatcher::SignalCatcher(const std::string& stack_trace_file,
+                             bool use_tombstoned_stack_trace_fd)
+    : stack_trace_file_(stack_trace_file),
+      use_tombstoned_stack_trace_fd_(use_tombstoned_stack_trace_fd),
       lock_("SignalCatcher lock"),
       cond_("SignalCatcher::cond_", lock_),
       thread_(nullptr) {
+#if !defined(ART_TARGET_ANDROID)
+  // We're not running on Android, so we can't communicate with tombstoned
+  // to ask for an open file.
+  CHECK(!use_tombstoned_stack_trace_fd_);
+#endif
+
   SetHaltFlag(false);
 
   // Create a raw pthread; its start routine will attach to the runtime.
@@ -103,62 +113,65 @@
   return halt_;
 }
 
-std::string SignalCatcher::GetStackTraceFileName() {
-  if (!stack_trace_dir_.empty()) {
-    // We'll try a maximum of ten times (arbitrarily selected) to create a file
-    // with a unique name, seeding the pseudo random generator each time.
-    //
-    // If this doesn't work, give up and log to stdout. Note that we could try
-    // indefinitely, but that would make problems in this code harder to detect
-    // since we'd be spinning in the signal catcher thread.
-    static constexpr uint32_t kMaxRetries = 10;
-
-    for (uint32_t i = 0; i < kMaxRetries; ++i) {
-        std::srand(NanoTime());
-        // Sample output for PID 1234 : /data/anr/anr-pid1234-cafeffee.txt
-        const std::string file_name = android::base::StringPrintf(
-            "%s/anr-pid%" PRId32 "-%08" PRIx32 ".txt",
-            stack_trace_dir_.c_str(),
-            static_cast<int32_t>(getpid()),
-            static_cast<uint32_t>(std::rand()));
-
-        if (!OS::FileExists(file_name.c_str())) {
-          return file_name;
-        }
-    }
-
-    LOG(ERROR) << "Unable to obtain stack trace filename at path : " << stack_trace_dir_;
-    return "";
+bool SignalCatcher::OpenStackTraceFile(android::base::unique_fd* tombstone_fd,
+                                       android::base::unique_fd* output_fd) {
+  if (use_tombstoned_stack_trace_fd_) {
+#if defined(ART_TARGET_ANDROID)
+    return tombstoned_connect(getpid(), tombstone_fd, output_fd, kDebuggerdJavaBacktrace);
+#else
+    UNUSED(tombstone_fd);
+    UNUSED(output_fd);
+#endif
   }
 
-  return stack_trace_file_;
+  // The runtime is not configured to dump traces to a file, will LOG(INFO)
+  // instead.
+  if (stack_trace_file_.empty()) {
+    return false;
+  }
+
+  int fd = open(stack_trace_file_.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
+  if (fd == -1) {
+      PLOG(ERROR) << "Unable to open stack trace file '" << stack_trace_file_ << "'";
+      return false;
+  }
+
+  output_fd->reset(fd);
+  return true;
 }
 
 void SignalCatcher::Output(const std::string& s) {
-  const std::string output_file = GetStackTraceFileName();
-  if (output_file.empty()) {
+  android::base::unique_fd tombstone_fd;
+  android::base::unique_fd output_fd;
+  if (!OpenStackTraceFile(&tombstone_fd, &output_fd)) {
     LOG(INFO) << s;
     return;
   }
 
   ScopedThreadStateChange tsc(Thread::Current(), kWaitingForSignalCatcherOutput);
-  int fd = open(output_file.c_str(), O_APPEND | O_CREAT | O_WRONLY, 0666);
-  if (fd == -1) {
-    PLOG(ERROR) << "Unable to open stack trace file '" << output_file << "'";
-    return;
-  }
-  std::unique_ptr<File> file(new File(fd, output_file, true));
+
+  std::unique_ptr<File> file(new File(output_fd.release(), true /* check_usage */));
   bool success = file->WriteFully(s.data(), s.size());
   if (success) {
     success = file->FlushCloseOrErase() == 0;
   } else {
     file->Erase();
   }
+
+  const std::string output_path_msg = (use_tombstoned_stack_trace_fd_) ?
+      "[tombstoned]" : stack_trace_file_;
+
   if (success) {
-    LOG(INFO) << "Wrote stack traces to '" << output_file << "'";
+    LOG(INFO) << "Wrote stack traces to '" << output_path_msg << "'";
   } else {
-    PLOG(ERROR) << "Failed to write stack traces to '" << output_file << "'";
+    PLOG(ERROR) << "Failed to write stack traces to '" << output_path_msg << "'";
   }
+
+#if defined(ART_TARGET_ANDROID)  // tombstone_fd is only populated in tombstoned mode.
+  if (use_tombstoned_stack_trace_fd_ && !tombstoned_notify_completion(tombstone_fd)) {
+    LOG(WARNING) << "Unable to notify tombstoned of dump completion.";
+  }
+#endif  // defined(ART_TARGET_ANDROID)
 }
 
 void SignalCatcher::HandleSigQuit() {
diff --git a/runtime/signal_catcher.h b/runtime/signal_catcher.h
index 4cd7a98..8a2a728 100644
--- a/runtime/signal_catcher.h
+++ b/runtime/signal_catcher.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_SIGNAL_CATCHER_H_
 #define ART_RUNTIME_SIGNAL_CATCHER_H_
 
+#include "android-base/unique_fd.h"
 #include "base/mutex.h"
 
 namespace art {
@@ -32,15 +33,17 @@
  */
 class SignalCatcher {
  public:
-  // If |stack_trace_dir| is non empty, traces will be written to a
-  // unique file under that directory.
+  // If |use_tombstoned_stack_trace_fd| is |true|, traces will be
+  // written to a file descriptor provided by tombstoned. The process
+  // will communicate with tombstoned via a unix domain socket. This
+  // mode of stack trace dumping is only supported in an Android
+  // environment.
   //
-  // If |stack_trace_dir| is empty, and |stack_frace_file| is non-empty,
-  // traces will be appended to |stack_trace_file|.
-  //
-  // If both are empty, all traces will be written to the log buffer.
-  explicit SignalCatcher(const std::string& stack_trace_dir,
-                         const std::string& stack_trace_file);
+  // If false, all traces will be dumped to |stack_trace_file| if it's
+  // non-empty. If |stack_trace_file| is empty, all traces will be written
+  // to the log buffer.
+  SignalCatcher(const std::string& stack_trace_file,
+                bool use_tombstoned_stack_trace_fd);
   ~SignalCatcher();
 
   void HandleSigQuit() REQUIRES(!Locks::mutator_lock_, !Locks::thread_list_lock_,
@@ -51,15 +54,18 @@
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* Run(void* arg) NO_THREAD_SAFETY_ANALYSIS;
 
-  std::string GetStackTraceFileName();
+  // NOTE: We're using android::base::unique_fd here for easier
+  // interoperability with tombstoned client APIs.
+  bool OpenStackTraceFile(android::base::unique_fd* tombstone_fd,
+                          android::base::unique_fd* output_fd);
   void HandleSigUsr1();
   void Output(const std::string& s);
   void SetHaltFlag(bool new_value) REQUIRES(!lock_);
   bool ShouldHalt() REQUIRES(!lock_);
   int WaitForSignal(Thread* self, SignalSet& signals) REQUIRES(!lock_);
 
-  std::string stack_trace_dir_;
   std::string stack_trace_file_;
+  const bool use_tombstoned_stack_trace_fd_;
 
   mutable Mutex lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   ConditionVariable cond_ GUARDED_BY(lock_);
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5c6eead..eec0460 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -20,6 +20,7 @@
 
 #include "arch/context.h"
 #include "art_method-inl.h"
+#include "base/callee_save_type.h"
 #include "base/enums.h"
 #include "base/hex_dump.h"
 #include "entrypoints/entrypoint_utils-inl.h"
@@ -29,6 +30,7 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "linear_alloc.h"
+#include "managed_stack.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -68,34 +70,6 @@
   }
 }
 
-size_t ManagedStack::NumJniShadowFrameReferences() const {
-  size_t count = 0;
-  for (const ManagedStack* current_fragment = this; current_fragment != nullptr;
-       current_fragment = current_fragment->GetLink()) {
-    for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_; current_frame != nullptr;
-         current_frame = current_frame->GetLink()) {
-      if (current_frame->GetMethod()->IsNative()) {
-        // The JNI ShadowFrame only contains references. (For indirect reference.)
-        count += current_frame->NumberOfVRegs();
-      }
-    }
-  }
-  return count;
-}
-
-bool ManagedStack::ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const {
-  for (const ManagedStack* current_fragment = this; current_fragment != nullptr;
-       current_fragment = current_fragment->GetLink()) {
-    for (ShadowFrame* current_frame = current_fragment->top_shadow_frame_; current_frame != nullptr;
-         current_frame = current_frame->GetLink()) {
-      if (current_frame->Contains(shadow_frame_entry)) {
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
 StackVisitor::StackVisitor(Thread* thread,
                            Context* context,
                            StackWalkKind walk_kind,
@@ -648,6 +622,12 @@
     return;
   }
 
+  Runtime* runtime = Runtime::Current();
+  if (runtime->UseJitCompilation() &&
+      runtime->GetJit()->GetCodeCache()->ContainsPc(reinterpret_cast<const void*>(pc))) {
+    return;
+  }
+
   const void* code = method->GetEntryPointFromQuickCompiledCode();
   if (code == GetQuickInstrumentationEntryPoint() || code == GetInvokeObsoleteMethodStub()) {
     return;
@@ -659,9 +639,6 @@
     return;
   }
 
-  // If we are the JIT then we may have just compiled the method after the
-  // IsQuickToInterpreterBridge check.
-  Runtime* runtime = Runtime::Current();
   if (runtime->UseJitCompilation() && runtime->GetJit()->GetCodeCache()->ContainsPc(code)) {
     return;
   }
@@ -758,7 +735,7 @@
   Runtime* runtime = Runtime::Current();
 
   if (method->IsAbstract()) {
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
   }
 
   // This goes before IsProxyMethod since runtime methods have a null declaring class.
@@ -772,7 +749,7 @@
     // compiled method without any stubs. Therefore the method must have a OatQuickMethodHeader.
     DCHECK(!method->IsDirect() && !method->IsConstructor())
         << "Constructors of proxy classes must have a OatQuickMethodHeader";
-    return runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
+    return runtime->GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
   }
 
   // The only remaining case is if the method is native and uses the generic JNI stub.
@@ -785,7 +762,7 @@
   uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
   size_t scope_size = HandleScope::SizeOf(handle_refs);
   QuickMethodFrameInfo callee_info =
-      runtime->GetCalleeSaveMethodFrameInfo(Runtime::kSaveRefsAndArgs);
+      runtime->GetCalleeSaveMethodFrameInfo(CalleeSaveType::kSaveRefsAndArgs);
 
   // Callee saves + handle scope + method ref + alignment
   // Note: -sizeof(void*) since callee-save frame stores a whole method pointer.
@@ -868,11 +845,11 @@
                 thread_->GetInstrumentationStack()->at(instrumentation_stack_depth);
             instrumentation_stack_depth++;
             if (GetMethod() ==
-                Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveAllCalleeSaves)) {
+                Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveAllCalleeSaves)) {
               // Skip runtime save all callee frames which are used to deliver exceptions.
             } else if (instrumentation_frame.interpreter_entry_) {
               ArtMethod* callee =
-                  Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
+                  Runtime::Current()->GetCalleeSaveMethod(CalleeSaveType::kSaveRefsAndArgs);
               CHECK_EQ(GetMethod(), callee) << "Expected: " << ArtMethod::PrettyMethod(callee)
                                             << " Found: " << ArtMethod::PrettyMethod(GetMethod());
             } else {
diff --git a/runtime/stack.h b/runtime/stack.h
index bdaa4c3..8c74a8c 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -512,86 +512,6 @@
   const size_t vreg_;
 };
 
-// The managed stack is used to record fragments of managed code stacks. Managed code stacks
-// may either be shadow frames or lists of frames using fixed frame sizes. Transition records are
-// necessary for transitions between code using different frame layouts and transitions into native
-// code.
-class PACKED(4) ManagedStack {
- public:
-  ManagedStack()
-      : top_quick_frame_(nullptr), link_(nullptr), top_shadow_frame_(nullptr) {}
-
-  void PushManagedStackFragment(ManagedStack* fragment) {
-    // Copy this top fragment into given fragment.
-    memcpy(fragment, this, sizeof(ManagedStack));
-    // Clear this fragment, which has become the top.
-    memset(this, 0, sizeof(ManagedStack));
-    // Link our top fragment onto the given fragment.
-    link_ = fragment;
-  }
-
-  void PopManagedStackFragment(const ManagedStack& fragment) {
-    DCHECK(&fragment == link_);
-    // Copy this given fragment back to the top.
-    memcpy(this, &fragment, sizeof(ManagedStack));
-  }
-
-  ManagedStack* GetLink() const {
-    return link_;
-  }
-
-  ArtMethod** GetTopQuickFrame() const {
-    return top_quick_frame_;
-  }
-
-  void SetTopQuickFrame(ArtMethod** top) {
-    DCHECK(top_shadow_frame_ == nullptr);
-    top_quick_frame_ = top;
-  }
-
-  static size_t TopQuickFrameOffset() {
-    return OFFSETOF_MEMBER(ManagedStack, top_quick_frame_);
-  }
-
-  ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame) {
-    DCHECK(top_quick_frame_ == nullptr);
-    ShadowFrame* old_frame = top_shadow_frame_;
-    top_shadow_frame_ = new_top_frame;
-    new_top_frame->SetLink(old_frame);
-    return old_frame;
-  }
-
-  ShadowFrame* PopShadowFrame() {
-    DCHECK(top_quick_frame_ == nullptr);
-    CHECK(top_shadow_frame_ != nullptr);
-    ShadowFrame* frame = top_shadow_frame_;
-    top_shadow_frame_ = frame->GetLink();
-    return frame;
-  }
-
-  ShadowFrame* GetTopShadowFrame() const {
-    return top_shadow_frame_;
-  }
-
-  void SetTopShadowFrame(ShadowFrame* top) {
-    DCHECK(top_quick_frame_ == nullptr);
-    top_shadow_frame_ = top;
-  }
-
-  static size_t TopShadowFrameOffset() {
-    return OFFSETOF_MEMBER(ManagedStack, top_shadow_frame_);
-  }
-
-  size_t NumJniShadowFrameReferences() const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  bool ShadowFramesContain(StackReference<mirror::Object>* shadow_frame_entry) const;
-
- private:
-  ArtMethod** top_quick_frame_;
-  ManagedStack* link_;
-  ShadowFrame* top_shadow_frame_;
-};
-
 class StackVisitor {
  public:
   // This enum defines a flag to control whether inlined frames are included
diff --git a/runtime/stride_iterator.h b/runtime/stride_iterator.h
index ac04c3b..0560c33 100644
--- a/runtime/stride_iterator.h
+++ b/runtime/stride_iterator.h
@@ -24,8 +24,11 @@
 namespace art {
 
 template<typename T>
-class StrideIterator : public std::iterator<std::forward_iterator_tag, T> {
+class StrideIterator : public std::iterator<std::random_access_iterator_tag, T> {
  public:
+  using difference_type =
+      typename std::iterator<std::random_access_iterator_tag, T>::difference_type;
+
   StrideIterator(const StrideIterator&) = default;
   StrideIterator(StrideIterator&&) = default;
   StrideIterator& operator=(const StrideIterator&) = default;
@@ -44,28 +47,56 @@
     return !(*this == other);
   }
 
-  StrideIterator operator++() {  // Value after modification.
+  StrideIterator& operator++() {  // Value after modification.
     ptr_ += stride_;
     return *this;
   }
 
   StrideIterator operator++(int) {
     StrideIterator<T> temp = *this;
-    ptr_ += stride_;
+    ++*this;
     return temp;
   }
 
-  StrideIterator operator+(ssize_t delta) const {
+  StrideIterator& operator--() {  // Value after modification.
+    ptr_ -= stride_;
+    return *this;
+  }
+
+  StrideIterator operator--(int) {
+    StrideIterator<T> temp = *this;
+    --*this;
+    return temp;
+  }
+
+  StrideIterator& operator+=(difference_type delta) {
+    ptr_ += static_cast<ssize_t>(stride_) * delta;
+    return *this;
+  }
+
+  StrideIterator operator+(difference_type delta) const {
     StrideIterator<T> temp = *this;
     temp += delta;
     return temp;
   }
 
-  StrideIterator& operator+=(ssize_t delta) {
-    ptr_ += static_cast<ssize_t>(stride_) * delta;
+  StrideIterator& operator-=(difference_type delta) {
+    ptr_ -= static_cast<ssize_t>(stride_) * delta;
     return *this;
   }
 
+  StrideIterator operator-(difference_type delta) const {
+    StrideIterator<T> temp = *this;
+    temp -= delta;
+    return temp;
+  }
+
+  difference_type operator-(const StrideIterator& rhs) const {  // Signed element distance.
+    DCHECK_EQ(stride_, rhs.stride_);
+    DCHECK_EQ(ptr_ % stride_, rhs.ptr_ % stride_);  // Whole number of strides apart.
+    return static_cast<difference_type>(ptr_ - rhs.ptr_) / static_cast<difference_type>(stride_);
+  }
+
   T& operator*() const {
     return *reinterpret_cast<T*>(ptr_);
   }
@@ -74,12 +105,46 @@
     return &**this;
   }
 
+  T& operator[](difference_type n) const {  // Const like operator*; does not move the iterator.
+    return *(*this + n);
+  }
+
  private:
   uintptr_t ptr_;
   // Not const for operator=.
   size_t stride_;
+
+  template <typename U>
+  friend bool operator<(const StrideIterator<U>& lhs, const StrideIterator<U>& rhs);
 };
 
+template <typename T>
+StrideIterator<T> operator+(typename StrideIterator<T>::difference_type dist,
+                            const StrideIterator<T>& it) {
+  return it + dist;
+}
+
+template <typename T>
+bool operator<(const StrideIterator<T>& lhs, const StrideIterator<T>& rhs) {
+  DCHECK_EQ(lhs.stride_, rhs.stride_);
+  return lhs.ptr_ < rhs.ptr_;
+}
+
+template <typename T>
+bool operator>(const StrideIterator<T>& lhs, const StrideIterator<T>& rhs) {
+  return rhs < lhs;
+}
+
+template <typename T>
+bool operator<=(const StrideIterator<T>& lhs, const StrideIterator<T>& rhs) {
+  return !(rhs < lhs);
+}
+
+template <typename T>
+bool operator>=(const StrideIterator<T>& lhs, const StrideIterator<T>& rhs) {
+  return !(lhs < rhs);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_STRIDE_ITERATOR_H_
diff --git a/runtime/thread-current-inl.h b/runtime/thread-current-inl.h
new file mode 100644
index 0000000..9241b1f
--- /dev/null
+++ b/runtime/thread-current-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_THREAD_CURRENT_INL_H_
+#define ART_RUNTIME_THREAD_CURRENT_INL_H_
+
+#include "thread.h"
+
+#ifdef ART_TARGET_ANDROID
+#include <bionic_tls.h>  // Access to our own TLS slot.
+#endif
+
+#include <pthread.h>
+
+namespace art {
+
+inline Thread* Thread::Current() {
+  // We rely on Thread::Current returning null for a detached thread, so it's not obvious
+  // that we can replace this with a direct %fs access on x86.
+  if (!is_started_) {
+    return nullptr;
+  } else {
+#ifdef ART_TARGET_ANDROID
+    void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];
+#else
+    void* thread = pthread_getspecific(Thread::pthread_key_self_);
+#endif
+    return reinterpret_cast<Thread*>(thread);
+  }
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_THREAD_CURRENT_INL_H_
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index aa769fa..7da15d9 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -19,18 +19,13 @@
 
 #include "thread.h"
 
-#ifdef ART_TARGET_ANDROID
-#include <bionic_tls.h>  // Access to our own TLS slot.
-#endif
-
-#include <pthread.h>
-
 #include "base/casts.h"
 #include "base/mutex-inl.h"
-#include "gc/heap.h"
+#include "base/time_utils.h"
 #include "jni_env_ext.h"
+#include "managed_stack-inl.h"
 #include "obj_ptr.h"
-#include "runtime.h"
+#include "thread-current-inl.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -41,21 +36,6 @@
   return full_env->self;
 }
 
-inline Thread* Thread::Current() {
-  // We rely on Thread::Current returning null for a detached thread, so it's not obvious
-  // that we can replace this with a direct %fs access on x86.
-  if (!is_started_) {
-    return nullptr;
-  } else {
-#ifdef ART_TARGET_ANDROID
-    void* thread = __get_tls()[TLS_SLOT_ART_THREAD_SELF];
-#else
-    void* thread = pthread_getspecific(Thread::pthread_key_self_);
-#endif
-    return reinterpret_cast<Thread*>(thread);
-  }
-}
-
 inline void Thread::AllowThreadSuspension() {
   DCHECK_EQ(Thread::Current(), this);
   if (UNLIKELY(TestAllFlags())) {
@@ -295,14 +275,6 @@
   return static_cast<ThreadState>(old_state);
 }
 
-inline void Thread::VerifyStack() {
-  if (kVerifyStack) {
-    if (Runtime::Current()->GetHeap()->IsObjectValidationEnabled()) {
-      VerifyStackImpl();
-    }
-  }
-}
-
 inline mirror::Object* Thread::AllocTlab(size_t bytes) {
   DCHECK_GE(TlabSize(), bytes);
   ++tlsPtr_.thread_local_objects;
@@ -386,6 +358,14 @@
   }
 }
 
+inline ShadowFrame* Thread::PushShadowFrame(ShadowFrame* new_top_frame) {
+  return tlsPtr_.managed_stack.PushShadowFrame(new_top_frame);
+}
+
+inline ShadowFrame* Thread::PopShadowFrame() {
+  return tlsPtr_.managed_stack.PopShadowFrame();
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_THREAD_INL_H_
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 6848686..789f571 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -55,6 +55,7 @@
 #include "gc/allocator/rosalloc.h"
 #include "gc/heap.h"
 #include "gc/space/space-inl.h"
+#include "gc_root.h"
 #include "handle_scope-inl.h"
 #include "indirect_reference_table-inl.h"
 #include "java_vm_ext.h"
@@ -2160,7 +2161,7 @@
   TearDownAlternateSignalStack();
 }
 
-void Thread::HandleUncaughtExceptions(ScopedObjectAccess& soa) {
+void Thread::HandleUncaughtExceptions(ScopedObjectAccessAlreadyRunnable& soa) {
   if (!IsExceptionPending()) {
     return;
   }
@@ -2180,7 +2181,7 @@
   tlsPtr_.jni_env->ExceptionClear();
 }
 
-void Thread::RemoveFromThreadGroup(ScopedObjectAccess& soa) {
+void Thread::RemoveFromThreadGroup(ScopedObjectAccessAlreadyRunnable& soa) {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
   ObjPtr<mirror::Object> ogroup = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
@@ -3442,11 +3443,13 @@
 };
 
 void Thread::VerifyStackImpl() {
-  VerifyRootVisitor visitor;
-  std::unique_ptr<Context> context(Context::Create());
-  RootCallbackVisitor visitor_to_callback(&visitor, GetThreadId());
-  ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitor_to_callback);
-  mapper.WalkStack();
+  if (Runtime::Current()->GetHeap()->IsObjectValidationEnabled()) {
+    VerifyRootVisitor visitor;
+    std::unique_ptr<Context> context(Context::Create());
+    RootCallbackVisitor visitor_to_callback(&visitor, GetThreadId());
+    ReferenceMapVisitor<RootCallbackVisitor> mapper(this, context.get(), visitor_to_callback);
+    mapper.WalkStack();
+  }
 }
 
 // Set the stack end to that to be used during a stack overflow
diff --git a/runtime/thread.h b/runtime/thread.h
index a60fd58..e85ee0d 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -33,15 +33,13 @@
 #include "base/mutex.h"
 #include "entrypoints/jni/jni_entrypoints.h"
 #include "entrypoints/quick/quick_entrypoints.h"
-#include "gc_root.h"
 #include "globals.h"
 #include "handle_scope.h"
 #include "instrumentation.h"
 #include "jvalue.h"
-#include "object_callbacks.h"
+#include "managed_stack.h"
 #include "offsets.h"
 #include "runtime_stats.h"
-#include "stack.h"
 #include "thread_state.h"
 
 class BacktraceMap;
@@ -87,12 +85,14 @@
 class JavaVMExt;
 struct JNIEnvExt;
 class Monitor;
+class RootVisitor;
 class ScopedObjectAccessAlreadyRunnable;
 class ShadowFrame;
 class SingleStepControl;
 class StackedShadowFrameRecord;
 class Thread;
 class ThreadList;
+enum VisitRootFlags : uint8_t;
 
 // Thread priorities. These must match the Thread.MIN_PRIORITY,
 // Thread.NORM_PRIORITY, and Thread.MAX_PRIORITY constants.
@@ -149,6 +149,7 @@
 class Thread {
  public:
   static const size_t kStackOverflowImplicitCheckSize;
+  static constexpr bool kVerifyStack = kIsDebugBuild;
 
   // Creates a new native thread corresponding to the given managed peer.
   // Used to implement Thread.start.
@@ -560,10 +561,14 @@
     return tlsPtr_.frame_id_to_shadow_frame != nullptr;
   }
 
-  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
+  void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void VerifyStack() REQUIRES_SHARED(Locks::mutator_lock_);
+  void VerifyStack() REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (kVerifyStack) {
+      VerifyStackImpl();
+    }
+  }
 
   //
   // Offsets of various members of native Thread class, used by compiled code.
@@ -793,13 +798,8 @@
     tlsPtr_.managed_stack.PopManagedStackFragment(fragment);
   }
 
-  ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame) {
-    return tlsPtr_.managed_stack.PushShadowFrame(new_top_frame);
-  }
-
-  ShadowFrame* PopShadowFrame() {
-    return tlsPtr_.managed_stack.PopShadowFrame();
-  }
+  ALWAYS_INLINE ShadowFrame* PushShadowFrame(ShadowFrame* new_top_frame);
+  ALWAYS_INLINE ShadowFrame* PopShadowFrame();
 
   template<PointerSize pointer_size>
   static ThreadOffset<pointer_size> TopShadowFrameOffset() {
@@ -1250,9 +1250,10 @@
 
   static void* CreateCallback(void* arg);
 
-  void HandleUncaughtExceptions(ScopedObjectAccess& soa)
+  void HandleUncaughtExceptions(ScopedObjectAccessAlreadyRunnable& soa)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void RemoveFromThreadGroup(ScopedObjectAccess& soa) REQUIRES_SHARED(Locks::mutator_lock_);
+  void RemoveFromThreadGroup(ScopedObjectAccessAlreadyRunnable& soa)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Initialize a thread.
   //
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index dc2af2a..95aba79 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -35,7 +35,9 @@
 #include "debugger.h"
 #include "gc/collector/concurrent_copying.h"
 #include "gc/gc_pause_listener.h"
+#include "gc/heap.h"
 #include "gc/reference_processor.h"
+#include "gc_root.h"
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
@@ -163,7 +165,7 @@
   if (dump_native_stack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
-  os << "\n";
+  os << std::endl;
 }
 
 void ThreadList::DumpUnattachedThreads(std::ostream& os, bool dump_native_stack) {
@@ -214,11 +216,10 @@
       ScopedObjectAccess soa(self);
       thread->Dump(local_os, dump_native_stack_, backtrace_map_.get());
     }
-    local_os << "\n";
     {
       // Use the logging lock to ensure serialization when writing to the common ostream.
       MutexLock mu(self, *Locks::logging_lock_);
-      *os_ << local_os.str();
+      *os_ << local_os.str() << std::endl;
     }
     barrier_.Pass(self);
   }
@@ -756,7 +757,7 @@
         // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
         if ((errno != EAGAIN) && (errno != EINTR)) {
           if (errno == ETIMEDOUT) {
-            LOG(::android::base::FATAL)
+            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
                 << "Timed out waiting for threads to suspend, waited for "
                 << PrettyDuration(NanoTime() - start_time);
           } else {
@@ -1508,7 +1509,7 @@
   // Visit roots without holding thread_list_lock_ and thread_suspend_count_lock_ to prevent lock
   // order violations.
   for (Thread* thread : threads_to_visit) {
-    thread->VisitRoots(visitor);
+    thread->VisitRoots(visitor, kVisitRootFlagAllRoots);
   }
 
   // Restore suspend counts.
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index 3375746..92702c6 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -22,9 +22,7 @@
 #include "base/mutex.h"
 #include "base/time_utils.h"
 #include "base/value_object.h"
-#include "gc_root.h"
 #include "jni.h"
-#include "object_callbacks.h"
 
 #include <bitset>
 #include <list>
@@ -34,12 +32,14 @@
 namespace gc {
   namespace collector {
     class GarbageCollector;
-  }  // namespac collector
+  }  // namespace collector
   class GcPauseListener;
 }  // namespace gc
 class Closure;
+class RootVisitor;
 class Thread;
 class TimingLogger;
+enum VisitRootFlags : uint8_t;
 
 class ThreadList {
  public:
diff --git a/runtime/thread_pool.cc b/runtime/thread_pool.cc
index d24a5e5..8349f33 100644
--- a/runtime/thread_pool.cc
+++ b/runtime/thread_pool.cc
@@ -18,6 +18,7 @@
 
 #include <pthread.h>
 
+#include <sys/mman.h>
 #include <sys/time.h>
 #include <sys/resource.h>
 
@@ -29,7 +30,7 @@
 #include "base/stl_util.h"
 #include "base/time_utils.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
index 86f5282..82b9af3 100644
--- a/runtime/ti/agent.cc
+++ b/runtime/ti/agent.cc
@@ -18,6 +18,7 @@
 
 #include "android-base/stringprintf.h"
 
+#include "base/strlcpy.h"
 #include "java_vm_ext.h"
 #include "runtime.h"
 
@@ -57,7 +58,7 @@
   }
   // Need to let the function fiddle with the array.
   std::unique_ptr<char[]> copied_args(new char[args_.size() + 1]);
-  strcpy(copied_args.get(), args_.c_str());
+  strlcpy(copied_args.get(), args_.c_str(), args_.size() + 1);
   // TODO Need to do some checks that we are at a good spot etc.
   *call_res = callback(Runtime::Current()->GetJavaVM(),
                        copied_args.get(),
diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h
index b5ecba1..f98e387 100644
--- a/runtime/ti/agent.h
+++ b/runtime/ti/agent.h
@@ -20,8 +20,7 @@
 #include <dlfcn.h>
 #include <jni.h>  // for jint, JavaVM* etc declarations
 
-#include "runtime.h"
-#include "utils.h"
+#include "base/logging.h"
 
 namespace art {
 namespace ti {
diff --git a/runtime/trace.cc b/runtime/trace.cc
index 3a9975a..cabd162 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -41,6 +41,7 @@
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
+#include "stack.h"
 #include "thread.h"
 #include "thread_list.h"
 #include "utils.h"
@@ -739,7 +740,7 @@
 }
 
 void Trace::DexPcMoved(Thread* thread ATTRIBUTE_UNUSED,
-                       mirror::Object* this_object ATTRIBUTE_UNUSED,
+                       Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                        ArtMethod* method,
                        uint32_t new_dex_pc) {
   // We're not recorded to listen to this kind of event, so complain.
@@ -748,7 +749,7 @@
 }
 
 void Trace::FieldRead(Thread* thread ATTRIBUTE_UNUSED,
-                      mirror::Object* this_object ATTRIBUTE_UNUSED,
+                      Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                       ArtMethod* method,
                       uint32_t dex_pc,
                       ArtField* field ATTRIBUTE_UNUSED)
@@ -759,7 +760,7 @@
 }
 
 void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
-                         mirror::Object* this_object ATTRIBUTE_UNUSED,
+                         Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
                          ArtMethod* method,
                          uint32_t dex_pc,
                          ArtField* field ATTRIBUTE_UNUSED,
@@ -770,8 +771,10 @@
              << " " << dex_pc;
 }
 
-void Trace::MethodEntered(Thread* thread, mirror::Object* this_object ATTRIBUTE_UNUSED,
-                          ArtMethod* method, uint32_t dex_pc ATTRIBUTE_UNUSED) {
+void Trace::MethodEntered(Thread* thread,
+                          Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
+                          ArtMethod* method,
+                          uint32_t dex_pc ATTRIBUTE_UNUSED) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
@@ -779,8 +782,10 @@
                       thread_clock_diff, wall_clock_diff);
 }
 
-void Trace::MethodExited(Thread* thread, mirror::Object* this_object ATTRIBUTE_UNUSED,
-                         ArtMethod* method, uint32_t dex_pc ATTRIBUTE_UNUSED,
+void Trace::MethodExited(Thread* thread,
+                         Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
+                         ArtMethod* method,
+                         uint32_t dex_pc ATTRIBUTE_UNUSED,
                          const JValue& return_value ATTRIBUTE_UNUSED) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
@@ -789,8 +794,10 @@
                       thread_clock_diff, wall_clock_diff);
 }
 
-void Trace::MethodUnwind(Thread* thread, mirror::Object* this_object ATTRIBUTE_UNUSED,
-                         ArtMethod* method, uint32_t dex_pc ATTRIBUTE_UNUSED) {
+void Trace::MethodUnwind(Thread* thread,
+                         Handle<mirror::Object> this_object ATTRIBUTE_UNUSED,
+                         ArtMethod* method,
+                         uint32_t dex_pc ATTRIBUTE_UNUSED) {
   uint32_t thread_clock_diff = 0;
   uint32_t wall_clock_diff = 0;
   ReadClocks(thread, &thread_clock_diff, &wall_clock_diff);
@@ -799,7 +806,7 @@
 }
 
 void Trace::ExceptionCaught(Thread* thread ATTRIBUTE_UNUSED,
-                            mirror::Throwable* exception_object ATTRIBUTE_UNUSED)
+                            Handle<mirror::Throwable> exception_object ATTRIBUTE_UNUSED)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   LOG(ERROR) << "Unexpected exception caught event in tracing";
 }
@@ -811,7 +818,7 @@
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
-                                     mirror::Object*,
+                                     Handle<mirror::Object>,
                                      ArtMethod* method,
                                      uint32_t dex_pc,
                                      ArtMethod*) {
diff --git a/runtime/trace.h b/runtime/trace.h
index 485e9a1..ad10250 100644
--- a/runtime/trace.h
+++ b/runtime/trace.h
@@ -140,36 +140,54 @@
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_, !*streaming_lock_);
 
   // InstrumentationListener implementation.
-  void MethodEntered(Thread* thread, mirror::Object* this_object,
-                     ArtMethod* method, uint32_t dex_pc)
+  void MethodEntered(Thread* thread,
+                     Handle<mirror::Object> this_object,
+                     ArtMethod* method,
+                     uint32_t dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_, !*streaming_lock_)
       OVERRIDE;
-  void MethodExited(Thread* thread, mirror::Object* this_object,
-                    ArtMethod* method, uint32_t dex_pc,
+  void MethodExited(Thread* thread,
+                    Handle<mirror::Object> this_object,
+                    ArtMethod* method,
+                    uint32_t dex_pc,
                     const JValue& return_value)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_, !*streaming_lock_)
       OVERRIDE;
-  void MethodUnwind(Thread* thread, mirror::Object* this_object,
-                    ArtMethod* method, uint32_t dex_pc)
+  void MethodUnwind(Thread* thread,
+                    Handle<mirror::Object> this_object,
+                    ArtMethod* method,
+                    uint32_t dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_, !*streaming_lock_)
       OVERRIDE;
-  void DexPcMoved(Thread* thread, mirror::Object* this_object,
-                  ArtMethod* method, uint32_t new_dex_pc)
+  void DexPcMoved(Thread* thread,
+                  Handle<mirror::Object> this_object,
+                  ArtMethod* method,
+                  uint32_t new_dex_pc)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_, !*streaming_lock_)
       OVERRIDE;
-  void FieldRead(Thread* thread, mirror::Object* this_object,
-                 ArtMethod* method, uint32_t dex_pc, ArtField* field)
+  void FieldRead(Thread* thread,
+                 Handle<mirror::Object> this_object,
+                 ArtMethod* method,
+                 uint32_t dex_pc,
+                 ArtField* field)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void FieldWritten(Thread* thread, mirror::Object* this_object,
-                    ArtMethod* method, uint32_t dex_pc, ArtField* field,
+  void FieldWritten(Thread* thread,
+                    Handle<mirror::Object> this_object,
+                    ArtMethod* method,
+                    uint32_t dex_pc,
+                    ArtField* field,
                     const JValue& field_value)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void ExceptionCaught(Thread* thread, mirror::Throwable* exception_object)
+  void ExceptionCaught(Thread* thread,
+                       Handle<mirror::Throwable> exception_object)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
-  void Branch(Thread* thread, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
+  void Branch(Thread* thread,
+              ArtMethod* method,
+              uint32_t dex_pc,
+              int32_t dex_pc_offset)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*unique_methods_lock_) OVERRIDE;
   void InvokeVirtualOrInterface(Thread* thread,
-                                mirror::Object* this_object,
+                                Handle<mirror::Object> this_object,
                                 ArtMethod* caller,
                                 uint32_t dex_pc,
                                 ArtMethod* callee)
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 56ff0a1..907d37e 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -19,8 +19,10 @@
 #include "base/stl_util.h"
 #include "base/logging.h"
 #include "gc/accounting/card_table-inl.h"
+#include "gc_root-inl.h"
 #include "intern_table.h"
 #include "mirror/class-inl.h"
+#include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 0333fe8..747c2d0 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -22,7 +22,6 @@
 #include "base/value_object.h"
 #include "dex_file_types.h"
 #include "gc_root.h"
-#include "object_callbacks.h"
 #include "offsets.h"
 #include "primitive.h"
 #include "safe_map.h"
@@ -36,7 +35,7 @@
 class DexCache;
 class Object;
 class String;
-}
+}  // namespace mirror
 class InternTable;
 
 class Transaction FINAL {
diff --git a/compiler/utils/type_reference.h b/runtime/type_reference.h
similarity index 85%
rename from compiler/utils/type_reference.h
rename to runtime/type_reference.h
index a0fa1a4..b7e964b 100644
--- a/compiler/utils/type_reference.h
+++ b/runtime/type_reference.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_UTILS_TYPE_REFERENCE_H_
-#define ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+#ifndef ART_RUNTIME_TYPE_REFERENCE_H_
+#define ART_RUNTIME_TYPE_REFERENCE_H_
 
 #include <stdint.h>
 
@@ -29,7 +29,9 @@
 
 // A type is located by its DexFile and the string_ids_ table index into that DexFile.
 struct TypeReference {
-  TypeReference(const DexFile* file, dex::TypeIndex index) : dex_file(file), type_index(index) { }
+  TypeReference(const DexFile* file = nullptr, dex::TypeIndex index = dex::TypeIndex())
+      : dex_file(file),
+        type_index(index) {}
 
   const DexFile* dex_file;
   dex::TypeIndex type_index;
@@ -48,4 +50,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_UTILS_TYPE_REFERENCE_H_
+#endif  // ART_RUNTIME_TYPE_REFERENCE_H_
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 8d216ce..c4b0441 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -303,7 +303,7 @@
   if (NeedsEscaping(ch)) {
     StringAppendF(&result, "\\u%04x", ch);
   } else {
-    result += ch;
+    result += static_cast<std::string::value_type>(ch);
   }
   result += '\'';
   return result;
@@ -330,7 +330,7 @@
       if (NeedsEscaping(leading)) {
         StringAppendF(&result, "\\u%04x", leading);
       } else {
-        result += leading;
+        result += static_cast<std::string::value_type>(leading);
       }
 
       const uint32_t trailing = GetTrailingUtf16Char(ch);
@@ -841,6 +841,10 @@
   return true;
 }
 
+std::string GetVdexFilename(const std::string& oat_filename) {
+  return ReplaceFileExtension(oat_filename, "vdex");  // Presumably swaps just the extension.
+}
+
 static void InsertIsaDirectory(const InstructionSet isa, std::string* filename) {
   // in = /foo/bar/baz
   // out = /foo/bar/<isa>/baz
diff --git a/runtime/utils.h b/runtime/utils.h
index 2011d9e..f1f5576 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -173,6 +173,9 @@
 // Returns the system location for an image
 std::string GetSystemImageFilename(const char* location, InstructionSet isa);
 
+// Returns the vdex filename for the given oat filename.
+std::string GetVdexFilename(const std::string& oat_filename);
+
 // Returns true if the file exists.
 bool FileExists(const std::string& filename);
 bool FileExistsAndNotEmpty(const std::string& filename);
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 81bf293..12f791c 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -50,6 +50,7 @@
 #include "register_line-inl.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
+#include "stack.h"
 #include "utils.h"
 #include "verifier_deps.h"
 #include "verifier_compiler_binding.h"
@@ -883,10 +884,13 @@
                             InstructionFlags());
   // Run through the instructions and see if the width checks out.
   bool result = ComputeWidthsAndCountOps();
+  bool allow_runtime_only_instructions = !Runtime::Current()->IsAotCompiler() || verify_to_dump_;
   // Flag instructions guarded by a "try" block and check exception handlers.
   result = result && ScanTryCatchBlocks();
   // Perform static instruction verification.
-  result = result && VerifyInstructions();
+  result = result && (allow_runtime_only_instructions
+                          ? VerifyInstructions<true>()
+                          : VerifyInstructions<false>());
   // Perform code-flow analysis and return.
   result = result && VerifyCodeFlow();
 
@@ -1102,6 +1106,7 @@
   return true;
 }
 
+template <bool kAllowRuntimeOnlyInstructions>
 bool MethodVerifier::VerifyInstructions() {
   const Instruction* inst = Instruction::At(code_item_->insns_);
 
@@ -1110,9 +1115,8 @@
   GetInstructionFlags(0).SetCompileTimeInfoPoint();
 
   uint32_t insns_size = code_item_->insns_size_in_code_units_;
-  bool allow_runtime_only_instructions = !Runtime::Current()->IsAotCompiler() || verify_to_dump_;
   for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
-    if (!VerifyInstruction(inst, dex_pc, allow_runtime_only_instructions)) {
+    if (!VerifyInstruction<kAllowRuntimeOnlyInstructions>(inst, dex_pc)) {
       DCHECK_NE(failures_.size(), 0U);
       return false;
     }
@@ -1139,9 +1143,8 @@
   return true;
 }
 
-bool MethodVerifier::VerifyInstruction(const Instruction* inst,
-                                       uint32_t code_offset,
-                                       bool allow_runtime_only_instructions) {
+template <bool kAllowRuntimeOnlyInstructions>
+bool MethodVerifier::VerifyInstruction(const Instruction* inst, uint32_t code_offset) {
   if (Instruction::kHaveExperimentalInstructions && UNLIKELY(inst->IsExperimental())) {
     // Experimental instructions don't yet have verifier support implementation.
     // While it is possible to use them by themselves, when we try to use stable instructions
@@ -1250,7 +1253,7 @@
       result = false;
       break;
   }
-  if (!allow_runtime_only_instructions && inst->GetVerifyIsRuntimeOnly()) {
+  if (!kAllowRuntimeOnlyInstructions && inst->GetVerifyIsRuntimeOnly()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "opcode only expected at runtime " << inst->Name();
     result = false;
   }
@@ -1258,7 +1261,7 @@
 }
 
 inline bool MethodVerifier::CheckRegisterIndex(uint32_t idx) {
-  if (idx >= code_item_->registers_size_) {
+  if (UNLIKELY(idx >= code_item_->registers_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "register index out of range (" << idx << " >= "
                                       << code_item_->registers_size_ << ")";
     return false;
@@ -1267,7 +1270,7 @@
 }
 
 inline bool MethodVerifier::CheckWideRegisterIndex(uint32_t idx) {
-  if (idx + 1 >= code_item_->registers_size_) {
+  if (UNLIKELY(idx + 1 >= code_item_->registers_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "wide register index out of range (" << idx
                                       << "+1 >= " << code_item_->registers_size_ << ")";
     return false;
@@ -1276,7 +1279,7 @@
 }
 
 inline bool MethodVerifier::CheckFieldIndex(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().field_ids_size_) {
+  if (UNLIKELY(idx >= dex_file_->GetHeader().field_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad field index " << idx << " (max "
                                       << dex_file_->GetHeader().field_ids_size_ << ")";
     return false;
@@ -1285,7 +1288,7 @@
 }
 
 inline bool MethodVerifier::CheckMethodIndex(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().method_ids_size_) {
+  if (UNLIKELY(idx >= dex_file_->GetHeader().method_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad method index " << idx << " (max "
                                       << dex_file_->GetHeader().method_ids_size_ << ")";
     return false;
@@ -1294,17 +1297,17 @@
 }
 
 inline bool MethodVerifier::CheckNewInstance(dex::TypeIndex idx) {
-  if (idx.index_ >= dex_file_->GetHeader().type_ids_size_) {
+  if (UNLIKELY(idx.index_ >= dex_file_->GetHeader().type_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
                                       << dex_file_->GetHeader().type_ids_size_ << ")";
     return false;
   }
   // We don't need the actual class, just a pointer to the class name.
   const char* descriptor = dex_file_->StringByTypeIdx(idx);
-  if (descriptor[0] != 'L') {
+  if (UNLIKELY(descriptor[0] != 'L')) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "can't call new-instance on type '" << descriptor << "'";
     return false;
-  } else if (strcmp(descriptor, "Ljava/lang/Class;") == 0) {
+  } else if (UNLIKELY(strcmp(descriptor, "Ljava/lang/Class;") == 0)) {
     // An unlikely new instance on Class is not allowed. Fall back to interpreter to ensure an
     // exception is thrown when this statement is executed (compiled code would not do that).
     Fail(VERIFY_ERROR_INSTANTIATION);
@@ -1313,7 +1316,7 @@
 }
 
 inline bool MethodVerifier::CheckPrototypeIndex(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().proto_ids_size_) {
+  if (UNLIKELY(idx >= dex_file_->GetHeader().proto_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad prototype index " << idx << " (max "
                                       << dex_file_->GetHeader().proto_ids_size_ << ")";
     return false;
@@ -1322,7 +1325,7 @@
 }
 
 inline bool MethodVerifier::CheckStringIndex(uint32_t idx) {
-  if (idx >= dex_file_->GetHeader().string_ids_size_) {
+  if (UNLIKELY(idx >= dex_file_->GetHeader().string_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad string index " << idx << " (max "
                                       << dex_file_->GetHeader().string_ids_size_ << ")";
     return false;
@@ -1331,7 +1334,7 @@
 }
 
 inline bool MethodVerifier::CheckTypeIndex(dex::TypeIndex idx) {
-  if (idx.index_ >= dex_file_->GetHeader().type_ids_size_) {
+  if (UNLIKELY(idx.index_ >= dex_file_->GetHeader().type_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
                                       << dex_file_->GetHeader().type_ids_size_ << ")";
     return false;
@@ -1340,7 +1343,7 @@
 }
 
 bool MethodVerifier::CheckNewArray(dex::TypeIndex idx) {
-  if (idx.index_ >= dex_file_->GetHeader().type_ids_size_) {
+  if (UNLIKELY(idx.index_ >= dex_file_->GetHeader().type_ids_size_)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "bad type index " << idx.index_ << " (max "
                                       << dex_file_->GetHeader().type_ids_size_ << ")";
     return false;
@@ -1351,12 +1354,12 @@
   while (*cp++ == '[') {
     bracket_count++;
   }
-  if (bracket_count == 0) {
+  if (UNLIKELY(bracket_count == 0)) {
     /* The given class must be an array type. */
     Fail(VERIFY_ERROR_BAD_CLASS_HARD)
         << "can't new-array class '" << descriptor << "' (not an array)";
     return false;
-  } else if (bracket_count > 255) {
+  } else if (UNLIKELY(bracket_count > 255)) {
     /* It is illegal to create an array of more than 255 dimensions. */
     Fail(VERIFY_ERROR_BAD_CLASS_HARD)
         << "can't new-array class '" << descriptor << "' (exceeds limit)";
@@ -1374,8 +1377,8 @@
   DCHECK_LT(cur_offset, insn_count);
   /* make sure the start of the array data table is in range */
   array_data_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
-  if (static_cast<int32_t>(cur_offset) + array_data_offset < 0 ||
-      cur_offset + array_data_offset + 2 >= insn_count) {
+  if (UNLIKELY(static_cast<int32_t>(cur_offset) + array_data_offset < 0 ||
+               cur_offset + array_data_offset + 2 >= insn_count)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid array data start: at " << cur_offset
                                       << ", data offset " << array_data_offset
                                       << ", count " << insn_count;
@@ -1384,14 +1387,14 @@
   /* offset to array data table is a relative branch-style offset */
   array_data = insns + array_data_offset;
   // Make sure the table is at an even dex pc, that is, 32-bit aligned.
-  if (!IsAligned<4>(array_data)) {
+  if (UNLIKELY(!IsAligned<4>(array_data))) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned array data table: at " << cur_offset
                                       << ", data offset " << array_data_offset;
     return false;
   }
   // Make sure the array-data is marked as an opcode. This ensures that it was reached when
   // traversing the code item linearly. It is an approximation for a by-spec padding value.
-  if (!GetInstructionFlags(cur_offset + array_data_offset).IsOpcode()) {
+  if (UNLIKELY(!GetInstructionFlags(cur_offset + array_data_offset).IsOpcode())) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "array data table at " << cur_offset
                                       << ", data offset " << array_data_offset
                                       << " not correctly visited, probably bad padding.";
@@ -1402,7 +1405,7 @@
   uint32_t value_count = *reinterpret_cast<const uint32_t*>(&array_data[2]);
   uint32_t table_size = 4 + (value_width * value_count + 1) / 2;
   /* make sure the end of the switch is in range */
-  if (cur_offset + array_data_offset + table_size > insn_count) {
+  if (UNLIKELY(cur_offset + array_data_offset + table_size > insn_count)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid array data end: at " << cur_offset
                                       << ", data offset " << array_data_offset << ", end "
                                       << cur_offset + array_data_offset + table_size
@@ -1418,23 +1421,23 @@
   if (!GetBranchOffset(cur_offset, &offset, &isConditional, &selfOkay)) {
     return false;
   }
-  if (!selfOkay && offset == 0) {
+  if (UNLIKELY(!selfOkay && offset == 0)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "branch offset of zero not allowed at"
                                       << reinterpret_cast<void*>(cur_offset);
     return false;
   }
   // Check for 32-bit overflow. This isn't strictly necessary if we can depend on the runtime
   // to have identical "wrap-around" behavior, but it's unwise to depend on that.
-  if (((int64_t) cur_offset + (int64_t) offset) != (int64_t) (cur_offset + offset)) {
+  if (UNLIKELY(((int64_t) cur_offset + (int64_t) offset) != (int64_t) (cur_offset + offset))) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "branch target overflow "
                                       << reinterpret_cast<void*>(cur_offset) << " +" << offset;
     return false;
   }
   const uint32_t insn_count = code_item_->insns_size_in_code_units_;
   int32_t abs_offset = cur_offset + offset;
-  if (abs_offset < 0 ||
-      (uint32_t) abs_offset >= insn_count ||
-      !GetInstructionFlags(abs_offset).IsOpcode()) {
+  if (UNLIKELY(abs_offset < 0 ||
+               (uint32_t) abs_offset >= insn_count ||
+               !GetInstructionFlags(abs_offset).IsOpcode())) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid branch target " << offset << " (-> "
                                       << reinterpret_cast<void*>(abs_offset) << ") at "
                                       << reinterpret_cast<void*>(cur_offset);
@@ -1487,8 +1490,8 @@
   const uint16_t* insns = code_item_->insns_ + cur_offset;
   /* make sure the start of the switch is in range */
   int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
-  if (static_cast<int32_t>(cur_offset) + switch_offset < 0 ||
-      cur_offset + switch_offset + 2 > insn_count) {
+  if (UNLIKELY(static_cast<int32_t>(cur_offset) + switch_offset < 0 ||
+               cur_offset + switch_offset + 2 > insn_count)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid switch start: at " << cur_offset
                                       << ", switch offset " << switch_offset
                                       << ", count " << insn_count;
@@ -1497,14 +1500,14 @@
   /* offset to switch table is a relative branch-style offset */
   const uint16_t* switch_insns = insns + switch_offset;
   // Make sure the table is at an even dex pc, that is, 32-bit aligned.
-  if (!IsAligned<4>(switch_insns)) {
+  if (UNLIKELY(!IsAligned<4>(switch_insns))) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unaligned switch table: at " << cur_offset
                                       << ", switch offset " << switch_offset;
     return false;
   }
   // Make sure the switch data is marked as an opcode. This ensures that it was reached when
   // traversing the code item linearly. It is an approximation for a by-spec padding value.
-  if (!GetInstructionFlags(cur_offset + switch_offset).IsOpcode()) {
+  if (UNLIKELY(!GetInstructionFlags(cur_offset + switch_offset).IsOpcode())) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "switch table at " << cur_offset
                                       << ", switch offset " << switch_offset
                                       << " not correctly visited, probably bad padding.";
@@ -1526,14 +1529,14 @@
     expected_signature = Instruction::kSparseSwitchSignature;
   }
   uint32_t table_size = targets_offset + switch_count * 2;
-  if (switch_insns[0] != expected_signature) {
+  if (UNLIKELY(switch_insns[0] != expected_signature)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD)
         << StringPrintf("wrong signature for switch table (%x, wanted %x)",
                         switch_insns[0], expected_signature);
     return false;
   }
   /* make sure the end of the switch is in range */
-  if (cur_offset + switch_offset + table_size > (uint32_t) insn_count) {
+  if (UNLIKELY(cur_offset + switch_offset + table_size > (uint32_t) insn_count)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid switch end: at " << cur_offset
                                       << ", switch offset " << switch_offset
                                       << ", end " << (cur_offset + switch_offset + table_size)
@@ -1548,7 +1551,7 @@
       int32_t first_key = switch_insns[keys_offset] | (switch_insns[keys_offset + 1] << 16);
       int32_t max_first_key =
           std::numeric_limits<int32_t>::max() - (static_cast<int32_t>(switch_count) - 1);
-      if (first_key > max_first_key) {
+      if (UNLIKELY(first_key > max_first_key)) {
         Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid packed switch: first_key=" << first_key
                                           << ", switch_count=" << switch_count;
         return false;
@@ -1560,7 +1563,7 @@
         int32_t key =
             static_cast<int32_t>(switch_insns[keys_offset + targ * 2]) |
             static_cast<int32_t>(switch_insns[keys_offset + targ * 2 + 1] << 16);
-        if (key <= last_key) {
+        if (UNLIKELY(key <= last_key)) {
           Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid sparse switch: last key=" << last_key
                                             << ", this=" << key;
           return false;
@@ -1574,9 +1577,9 @@
     int32_t offset = static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
                      static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
     int32_t abs_offset = cur_offset + offset;
-    if (abs_offset < 0 ||
-        abs_offset >= static_cast<int32_t>(insn_count) ||
-        !GetInstructionFlags(abs_offset).IsOpcode()) {
+    if (UNLIKELY(abs_offset < 0 ||
+                 abs_offset >= static_cast<int32_t>(insn_count) ||
+                 !GetInstructionFlags(abs_offset).IsOpcode())) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid switch target " << offset
                                         << " (-> " << reinterpret_cast<void*>(abs_offset) << ") at "
                                         << reinterpret_cast<void*>(cur_offset)
@@ -1591,7 +1594,7 @@
 bool MethodVerifier::CheckVarArgRegs(uint32_t vA, uint32_t arg[]) {
   uint16_t registers_size = code_item_->registers_size_;
   for (uint32_t idx = 0; idx < vA; idx++) {
-    if (arg[idx] >= registers_size) {
+    if (UNLIKELY(arg[idx] >= registers_size)) {
       Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid reg index (" << arg[idx]
                                         << ") in non-range invoke (>= " << registers_size << ")";
       return false;
@@ -1605,7 +1608,7 @@
   uint16_t registers_size = code_item_->registers_size_;
   // vA/vC are unsigned 8-bit/16-bit quantities for /range instructions, so there's no risk of
   // integer overflow when adding them here.
-  if (vA + vC > registers_size) {
+  if (UNLIKELY(vA + vC > registers_size)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invalid reg index " << vA << "+" << vC
                                       << " in range invoke (> " << registers_size << ")";
     return false;
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index b34a3af..46fdc54 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -24,7 +24,6 @@
 #include "base/arena_allocator.h"
 #include "base/macros.h"
 #include "base/scoped_arena_containers.h"
-#include "base/stl_util.h"
 #include "base/value_object.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
@@ -37,12 +36,17 @@
 
 namespace art {
 
+class ClassLinker;
 class CompilerCallbacks;
 class Instruction;
 struct ReferenceMap2Visitor;
 class Thread;
 class VariableIndentationOutputStream;
 
+namespace mirror {
+class DexCache;
+}  // namespace mirror
+
 namespace verifier {
 
 class MethodVerifier;
@@ -355,6 +359,7 @@
    *
    * Walks through instructions in a method calling VerifyInstruction on each.
    */
+  template <bool kAllowRuntimeOnlyInstructions>
   bool VerifyInstructions();
 
   /*
@@ -390,9 +395,8 @@
    * - (earlier) for each exception handler, the handler must start at a valid
    *   instruction
    */
-  bool VerifyInstruction(const Instruction* inst,
-                         uint32_t code_offset,
-                         bool allow_runtime_only_instructions);
+  template <bool kAllowRuntimeOnlyInstructions>
+  bool VerifyInstruction(const Instruction* inst, uint32_t code_offset);
 
   /* Ensure that the register index is valid for this code item. */
   bool CheckRegisterIndex(uint32_t idx);
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 25baac5..6c01a79 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -30,7 +30,6 @@
 #include "gc_root.h"
 #include "handle_scope.h"
 #include "obj_ptr.h"
-#include "object_callbacks.h"
 #include "primitive.h"
 
 namespace art {
diff --git a/runtime/verifier/reg_type_cache-inl.h b/runtime/verifier/reg_type_cache-inl.h
index 68af62e..b57a2c8 100644
--- a/runtime/verifier/reg_type_cache-inl.h
+++ b/runtime/verifier/reg_type_cache-inl.h
@@ -43,6 +43,21 @@
   return FromCat1NonSmallConstant(value, precise);
 }
 
+// Accessors declared in reg_type_cache.h, which only forward-declares these types.
+// Each returns a reference to the object that <Type>::GetInstance() points to.
+inline const BooleanType& RegTypeCache::Boolean() { return *BooleanType::GetInstance(); }
+inline const ByteType& RegTypeCache::Byte() { return *ByteType::GetInstance(); }
+inline const CharType& RegTypeCache::Char() { return *CharType::GetInstance(); }
+inline const ShortType& RegTypeCache::Short() { return *ShortType::GetInstance(); }
+inline const IntegerType& RegTypeCache::Integer() { return *IntegerType::GetInstance(); }
+inline const FloatType& RegTypeCache::Float() { return *FloatType::GetInstance(); }
+inline const LongLoType& RegTypeCache::LongLo() { return *LongLoType::GetInstance(); }
+inline const LongHiType& RegTypeCache::LongHi() { return *LongHiType::GetInstance(); }
+inline const DoubleLoType& RegTypeCache::DoubleLo() { return *DoubleLoType::GetInstance(); }
+inline const DoubleHiType& RegTypeCache::DoubleHi() { return *DoubleHiType::GetInstance(); }
+inline const UndefinedType& RegTypeCache::Undefined() { return *UndefinedType::GetInstance(); }
+inline const ConflictType& RegTypeCache::Conflict() { return *ConflictType::GetInstance(); }
+
 inline const ImpreciseConstType& RegTypeCache::ByteConstant() {
   const ConstantType& result = FromCat1Const(std::numeric_limits<jbyte>::min(), false);
   DCHECK(result.IsImpreciseConstant());
diff --git a/runtime/verifier/reg_type_cache.h b/runtime/verifier/reg_type_cache.h
index df0fe3d..37f8a1f 100644
--- a/runtime/verifier/reg_type_cache.h
+++ b/runtime/verifier/reg_type_cache.h
@@ -17,15 +17,14 @@
 #ifndef ART_RUNTIME_VERIFIER_REG_TYPE_CACHE_H_
 #define ART_RUNTIME_VERIFIER_REG_TYPE_CACHE_H_
 
+#include <stdint.h>
+#include <vector>
+
 #include "base/casts.h"
 #include "base/macros.h"
 #include "base/scoped_arena_containers.h"
-#include "object_callbacks.h"
-#include "reg_type.h"
-#include "runtime.h"
-
-#include <stdint.h>
-#include <vector>
+#include "gc_root.h"
+#include "primitive.h"
 
 namespace art {
 namespace mirror {
@@ -37,7 +36,24 @@
 
 namespace verifier {
 
+class BooleanType;
+class ByteType;
+class CharType;
+class ConflictType;
+class ConstantType;
+class DoubleHiType;
+class DoubleLoType;
+class FloatType;
+class ImpreciseConstType;
+class IntegerType;
+class LongHiType;
+class LongLoType;
+class PreciseConstType;
+class PreciseReferenceType;
 class RegType;
+class ShortType;
+class UndefinedType;
+class UninitializedType;
 
 // Use 8 bytes since that is the default arena allocator alignment.
 static constexpr size_t kDefaultArenaBitVectorBytes = 8;
@@ -90,42 +106,18 @@
   size_t GetCacheSize() {
     return entries_.size();
   }
-  const BooleanType& Boolean() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *BooleanType::GetInstance();
-  }
-  const ByteType& Byte() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *ByteType::GetInstance();
-  }
-  const CharType& Char() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *CharType::GetInstance();
-  }
-  const ShortType& Short() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *ShortType::GetInstance();
-  }
-  const IntegerType& Integer() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *IntegerType::GetInstance();
-  }
-  const FloatType& Float() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *FloatType::GetInstance();
-  }
-  const LongLoType& LongLo() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *LongLoType::GetInstance();
-  }
-  const LongHiType& LongHi() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *LongHiType::GetInstance();
-  }
-  const DoubleLoType& DoubleLo() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *DoubleLoType::GetInstance();
-  }
-  const DoubleHiType& DoubleHi() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *DoubleHiType::GetInstance();
-  }
-  const UndefinedType& Undefined() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return *UndefinedType::GetInstance();
-  }
-  const ConflictType& Conflict() {
-    return *ConflictType::GetInstance();
-  }
+  const BooleanType& Boolean() REQUIRES_SHARED(Locks::mutator_lock_);
+  const ByteType& Byte() REQUIRES_SHARED(Locks::mutator_lock_);
+  const CharType& Char() REQUIRES_SHARED(Locks::mutator_lock_);
+  const ShortType& Short() REQUIRES_SHARED(Locks::mutator_lock_);
+  const IntegerType& Integer() REQUIRES_SHARED(Locks::mutator_lock_);
+  const FloatType& Float() REQUIRES_SHARED(Locks::mutator_lock_);
+  const LongLoType& LongLo() REQUIRES_SHARED(Locks::mutator_lock_);
+  const LongHiType& LongHi() REQUIRES_SHARED(Locks::mutator_lock_);
+  const DoubleLoType& DoubleLo() REQUIRES_SHARED(Locks::mutator_lock_);
+  const DoubleHiType& DoubleHi() REQUIRES_SHARED(Locks::mutator_lock_);
+  const UndefinedType& Undefined() REQUIRES_SHARED(Locks::mutator_lock_);
+  const ConflictType& Conflict();
 
   const PreciseReferenceType& JavaLangClass() REQUIRES_SHARED(Locks::mutator_lock_);
   const PreciseReferenceType& JavaLangString() REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/verifier/reg_type_test.cc b/runtime/verifier/reg_type_test.cc
index 49dac26..b0ea6c8 100644
--- a/runtime/verifier/reg_type_test.cc
+++ b/runtime/verifier/reg_type_test.cc
@@ -25,7 +25,7 @@
 #include "reg_type_cache-inl.h"
 #include "reg_type-inl.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace verifier {
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index d69e4dc..43eb948 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -23,6 +23,7 @@
 
 #include "base/array_ref.h"
 #include "base/mutex.h"
+#include "dex_file_types.h"
 #include "handle.h"
 #include "method_resolution_kind.h"
 #include "obj_ptr.h"
@@ -39,7 +40,7 @@
 namespace mirror {
 class Class;
 class ClassLoader;
-}
+}  // namespace mirror
 
 namespace verifier {
 
diff --git a/runtime/verify_object.h b/runtime/verify_object.h
index 519f7f5..e4c01d0 100644
--- a/runtime/verify_object.h
+++ b/runtime/verify_object.h
@@ -48,7 +48,6 @@
   kVerifyAll = kVerifyThis | kVerifyReads | kVerifyWrites,
 };
 
-static constexpr bool kVerifyStack = kIsDebugBuild;
 static constexpr VerifyObjectFlags kDefaultVerifyFlags = kVerifyNone;
 static constexpr VerifyObjectMode kVerifyObjectSupport =
     kDefaultVerifyFlags != 0 ? kVerifyObjectModeFast : kVerifyObjectModeDisabled;
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 5aef062..24f194b 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -30,7 +30,7 @@
 #include "obj_ptr-inl.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/runtime/zip_archive.cc b/runtime/zip_archive.cc
index 0d0d5c7..df1012e 100644
--- a/runtime/zip_archive.cc
+++ b/runtime/zip_archive.cc
@@ -25,6 +25,7 @@
 #include <vector>
 
 #include "android-base/stringprintf.h"
+#include "base/bit_utils.h"
 #include "base/unix_file/fd_file.h"
 
 namespace art {
diff --git a/sigchainlib/Android.bp b/sigchainlib/Android.bp
index 08af254..0c64b7d 100644
--- a/sigchainlib/Android.bp
+++ b/sigchainlib/Android.bp
@@ -32,6 +32,12 @@
             shared_libs: ["liblog"],
         },
     },
+    // Sigchainlib is whole-statically linked into binaries. For Android.mk-based binaries,
+    // this will drag ASAN symbols into the binary, even for modules using LOCAL_SANITIZE := never.
+    // So disable sanitization for now. b/38456126
+    sanitize: {
+        never: true,
+    },
 }
 
 // Create a dummy version of libsigchain which expose the necessary symbols
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index df4372f..b8ab51b 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -262,8 +262,8 @@
   ucontext_t* ucontext = static_cast<ucontext_t*>(ucontext_raw);
   sigset_t mask;
   sigorset(&mask, &ucontext->uc_sigmask, &chains[signo].action_.sa_mask);
-  if ((handler_flags & SA_NODEFER)) {
-    sigdelset(&mask, signo);
+  if (!(handler_flags & SA_NODEFER)) {
+    sigaddset(&mask, signo);
   }
   linked_sigprocmask(SIG_SETMASK, &mask, nullptr);
 
diff --git a/test/003-omnibus-opcodes/src/Main.java b/test/003-omnibus-opcodes/src/Main.java
index a30ec15..4e1ffe2 100644
--- a/test/003-omnibus-opcodes/src/Main.java
+++ b/test/003-omnibus-opcodes/src/Main.java
@@ -67,7 +67,7 @@
         } catch (Throwable th) {
             // We and the RI throw ClassNotFoundException, but that isn't declared so javac
             // won't let us try to catch it.
-            th.printStackTrace();
+            th.printStackTrace(System.out);
         }
         InternedString.run();
         GenSelect.run();
diff --git a/test/008-exceptions/src/Main.java b/test/008-exceptions/src/Main.java
index 74af00c..89fe016 100644
--- a/test/008-exceptions/src/Main.java
+++ b/test/008-exceptions/src/Main.java
@@ -155,7 +155,7 @@
         } catch (BadError e) {
             System.out.println(e);
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         try {
             // Before splitting mirror::Class::kStatusError into
@@ -171,11 +171,11 @@
             throw new IllegalStateException("Should not reach here.");
         } catch (NoClassDefFoundError ncdfe) {
             if (!(ncdfe.getCause() instanceof BadError)) {
-                ncdfe.getCause().printStackTrace();
+                ncdfe.getCause().printStackTrace(System.out);
             }
         } catch (VerifyError e) {
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
@@ -186,7 +186,7 @@
         } catch (Error e) {
             System.out.println(e);
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         // Before splitting mirror::Class::kStatusError into
         // kStatusErrorUnresolved and kStatusErrorResolved,
@@ -200,7 +200,7 @@
             System.out.println(ncdfe);
             System.out.println("  cause: " + ncdfe.getCause());
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         try {
             MultiDexBadInitWrapper2.setDummy(1);
@@ -209,7 +209,7 @@
             System.out.println(ncdfe);
             System.out.println("  cause: " + ncdfe.getCause());
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/023-many-interfaces/src/ManyInterfaces.java b/test/023-many-interfaces/src/ManyInterfaces.java
index d69a490..8ec4566 100644
--- a/test/023-many-interfaces/src/ManyInterfaces.java
+++ b/test/023-many-interfaces/src/ManyInterfaces.java
@@ -355,7 +355,7 @@
 
     static void testInstance001(Object obj, int count) {
         if (!(obj instanceof Interface001))
-            System.err.println("BAD");
+            System.out.println("BAD");
         while (count-- != 0) {
             boolean is;
             is = obj instanceof Interface001;
@@ -379,7 +379,7 @@
 
     static void testInstance049(Object obj, int count) {
         if (!(obj instanceof Interface049))
-            System.err.println("BAD");
+            System.out.println("BAD");
         while (count-- != 0) {
             boolean is;
             is = obj instanceof Interface049;
@@ -403,7 +403,7 @@
 
     static void testInstance099(Object obj, int count) {
         if (!(obj instanceof Interface099))
-            System.err.println("BAD");
+            System.out.println("BAD");
         while (count-- != 0) {
             boolean is;
             is = obj instanceof Interface099;
diff --git a/test/024-illegal-access/src/Main.java b/test/024-illegal-access/src/Main.java
index 84c7114..de9ad5b 100644
--- a/test/024-illegal-access/src/Main.java
+++ b/test/024-illegal-access/src/Main.java
@@ -18,7 +18,7 @@
     static public void main(String[] args) {
         try {
             PublicAccess.accessStaticField();
-            System.err.println("ERROR: call 1 not expected to succeed");
+            System.out.println("ERROR: call 1 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
             System.out.println("Got expected failure 1");
@@ -29,7 +29,7 @@
 
         try {
             PublicAccess.accessStaticMethod();
-            System.err.println("ERROR: call 2 not expected to succeed");
+            System.out.println("ERROR: call 2 not expected to succeed");
         } catch (IllegalAccessError iae) {
             // reference
             System.out.println("Got expected failure 2");
@@ -37,7 +37,7 @@
 
         try {
             PublicAccess.accessInstanceField();
-            System.err.println("ERROR: call 3 not expected to succeed");
+            System.out.println("ERROR: call 3 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
             System.out.println("Got expected failure 3");
@@ -48,7 +48,7 @@
 
         try {
             PublicAccess.accessInstanceMethod();
-            System.err.println("ERROR: call 4 not expected to succeed");
+            System.out.println("ERROR: call 4 not expected to succeed");
         } catch (IllegalAccessError iae) {
             // reference
             System.out.println("Got expected failure 4");
@@ -56,7 +56,7 @@
 
         try {
             CheckInstanceof.main(new Object());
-            System.err.println("ERROR: call 5 not expected to succeed");
+            System.out.println("ERROR: call 5 not expected to succeed");
         } catch (VerifyError ve) {
             // dalvik
             System.out.println("Got expected failure 5");
diff --git a/test/031-class-attributes/src/ClassAttrs.java b/test/031-class-attributes/src/ClassAttrs.java
index 39e69a3..8489a2c 100644
--- a/test/031-class-attributes/src/ClassAttrs.java
+++ b/test/031-class-attributes/src/ClassAttrs.java
@@ -133,12 +133,12 @@
             System.out.println("field signature: "
                     + getSignatureAttribute(field));
         } catch (NoSuchMethodException nsme) {
-            System.err.println("FAILED: " + nsme);
+            System.out.println("FAILED: " + nsme);
         } catch (NoSuchFieldException nsfe) {
-            System.err.println("FAILED: " + nsfe);
+            System.out.println("FAILED: " + nsfe);
         } catch (RuntimeException re) {
-            System.err.println("FAILED: " + re);
-            re.printStackTrace();
+            System.out.println("FAILED: " + re);
+            re.printStackTrace(System.out);
         }
 
         test_isAssignableFrom();
@@ -228,7 +228,7 @@
             method = c.getDeclaredMethod("getSignatureAttribute");
             method.setAccessible(true);
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
             return "<unknown>";
         }
 
diff --git a/test/032-concrete-sub/src/ConcreteSub.java b/test/032-concrete-sub/src/ConcreteSub.java
index 95adf63..61d1602 100644
--- a/test/032-concrete-sub/src/ConcreteSub.java
+++ b/test/032-concrete-sub/src/ConcreteSub.java
@@ -45,7 +45,7 @@
         try {
             meth = absClass.getMethod("redefineMe");
         } catch (NoSuchMethodException nsme) {
-            nsme.printStackTrace();
+            nsme.printStackTrace(System.out);
             return;
         }
         System.out.println("meth modifiers=" + meth.getModifiers());
diff --git a/test/032-concrete-sub/src/Main.java b/test/032-concrete-sub/src/Main.java
index 4a5193d..7d3be15 100644
--- a/test/032-concrete-sub/src/Main.java
+++ b/test/032-concrete-sub/src/Main.java
@@ -26,7 +26,7 @@
             ConcreteSub2 blah = new ConcreteSub2();
             // other VMs fail here (AbstractMethodError)
             blah.doStuff();
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (VerifyError ve) {
             System.out.println("Got expected failure");
         } catch (AbstractMethodError ame) {
diff --git a/test/036-finalizer/src/Main.java b/test/036-finalizer/src/Main.java
index 0de56f9..734830f 100644
--- a/test/036-finalizer/src/Main.java
+++ b/test/036-finalizer/src/Main.java
@@ -120,7 +120,7 @@
       static void printNonFinalized() {
         for (int i = 0; i < maxCount; ++i) {
           if (!FinalizeCounter.finalized[i]) {
-            System.err.println("Element " + i + " was not finalized");
+            System.out.println("Element " + i + " was not finalized");
           }
         }
       }
diff --git a/test/042-new-instance/src/Main.java b/test/042-new-instance/src/Main.java
index 755d62e..34d1f5a 100644
--- a/test/042-new-instance/src/Main.java
+++ b/test/042-new-instance/src/Main.java
@@ -37,31 +37,31 @@
             Object obj = c.newInstance();
             System.out.println("LocalClass succeeded");
         } catch (Exception ex) {
-            System.err.println("LocalClass failed");
-            ex.printStackTrace();
+            System.out.println("LocalClass failed");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
         try {
             Class<?> c = Class.forName("otherpackage.PackageAccess");
             Object obj = c.newInstance();
-            System.err.println("ERROR: PackageAccess succeeded unexpectedly");
+            System.out.println("ERROR: PackageAccess succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
             System.out.println("Got expected PackageAccess complaint");
         } catch (Exception ex) {
-            System.err.println("Got unexpected PackageAccess failure");
-            ex.printStackTrace();
+            System.out.println("Got unexpected PackageAccess failure");
+            ex.printStackTrace(System.out);
         }
 
         LocalClass3.main();
 
         try {
             MaybeAbstract ma = new MaybeAbstract();
-            System.err.println("ERROR: MaybeAbstract succeeded unexpectedly");
+            System.out.println("ERROR: MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationError ie) {
             System.out.println("Got expected InstantationError");
         } catch (Exception ex) {
-            System.err.println("Got unexpected MaybeAbstract failure");
+            System.out.println("Got unexpected MaybeAbstract failure");
         }
     }
 
@@ -73,12 +73,12 @@
         try {
             Class<?> c = Class.forName("LocalClass");
             Constructor<?> cons = c.getConstructor();
-            System.err.println("Cons LocalClass succeeded unexpectedly");
+            System.out.println("Cons LocalClass succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             System.out.println("Cons LocalClass failed as expected");
         } catch (Exception ex) {
-            System.err.println("Cons LocalClass failed strangely");
-            ex.printStackTrace();
+            System.out.println("Cons LocalClass failed strangely");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -88,8 +88,8 @@
             Object obj = cons.newInstance();
             System.out.println("Cons LocalClass2 succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons LocalClass2 failed");
-            ex.printStackTrace();
+            System.out.println("Cons LocalClass2 failed");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -99,8 +99,8 @@
             Object obj = cons.newInstance(new Main());
             System.out.println("Cons InnerClass succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons InnerClass failed");
-            ex.printStackTrace();
+            System.out.println("Cons InnerClass failed");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -110,21 +110,21 @@
             Object obj = cons.newInstance();
             System.out.println("Cons StaticInnerClass succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons StaticInnerClass failed");
-            ex.printStackTrace();
+            System.out.println("Cons StaticInnerClass failed");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
         try {
             Class<?> c = Class.forName("otherpackage.PackageAccess");
             Constructor<?> cons = c.getConstructor();
-            System.err.println("ERROR: Cons PackageAccess succeeded unexpectedly");
+            System.out.println("ERROR: Cons PackageAccess succeeded unexpectedly");
         } catch (NoSuchMethodException nsme) {
             // constructor isn't public
             System.out.println("Cons got expected PackageAccess complaint");
         } catch (Exception ex) {
-            System.err.println("Cons got unexpected PackageAccess failure");
-            ex.printStackTrace();
+            System.out.println("Cons got unexpected PackageAccess failure");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
@@ -132,13 +132,13 @@
             Class<?> c = Class.forName("MaybeAbstract");
             Constructor<?> cons = c.getConstructor();
             Object obj = cons.newInstance();
-            System.err.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
+            System.out.println("ERROR: Cons MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationException ie) {
             // note InstantiationException vs. InstantiationError
             System.out.println("Cons got expected InstantationException");
         } catch (Exception ex) {
-            System.err.println("Cons got unexpected MaybeAbstract failure");
-            ex.printStackTrace();
+            System.out.println("Cons got unexpected MaybeAbstract failure");
+            ex.printStackTrace(System.out);
         }
 
         // should fail
@@ -147,13 +147,13 @@
             Constructor<?> cons = c.getConstructor();
             if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
             Object obj = cons.newInstance();
-            System.err.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
+            System.out.println("ERROR: Cons PackageAccess2 succeeded unexpectedly");
         } catch (IllegalAccessException iae) {
             // constructor is public, but class has package scope
             System.out.println("Cons got expected PackageAccess2 complaint");
         } catch (Exception ex) {
-            System.err.println("Cons got unexpected PackageAccess2 failure");
-            ex.printStackTrace();
+            System.out.println("Cons got unexpected PackageAccess2 failure");
+            ex.printStackTrace(System.out);
         }
 
         // should succeed
@@ -161,8 +161,8 @@
             otherpackage.ConstructorAccess.newConstructorInstance();
             System.out.println("Cons ConstructorAccess succeeded");
         } catch (Exception ex) {
-            System.err.println("Cons ConstructorAccess failed");
-            ex.printStackTrace();
+            System.out.println("Cons ConstructorAccess failed");
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -187,8 +187,8 @@
             CC.newInstance();
             System.out.println("LocalClass3 succeeded");
         } catch (Exception ex) {
-            System.err.println("Got unexpected LocalClass3 failure");
-            ex.printStackTrace();
+            System.out.println("Got unexpected LocalClass3 failure");
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -200,7 +200,7 @@
                 Class<?> c = CC.class;
                 return c.newInstance();
             } catch (Exception ex) {
-                ex.printStackTrace();
+                ex.printStackTrace(System.out);
                 return null;
             }
         }
diff --git a/test/044-proxy/src/BasicTest.java b/test/044-proxy/src/BasicTest.java
index 5f04b93..7f301f6 100644
--- a/test/044-proxy/src/BasicTest.java
+++ b/test/044-proxy/src/BasicTest.java
@@ -34,9 +34,9 @@
         Object proxy = createProxy(proxyMe);
 
         if (!Proxy.isProxyClass(proxy.getClass()))
-            System.err.println("not a proxy class?");
+            System.out.println("not a proxy class?");
         if (Proxy.getInvocationHandler(proxy) == null)
-            System.err.println("ERROR: Proxy.getInvocationHandler is null");
+            System.out.println("ERROR: Proxy.getInvocationHandler is null");
 
         /* take it for a spin; verifies instanceof constraint */
         Shapes shapes = (Shapes) proxy;
@@ -110,13 +110,13 @@
             //System.out.println("Constructor is " + cons);
             proxy = cons.newInstance(handler);
         } catch (NoSuchMethodException nsme) {
-            System.err.println("failed: " + nsme);
+            System.out.println("failed: " + nsme);
         } catch (InstantiationException ie) {
-            System.err.println("failed: " + ie);
+            System.out.println("failed: " + ie);
         } catch (IllegalAccessException ie) {
-            System.err.println("failed: " + ie);
+            System.out.println("failed: " + ie);
         } catch (InvocationTargetException ite) {
-            System.err.println("failed: " + ite);
+            System.out.println("failed: " + ite);
         }
 
         return proxy;
diff --git a/test/044-proxy/src/Clash.java b/test/044-proxy/src/Clash.java
index d000112..7dabe92 100644
--- a/test/044-proxy/src/Clash.java
+++ b/test/044-proxy/src/Clash.java
@@ -32,7 +32,7 @@
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
                 new Class<?>[] { Interface1A.class, Interface1A.class },
                 handler);
-            System.err.println("Dupe did not throw expected exception");
+            System.out.println("Dupe did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Dupe threw expected exception");
         }
@@ -41,7 +41,7 @@
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
                 new Class<?>[] { Interface1A.class, Interface1B.class },
                 handler);
-            System.err.println("Clash did not throw expected exception");
+            System.out.println("Clash did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash threw expected exception");
         }
diff --git a/test/044-proxy/src/Clash2.java b/test/044-proxy/src/Clash2.java
index e405cfe..51221f2 100644
--- a/test/044-proxy/src/Clash2.java
+++ b/test/044-proxy/src/Clash2.java
@@ -31,7 +31,7 @@
             Proxy.newProxyInstance(Clash.class.getClassLoader(),
                 new Class<?>[] { Interface2A.class, Interface2B.class },
                 handler);
-            System.err.println("Clash2 did not throw expected exception");
+            System.out.println("Clash2 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash2 threw expected exception");
         }
diff --git a/test/044-proxy/src/Clash3.java b/test/044-proxy/src/Clash3.java
index 44806ce..9d23059 100644
--- a/test/044-proxy/src/Clash3.java
+++ b/test/044-proxy/src/Clash3.java
@@ -35,7 +35,7 @@
                     Interface3aa.class,
                     Interface3b.class },
                 handler);
-            System.err.println("Clash3 did not throw expected exception");
+            System.out.println("Clash3 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash3 threw expected exception");
         }
diff --git a/test/044-proxy/src/Clash4.java b/test/044-proxy/src/Clash4.java
index ca5c3ab..45d4820 100644
--- a/test/044-proxy/src/Clash4.java
+++ b/test/044-proxy/src/Clash4.java
@@ -36,7 +36,7 @@
                     Interface4b.class,
                     Interface4bb.class },
                 handler);
-            System.err.println("Clash4 did not throw expected exception");
+            System.out.println("Clash4 did not throw expected exception");
         } catch (IllegalArgumentException iae) {
             System.out.println("Clash4 threw expected exception");
             //System.out.println(iae);
diff --git a/test/044-proxy/src/ConstructorProxy.java b/test/044-proxy/src/ConstructorProxy.java
index 95d150c..dfafbd8 100644
--- a/test/044-proxy/src/ConstructorProxy.java
+++ b/test/044-proxy/src/ConstructorProxy.java
@@ -28,7 +28,7 @@
       new ConstructorProxy().runTest();
     } catch (Exception e) {
       System.out.println("Unexpected failure occured");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/044-proxy/src/WrappedThrow.java b/test/044-proxy/src/WrappedThrow.java
index 643ba05..afea26d 100644
--- a/test/044-proxy/src/WrappedThrow.java
+++ b/test/044-proxy/src/WrappedThrow.java
@@ -43,29 +43,29 @@
         InterfaceW2 if2 = (InterfaceW2) proxy;
         try {
             if1.throwFunky();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (UndeclaredThrowableException ute) {
             System.out.println("Got expected UTE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if1.throwFunky2();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (IOException ioe) {
             System.out.println("Got expected IOE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if2.throwFunky2();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (IOException ioe) {
             System.out.println("Got expected IOE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         /*
@@ -73,38 +73,38 @@
          */
         try {
             if1.throwException();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (UndeclaredThrowableException ute) {
             System.out.println("Got expected UTE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if1.throwBase();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (UndeclaredThrowableException ute) {
             System.out.println("Got expected UTE");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if2.throwSub();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (SubException se) {
             System.out.println("Got expected exception");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         try {
             if2.throwSubSub();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (SubException se) {
             System.out.println("Got expected exception");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
 
         /*
@@ -113,11 +113,11 @@
          */
         try {
             if1.bothThrowBase();
-            System.err.println("No exception thrown");
+            System.out.println("No exception thrown");
         } catch (BaseException se) {
             System.out.println("Got expected exception");
         } catch (Throwable t) {
-            System.err.println("Got unexpected exception: " + t);
+            System.out.println("Got unexpected exception: " + t);
         }
     }
 }
diff --git a/test/045-reflect-array/src/Main.java b/test/045-reflect-array/src/Main.java
index 7418eed..4c321b3 100644
--- a/test/045-reflect-array/src/Main.java
+++ b/test/045-reflect-array/src/Main.java
@@ -102,7 +102,7 @@
                 throw new RuntimeException("load should have worked");
             }
         } catch (IllegalArgumentException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
         }
         try {
             Array.getByte(charArray, 2);
@@ -116,7 +116,7 @@
                     + Array.getInt(charArray, 3));
             }
         } catch (IllegalArgumentException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
         }
 
         System.out.println("ReflectArrayTest.testSingleChar passed");
diff --git a/test/046-reflect/src/Main.java b/test/046-reflect/src/Main.java
index 10dad8d..b8a48ea 100644
--- a/test/046-reflect/src/Main.java
+++ b/test/046-reflect/src/Main.java
@@ -89,7 +89,7 @@
 
             try {
                 meth = target.getMethod("packageMethod");
-                System.err.println("succeeded on package-scope method");
+                System.out.println("succeeded on package-scope method");
             } catch (NoSuchMethodException nsme) {
                 // good
             }
@@ -101,7 +101,7 @@
             try {
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
                 meth.invoke(instance);
-                System.err.println("inner-method invoke unexpectedly worked");
+                System.out.println("inner-method invoke unexpectedly worked");
             } catch (IllegalAccessException iae) {
                 // good
             }
@@ -110,13 +110,13 @@
             try {
                 int x = field.getInt(instance);
                 if (!FULL_ACCESS_CHECKS) { throw new IllegalAccessException(); }
-                System.err.println("field get unexpectedly worked: " + x);
+                System.out.println("field get unexpectedly worked: " + x);
             } catch (IllegalAccessException iae) {
                 // good
             }
         } catch (Exception ex) {
             System.out.println("----- unexpected exception -----");
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -171,7 +171,7 @@
             }
             catch (Exception ex) {
                 System.out.println("GLITCH: invoke got wrong exception:");
-                ex.printStackTrace();
+                ex.printStackTrace(System.out);
             }
             System.out.println("");
 
@@ -400,7 +400,7 @@
 
         } catch (Exception ex) {
             System.out.println("----- unexpected exception -----");
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         System.out.println("ReflectTest done!");
@@ -414,7 +414,7 @@
             m = Collections.class.getDeclaredMethod("swap",
                             Object[].class, int.class, int.class);
         } catch (NoSuchMethodException nsme) {
-            nsme.printStackTrace();
+            nsme.printStackTrace(System.out);
             return;
         }
         System.out.println(m + " accessible=" + m.isAccessible());
@@ -423,10 +423,10 @@
         try {
             m.invoke(null, objects, 0, 1);
         } catch (IllegalAccessException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
             return;
         } catch (InvocationTargetException ite) {
-            ite.printStackTrace();
+            ite.printStackTrace(System.out);
             return;
         }
 
@@ -434,10 +434,10 @@
             String s = "Should be ignored";
             m.invoke(s, objects, 0, 1);
         } catch (IllegalAccessException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
             return;
         } catch (InvocationTargetException ite) {
-            ite.printStackTrace();
+            ite.printStackTrace(System.out);
             return;
         }
 
@@ -449,7 +449,7 @@
         } catch (InvocationTargetException ite) {
             System.out.println("checkType got expected exception");
         } catch (IllegalAccessException iae) {
-            iae.printStackTrace();
+            iae.printStackTrace(System.out);
             return;
         }
     }
@@ -826,7 +826,7 @@
     static {
         System.out.println("FieldNoisyInit is initializing");
         //Throwable th = new Throwable();
-        //th.printStackTrace();
+        //th.printStackTrace(System.out);
     }
 }
 
@@ -842,7 +842,7 @@
     static {
         System.out.println("MethodNoisyInit is initializing");
         //Throwable th = new Throwable();
-        //th.printStackTrace();
+        //th.printStackTrace(System.out);
     }
 }
 
diff --git a/test/048-reflect-v8/src/DefaultDeclared.java b/test/048-reflect-v8/src/DefaultDeclared.java
index 16e8a24..d49bdc9 100644
--- a/test/048-reflect-v8/src/DefaultDeclared.java
+++ b/test/048-reflect-v8/src/DefaultDeclared.java
@@ -52,7 +52,7 @@
       System.out.println("NoSuchMethodException thrown for class " + klass.toString());
     } catch (Throwable t) {
       System.out.println("Unknown error thrown for class " + klass.toString());
-      t.printStackTrace();
+      t.printStackTrace(System.out);
     }
   }
 
diff --git a/test/050-sync-test/src/Main.java b/test/050-sync-test/src/Main.java
index 5364e2a..734b51e 100644
--- a/test/050-sync-test/src/Main.java
+++ b/test/050-sync-test/src/Main.java
@@ -39,7 +39,7 @@
             Thread.sleep(1000);
         } catch (InterruptedException ie) {
             System.out.println("INTERRUPT!");
-            ie.printStackTrace();
+            ie.printStackTrace(System.out);
         }
         System.out.println("GONE");
     }
@@ -56,7 +56,7 @@
                 one.wait();
             } catch (InterruptedException ie) {
                 System.out.println("INTERRUPT!");
-                ie.printStackTrace();
+                ie.printStackTrace(System.out);
             }
         }
 
@@ -69,7 +69,7 @@
             two.join();
         } catch (InterruptedException ie) {
             System.out.println("INTERRUPT!");
-            ie.printStackTrace();
+            ie.printStackTrace(System.out);
         }
         System.out.println("main: all done");
     }
@@ -167,7 +167,7 @@
                         " interrupted, flag=" + Thread.interrupted());
                 intr = true;
             } catch (Exception ex) {
-                ex.printStackTrace();
+                ex.printStackTrace(System.out);
             }
 
             if (!intr)
diff --git a/test/050-sync-test/src/ThreadDeathHandler.java b/test/050-sync-test/src/ThreadDeathHandler.java
index 0a7437d..58061f8 100644
--- a/test/050-sync-test/src/ThreadDeathHandler.java
+++ b/test/050-sync-test/src/ThreadDeathHandler.java
@@ -27,7 +27,7 @@
     }
 
     public void uncaughtException(Thread t, Throwable e) {
-        System.err.println("Uncaught exception " + mMyMessage + "!");
-        e.printStackTrace();
+        System.out.println("Uncaught exception " + mMyMessage + "!");
+        e.printStackTrace(System.out);
     }
 }
diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java
index 08cb5de..fe1cafe 100644
--- a/test/051-thread/src/Main.java
+++ b/test/051-thread/src/Main.java
@@ -79,7 +79,7 @@
         try {
             t.join();
         } catch (InterruptedException ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         System.out.print("testThreadDaemons finished\n");
diff --git a/test/053-wait-some/src/Main.java b/test/053-wait-some/src/Main.java
index 377a578..b8e6dfe 100644
--- a/test/053-wait-some/src/Main.java
+++ b/test/053-wait-some/src/Main.java
@@ -39,7 +39,7 @@
             } catch (IllegalArgumentException iae) {
                 System.out.println("Caught expected exception on neg arg");
             } catch (InterruptedException ie) {
-                ie.printStackTrace();
+                ie.printStackTrace(System.out);
             }
 
             for (long delay : DELAYS) {
@@ -49,7 +49,7 @@
                 try {
                     sleepy.wait(delay);
                 } catch (InterruptedException ie) {
-                    ie.printStackTrace();
+                    ie.printStackTrace(System.out);
                 }
                 end = System.currentTimeMillis();
 
diff --git a/test/054-uncaught/src/Main.java b/test/054-uncaught/src/Main.java
index 688a2a4..43de7ae 100644
--- a/test/054-uncaught/src/Main.java
+++ b/test/054-uncaught/src/Main.java
@@ -33,7 +33,7 @@
         try {
             t.join();
         } catch (InterruptedException ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -41,7 +41,7 @@
         ThreadDeathHandler defHandler = new ThreadDeathHandler("DEFAULT");
         ThreadDeathHandler threadHandler = new ThreadDeathHandler("THREAD");
 
-        System.err.println("Test " + which);
+        System.out.println("Test " + which);
         switch (which) {
             case 1: {
                 Thread.setDefaultUncaughtExceptionHandler(defHandler);
diff --git a/test/054-uncaught/src/ThreadDeathHandler.java b/test/054-uncaught/src/ThreadDeathHandler.java
index 0a7437d..58061f8 100644
--- a/test/054-uncaught/src/ThreadDeathHandler.java
+++ b/test/054-uncaught/src/ThreadDeathHandler.java
@@ -27,7 +27,7 @@
     }
 
     public void uncaughtException(Thread t, Throwable e) {
-        System.err.println("Uncaught exception " + mMyMessage + "!");
-        e.printStackTrace();
+        System.out.println("Uncaught exception " + mMyMessage + "!");
+        e.printStackTrace(System.out);
     }
 }
diff --git a/test/059-finalizer-throw/src/Main.java b/test/059-finalizer-throw/src/Main.java
index fa80fe3..3bfbc2d 100644
--- a/test/059-finalizer-throw/src/Main.java
+++ b/test/059-finalizer-throw/src/Main.java
@@ -46,7 +46,7 @@
             try {
                 Thread.sleep(500);
             } catch (InterruptedException ie) {
-                System.err.println(ie);
+                System.out.println(ie);
             }
         }
 
@@ -54,7 +54,7 @@
         try {
             Thread.sleep(750);
         } catch (InterruptedException ie) {
-            System.err.println(ie);
+            System.out.println(ie);
         }
 
         System.out.println("done");
diff --git a/test/064-field-access/src/Main.java b/test/064-field-access/src/Main.java
index 50ad5b9..b08f3ae 100644
--- a/test/064-field-access/src/Main.java
+++ b/test/064-field-access/src/Main.java
@@ -28,7 +28,7 @@
 
     try {
       GetNonexistent.main(null);
-      System.err.println("Not expected to succeed");
+      System.out.println("Not expected to succeed");
     } catch (VerifyError fe) {
       // dalvik
       System.out.println("Got expected failure");
@@ -101,22 +101,22 @@
 
       /* success; expected? */
       if (expectedException != null) {
-        System.err.println("ERROR: call succeeded for field " + field +
+        System.out.println("ERROR: call succeeded for field " + field +
             " with a read of type '" + type +
             "', was expecting " + expectedException);
         Thread.dumpStack();
       }
     } catch (Exception ex) {
       if (expectedException == null) {
-        System.err.println("ERROR: call failed unexpectedly: "
+        System.out.println("ERROR: call failed unexpectedly: "
             + ex.getClass());
-        ex.printStackTrace();
+        ex.printStackTrace(System.out);
       } else {
         if (!expectedException.equals(ex.getClass())) {
-          System.err.println("ERROR: incorrect exception: wanted "
+          System.out.println("ERROR: incorrect exception: wanted "
               + expectedException.getName() + ", got "
               + ex.getClass());
-          ex.printStackTrace();
+          ex.printStackTrace(System.out);
         }
       }
     }
@@ -675,22 +675,22 @@
 
       /* success; expected? */
       if (expectedException != null) {
-        System.err.println("ERROR: call succeeded for field " + field +
+        System.out.println("ERROR: call succeeded for field " + field +
             " with a read of type '" + type +
             "', was expecting " + expectedException);
         Thread.dumpStack();
       }
     } catch (Exception ex) {
       if (expectedException == null) {
-        System.err.println("ERROR: call failed unexpectedly: "
+        System.out.println("ERROR: call failed unexpectedly: "
             + ex.getClass());
-        ex.printStackTrace();
+        ex.printStackTrace(System.out);
       } else {
         if (!expectedException.equals(ex.getClass())) {
-          System.err.println("ERROR: incorrect exception: wanted "
+          System.out.println("ERROR: incorrect exception: wanted "
               + expectedException.getName() + ", got "
               + ex.getClass());
-          ex.printStackTrace();
+          ex.printStackTrace(System.out);
         }
       }
     }
@@ -704,19 +704,19 @@
       result = method.invoke(obj);
       /* success; expected? */
       if (expectedException != null) {
-        System.err.println("ERROR: call succeeded for method " + method + "', was expecting " +
+        System.out.println("ERROR: call succeeded for method " + method + "', was expecting " +
                            expectedException);
         Thread.dumpStack();
       }
     } catch (Exception ex) {
       if (expectedException == null) {
-        System.err.println("ERROR: call failed unexpectedly: " + ex.getClass());
-        ex.printStackTrace();
+        System.out.println("ERROR: call failed unexpectedly: " + ex.getClass());
+        ex.printStackTrace(System.out);
       } else {
         if (!expectedException.equals(ex.getClass())) {
-          System.err.println("ERROR: incorrect exception: wanted " + expectedException.getName() +
+          System.out.println("ERROR: incorrect exception: wanted " + expectedException.getName() +
                              ", got " + ex.getClass());
-          ex.printStackTrace();
+          ex.printStackTrace(System.out);
         }
       }
     }
diff --git a/test/065-mismatched-implements/src/Main.java b/test/065-mismatched-implements/src/Main.java
index 5975b99..55d0bab 100644
--- a/test/065-mismatched-implements/src/Main.java
+++ b/test/065-mismatched-implements/src/Main.java
@@ -21,7 +21,7 @@
     public static void main(String[] args) {
         try {
             Indirect.main();
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected ICCE");
         }
diff --git a/test/066-mismatched-super/src/Main.java b/test/066-mismatched-super/src/Main.java
index 5975b99..55d0bab 100644
--- a/test/066-mismatched-super/src/Main.java
+++ b/test/066-mismatched-super/src/Main.java
@@ -21,7 +21,7 @@
     public static void main(String[] args) {
         try {
             Indirect.main();
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected ICCE");
         }
diff --git a/test/068-classloader/src/Main.java b/test/068-classloader/src/Main.java
index 01539b7..0aaa152 100644
--- a/test/068-classloader/src/Main.java
+++ b/test/068-classloader/src/Main.java
@@ -129,7 +129,7 @@
                 throw new RuntimeException("target 2 has unexpected value " + value);
             }
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -153,8 +153,8 @@
         try {
             altClass = loader.loadClass("Inaccessible1");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed");
-            cnfe.printStackTrace();
+            System.out.println("loadClass failed");
+            cnfe.printStackTrace(System.out);
             return;
         }
 
@@ -162,9 +162,9 @@
         Object obj;
         try {
             obj = altClass.newInstance();
-            System.err.println("ERROR: Inaccessible1 was accessible");
+            System.out.println("ERROR: Inaccessible1 was accessible");
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
             System.out.println("Got expected access exception #1");
@@ -182,14 +182,14 @@
 
         try {
             altClass = loader.loadClass("Inaccessible2");
-            System.err.println("ERROR: Inaccessible2 was accessible: " + altClass);
+            System.out.println("ERROR: Inaccessible2 was accessible: " + altClass);
         } catch (ClassNotFoundException cnfe) {
             Throwable cause = cnfe.getCause();
             if (cause instanceof IllegalAccessError) {
                 System.out.println("Got expected CNFE/IAE #2");
             } else {
-                System.err.println("Got unexpected CNFE/IAE #2");
-                cnfe.printStackTrace();
+                System.out.println("Got unexpected CNFE/IAE #2");
+                cnfe.printStackTrace(System.out);
             }
         }
     }
@@ -202,14 +202,14 @@
 
         try {
             altClass = loader.loadClass("Inaccessible3");
-            System.err.println("ERROR: Inaccessible3 was accessible: " + altClass);
+            System.out.println("ERROR: Inaccessible3 was accessible: " + altClass);
         } catch (ClassNotFoundException cnfe) {
             Throwable cause = cnfe.getCause();
             if (cause instanceof IllegalAccessError) {
                 System.out.println("Got expected CNFE/IAE #3");
             } else {
-                System.err.println("Got unexpected CNFE/IAE #3");
-                cnfe.printStackTrace();
+                System.out.println("Got unexpected CNFE/IAE #3");
+                cnfe.printStackTrace(System.out);
             }
         }
     }
@@ -227,7 +227,7 @@
             //System.out.println("+++ DoubledExtend is " + doubledExtendClass
             //    + " in " + doubledExtendClass.getClassLoader());
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -235,10 +235,10 @@
         try {
             obj = doubledExtendClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got expected LinkageError on DE");
@@ -254,8 +254,8 @@
             String result;
 
             result = Base.doStuff(de);
-            System.err.println("ERROR: did not get LinkageError on DE");
-            System.err.println("(result=" + result + ")");
+            System.out.println("ERROR: did not get LinkageError on DE");
+            System.out.println("(result=" + result + ")");
         } catch (LinkageError le) {
             System.out.println("Got expected LinkageError on DE");
             return;
@@ -274,7 +274,7 @@
         try {
             doubledExtendOkayClass = loader.loadClass("DoubledExtendOkay");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -282,14 +282,14 @@
         try {
             obj = doubledExtendOkayClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
-            System.err.println("Got unexpected LinkageError on DEO");
-            le.printStackTrace();
+            System.out.println("Got unexpected LinkageError on DEO");
+            le.printStackTrace(System.out);
             return;
         }
 
@@ -304,8 +304,8 @@
             result = BaseOkay.doStuff(de);
             System.out.println("Got DEO result " + result);
         } catch (LinkageError le) {
-            System.err.println("Got unexpected LinkageError on DEO");
-            le.printStackTrace();
+            System.out.println("Got unexpected LinkageError on DEO");
+            le.printStackTrace(System.out);
             return;
         }
     }
@@ -322,7 +322,7 @@
         try {
             getDoubledClass = loader.loadClass("GetDoubled");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -330,10 +330,10 @@
         try {
             obj = getDoubledClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             // Dalvik bails here
@@ -354,7 +354,7 @@
             System.out.println("Got LinkageError on GD");
             return;
         }
-        System.err.println("Should have failed by now on GetDoubled");
+        System.out.println("Should have failed by now on GetDoubled");
     }
 
     /**
@@ -368,7 +368,7 @@
         try {
             abstractGetClass = loader.loadClass("AbstractGet");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass ta failed: " + cnfe);
+            System.out.println("loadClass ta failed: " + cnfe);
             return;
         }
 
@@ -376,10 +376,10 @@
         try {
             obj = abstractGetClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got LinkageError on TA");
@@ -399,7 +399,7 @@
             System.out.println("Got LinkageError on TA");
             return;
         }
-        System.err.println("Should have failed by now in testAbstract");
+        System.out.println("Should have failed by now in testAbstract");
     }
 
     /**
@@ -415,7 +415,7 @@
         try {
             doubledImplementClass = loader.loadClass("DoubledImplement");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -423,10 +423,10 @@
         try {
             obj = doubledImplementClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got LinkageError on DI (early)");
@@ -447,7 +447,7 @@
         try {
             di.one();
             if (!isOne) {
-                System.err.println("ERROR: did not get LinkageError on DI");
+                System.out.println("ERROR: did not get LinkageError on DI");
             }
         } catch (LinkageError le) {
             if (!isOne) {
@@ -476,7 +476,7 @@
             ifaceImplClass = loader.loadClass("IfaceImpl");
             ifaceImplClass = loader.loadClass("DoubledImplement2");
         } catch (ClassNotFoundException cnfe) {
-            System.err.println("loadClass failed: " + cnfe);
+            System.out.println("loadClass failed: " + cnfe);
             return;
         }
 
@@ -484,10 +484,10 @@
         try {
             obj = ifaceImplClass.newInstance();
         } catch (InstantiationException ie) {
-            System.err.println("newInstance failed: " + ie);
+            System.out.println("newInstance failed: " + ie);
             return;
         } catch (IllegalAccessException iae) {
-            System.err.println("newInstance failed: " + iae);
+            System.out.println("newInstance failed: " + iae);
             return;
         } catch (LinkageError le) {
             System.out.println("Got LinkageError on IDI (early)");
diff --git a/test/069-field-type/src/Main.java b/test/069-field-type/src/Main.java
index f9885e6..d9aa9e1 100644
--- a/test/069-field-type/src/Main.java
+++ b/test/069-field-type/src/Main.java
@@ -19,7 +19,7 @@
         /* try to use the reference; should fail */
         try {
             holder.mValue.run();
-            System.err.println("ERROR: did not get expected ICCE");
+            System.out.println("ERROR: did not get expected ICCE");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected IncompatibleClassChangeError");
         }
diff --git a/test/070-nio-buffer/src/Main.java b/test/070-nio-buffer/src/Main.java
index a7433b8..a3eeb3f 100644
--- a/test/070-nio-buffer/src/Main.java
+++ b/test/070-nio-buffer/src/Main.java
@@ -58,7 +58,7 @@
 
         try {
             shortBuf.put(myShorts, 0, 1);     // should fail
-            System.err.println("ERROR: out-of-bounds put succeeded\n");
+            System.out.println("ERROR: out-of-bounds put succeeded\n");
         } catch (BufferOverflowException boe) {
             System.out.println("Got expected buffer overflow exception");
         }
@@ -66,7 +66,7 @@
         try {
             shortBuf.position(0);
             shortBuf.put(myShorts, 0, 33);     // should fail
-            System.err.println("ERROR: out-of-bounds put succeeded\n");
+            System.out.println("ERROR: out-of-bounds put succeeded\n");
         } catch (IndexOutOfBoundsException ioobe) {
             System.out.println("Got expected out-of-bounds exception");
         }
@@ -74,7 +74,7 @@
         try {
             shortBuf.position(16);
             shortBuf.put(myShorts, 0, 17);     // should fail
-            System.err.println("ERROR: out-of-bounds put succeeded\n");
+            System.out.println("ERROR: out-of-bounds put succeeded\n");
         } catch (BufferOverflowException boe) {
             System.out.println("Got expected buffer overflow exception");
         }
diff --git a/test/073-mismatched-field/src/Main.java b/test/073-mismatched-field/src/Main.java
index 70709c0..2d6b9eb 100644
--- a/test/073-mismatched-field/src/Main.java
+++ b/test/073-mismatched-field/src/Main.java
@@ -23,7 +23,7 @@
     void doit() {
         try {
             System.out.println("value=" + this.f);
-            System.err.println("Succeeded unexpectedly");
+            System.out.println("Succeeded unexpectedly");
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected failure");
         }
diff --git a/test/074-gc-thrash/src/Main.java b/test/074-gc-thrash/src/Main.java
index df04793..5165df7 100644
--- a/test/074-gc-thrash/src/Main.java
+++ b/test/074-gc-thrash/src/Main.java
@@ -52,9 +52,9 @@
             try {
                 dumpHprofDataMethod.invoke(null, dumpFile);
             } catch (IllegalAccessException iae) {
-                System.err.println(iae);
+                System.out.println(iae);
             } catch (InvocationTargetException ite) {
-                System.err.println(ite);
+                System.out.println(ite);
             }
         }
 
@@ -80,7 +80,7 @@
         try {
             meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
-            System.err.println("Found VMDebug but not dumpHprofData method");
+            System.out.println("Found VMDebug but not dumpHprofData method");
             return null;
         }
 
@@ -126,7 +126,7 @@
             deep.join();
             large.join();
         } catch (InterruptedException ie) {
-            System.err.println("join was interrupted");
+            System.out.println("join was interrupted");
         }
     }
 
@@ -137,7 +137,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException ie) {
-            System.err.println("sleep was interrupted");
+            System.out.println("sleep was interrupted");
         }
     }
 
@@ -213,7 +213,7 @@
         }
 
         if (!once) {
-            System.err.println("not even once?");
+            System.out.println("not even once?");
             return;
         }
 
@@ -229,7 +229,7 @@
 
         for (int i = 0; i < MAX_DEPTH; i++) {
             if (weak[i].get() != null) {
-                System.err.println("Deep: weak still has " + i);
+                System.out.println("Deep: weak still has " + i);
             }
         }
 
@@ -251,7 +251,7 @@
     private static void checkStringReferences() {
       for (int i = 0; i < MAX_DEPTH; i++) {
           if (strong[i] != weak[i].get()) {
-              System.err.println("Deep: " + i + " strong=" + strong[i] +
+              System.out.println("Deep: " + i + " strong=" + strong[i] +
                   ", weak=" + weak[i].get());
           }
       }
diff --git a/test/075-verification-error/src/Main.java b/test/075-verification-error/src/Main.java
index 9b66a8d..3f2881e 100644
--- a/test/075-verification-error/src/Main.java
+++ b/test/075-verification-error/src/Main.java
@@ -36,12 +36,12 @@
     static void testClassNewInstance() {
         try {
             MaybeAbstract ma = new MaybeAbstract();
-            System.err.println("ERROR: MaybeAbstract succeeded unexpectedly");
+            System.out.println("ERROR: MaybeAbstract succeeded unexpectedly");
         } catch (InstantiationError ie) {
             System.out.println("Got expected InstantationError");
             if (VERBOSE) System.out.println("--- " + ie);
         } catch (Exception ex) {
-            System.err.println("Got unexpected MaybeAbstract failure");
+            System.out.println("Got unexpected MaybeAbstract failure");
         }
     }
 
@@ -88,7 +88,7 @@
 
         try {
             int x = mutant.inaccessibleField;
-            System.err.println("ERROR: bad access succeeded (ifield)");
+            System.out.println("ERROR: bad access succeeded (ifield)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (ifield)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -96,7 +96,7 @@
 
         try {
             int y = Mutant.inaccessibleStaticField;
-            System.err.println("ERROR: bad access succeeded (sfield)");
+            System.out.println("ERROR: bad access succeeded (sfield)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (sfield)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -104,7 +104,7 @@
 
         try {
             mutant.inaccessibleMethod();
-            System.err.println("ERROR: bad access succeeded (method)");
+            System.out.println("ERROR: bad access succeeded (method)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (method)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -112,7 +112,7 @@
 
         try {
             Mutant.inaccessibleStaticMethod();
-            System.err.println("ERROR: bad access succeeded (smethod)");
+            System.out.println("ERROR: bad access succeeded (smethod)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (smethod)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -121,7 +121,7 @@
         try {
             /* accessible static method in an inaccessible class */
             InaccessibleClass.test();
-            System.err.println("ERROR: bad meth-class access succeeded (meth-class)");
+            System.out.println("ERROR: bad meth-class access succeeded (meth-class)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (meth-class)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -130,7 +130,7 @@
         try {
             /* accessible static field in an inaccessible class */
             int blah = InaccessibleClass.blah;
-            System.err.println("ERROR: bad field-class access succeeded (field-class)");
+            System.out.println("ERROR: bad field-class access succeeded (field-class)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (field-class)");
             if (VERBOSE) System.out.println("--- " + iae);
@@ -139,7 +139,7 @@
         try {
             /* inaccessible static method in an accessible class */
             InaccessibleMethod.test();
-            System.err.println("ERROR: bad access succeeded (meth-meth)");
+            System.out.println("ERROR: bad access succeeded (meth-meth)");
         } catch (IllegalAccessError iae) {
             System.out.println("Got expected IllegalAccessError (meth-meth)");
             if (VERBOSE) System.out.println("--- " + iae);
diff --git a/test/077-method-override/src/Main.java b/test/077-method-override/src/Main.java
index 84bdf35..3a3c528 100644
--- a/test/077-method-override/src/Main.java
+++ b/test/077-method-override/src/Main.java
@@ -37,8 +37,8 @@
             ((Base)derived).overrideVirtualWithStatic();
         } catch (NoSuchMethodError nsme) {
             /* NSME is subclass of ICCE, so check it explicitly */
-            System.err.println("Got NSME - ovws");
-            nsme.printStackTrace(System.err);
+            System.out.println("Got NSME - ovws");
+            nsme.printStackTrace(System.out);
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected exception - ovws");
         }
@@ -46,8 +46,8 @@
         try {
             ((Base)derived).overrideStaticWithVirtual();
         } catch (NoSuchMethodError nsme) {
-            System.err.println("Got NSME - oswv");
-            nsme.printStackTrace(System.err);
+            System.out.println("Got NSME - oswv");
+            nsme.printStackTrace(System.out);
         } catch (IncompatibleClassChangeError icce) {
             System.out.println("Got expected exception - oswv");
         }
diff --git a/test/079-phantom/src/Main.java b/test/079-phantom/src/Main.java
index c54bc0b..daead2e 100644
--- a/test/079-phantom/src/Main.java
+++ b/test/079-phantom/src/Main.java
@@ -21,7 +21,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException ie) {
-            System.err.println("sleep interrupted");
+            System.out.println("sleep interrupted");
         }
     }
 
diff --git a/test/084-class-init/src/Main.java b/test/084-class-init/src/Main.java
index 28eb3e9..a60fbac 100644
--- a/test/084-class-init/src/Main.java
+++ b/test/084-class-init/src/Main.java
@@ -24,7 +24,7 @@
         // that is currently a resolution stub because it's running on behalf of <clinit>.
         try {
             throwDuringClinit();
-            System.err.println("didn't throw!");
+            System.out.println("didn't throw!");
         } catch (NullPointerException ex) {
             System.out.println("caught exception thrown during clinit");
         }
@@ -44,34 +44,34 @@
         try {
             Thread.sleep(msec);
         } catch (InterruptedException ie) {
-            System.err.println("sleep interrupted");
+            System.out.println("sleep interrupted");
         }
     }
 
     static void checkExceptions() {
         try {
             System.out.println(PartialInit.FIELD0);
-            System.err.println("Construction of PartialInit succeeded unexpectedly");
+            System.out.println("Construction of PartialInit succeeded unexpectedly");
         } catch (ExceptionInInitializerError eiie) {
             System.out.println("Got expected EIIE for FIELD0");
         }
 
         try {
             System.out.println(PartialInit.FIELD0);
-            System.err.println("Load of FIELD0 succeeded unexpectedly");
+            System.out.println("Load of FIELD0 succeeded unexpectedly");
         } catch (NoClassDefFoundError ncdfe) {
             System.out.println("Got expected NCDFE for FIELD0");
         }
         try {
             System.out.println(PartialInit.FIELD1);
-            System.err.println("Load of FIELD1 succeeded unexpectedly");
+            System.out.println("Load of FIELD1 succeeded unexpectedly");
         } catch (NoClassDefFoundError ncdfe) {
             System.out.println("Got expected NCDFE for FIELD1");
         }
 
         try {
             System.out.println(Exploder.FIELD);
-            System.err.println("Load of FIELD succeeded unexpectedly");
+            System.out.println("Load of FIELD succeeded unexpectedly");
         } catch (AssertionError expected) {
             System.out.println("Got expected '" + expected.getMessage() + "' from Exploder");
         }
@@ -92,7 +92,7 @@
             fieldThread.join();
             methodThread.join();
         } catch (InterruptedException ie) {
-            System.err.println(ie);
+            System.out.println(ie);
         }
 
         /* print all values */
diff --git a/test/086-null-super/src/Main.java b/test/086-null-super/src/Main.java
index 8bd1786..039a959 100644
--- a/test/086-null-super/src/Main.java
+++ b/test/086-null-super/src/Main.java
@@ -149,14 +149,14 @@
 
             loader = new BrokenDexLoader(ClassLoader.getSystemClassLoader());
             loader.findBrokenClass();
-            System.err.println("ERROR: Inaccessible was accessible");
+            System.out.println("ERROR: Inaccessible was accessible");
         } catch (InvocationTargetException ite) {
             Throwable cause = ite.getCause();
             if (cause instanceof NullPointerException) {
-                System.err.println("Got expected ITE/NPE");
+                System.out.println("Got expected ITE/NPE");
             } else {
-                System.err.println("Got unexpected ITE");
-                ite.printStackTrace();
+                System.out.println("Got unexpected ITE");
+                ite.printStackTrace(System.out);
             }
         }
     }
diff --git a/test/087-gc-after-link/src/Main.java b/test/087-gc-after-link/src/Main.java
index 698af0b..6f686fd 100644
--- a/test/087-gc-after-link/src/Main.java
+++ b/test/087-gc-after-link/src/Main.java
@@ -165,14 +165,14 @@
 
             loader = new BrokenDexLoader(ClassLoader.getSystemClassLoader());
             loader.findBrokenClass();
-            System.err.println("ERROR: Inaccessible was accessible");
+            System.out.println("ERROR: Inaccessible was accessible");
         } catch (InvocationTargetException ite) {
             Throwable cause = ite.getCause();
             if (cause instanceof NullPointerException) {
-                System.err.println("Got expected ITE/NPE");
+                System.out.println("Got expected ITE/NPE");
             } else {
-                System.err.println("Got unexpected ITE");
-                ite.printStackTrace();
+                System.out.println("Got unexpected ITE");
+                ite.printStackTrace(System.out);
             }
         }
     }
diff --git a/test/088-monitor-verification/src/Main.java b/test/088-monitor-verification/src/Main.java
index a6f0e64..bca3df6 100644
--- a/test/088-monitor-verification/src/Main.java
+++ b/test/088-monitor-verification/src/Main.java
@@ -41,7 +41,7 @@
         m.nestedMayThrow(false);
         try {
             m.nestedMayThrow(true);
-            System.err.println("nestedThrow(true) did not throw");
+            System.out.println("nestedThrow(true) did not throw");
         } catch (MyException me) {}
         System.out.println("nestedMayThrow ok");
 
diff --git a/test/092-locale/src/Main.java b/test/092-locale/src/Main.java
index 8916a29..60c0551 100644
--- a/test/092-locale/src/Main.java
+++ b/test/092-locale/src/Main.java
@@ -34,31 +34,31 @@
         try {
             testCalendar();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testDateFormatSymbols();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testCurrency();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testNormalizer();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
 
         try {
             testIso3();
         } catch (Exception ex) {
-            ex.printStackTrace();
+            ex.printStackTrace(System.out);
         }
     }
 
@@ -125,13 +125,13 @@
 
         res = Normalizer.normalize(composed, Normalizer.Form.NFD);
         if (!decomposed.equals(res)) {
-            System.err.println("Bad decompose: '" + composed + "' --> '"
+            System.out.println("Bad decompose: '" + composed + "' --> '"
                 + res + "'");
         }
 
         res = Normalizer.normalize(decomposed, Normalizer.Form.NFC);
         if (!composed.equals(res)) {
-            System.err.println("Bad compose: '" + decomposed + "' --> '"
+            System.out.println("Bad compose: '" + decomposed + "' --> '"
                 + res + "'");
         }
 
@@ -153,7 +153,7 @@
         try {
             System.out.println(" iso3=" + loc.getISO3Language());
         } catch (MissingResourceException mre) {
-            System.err.println("couldn't get iso3 language");
+            System.out.println("couldn't get iso3 language");
         }
     }
 }
diff --git a/test/095-switch-MAX_INT/src/Main.java b/test/095-switch-MAX_INT/src/Main.java
index d1171ea..a004a1a 100644
--- a/test/095-switch-MAX_INT/src/Main.java
+++ b/test/095-switch-MAX_INT/src/Main.java
@@ -2,7 +2,7 @@
   static public void main(String[] args) throws Exception {
     switch (0x7fffffff) {
     case 0x7fffffff:
-      System.err.println("good");
+      System.out.println("good");
       break;
     default:
       throw new AssertionError();
diff --git a/test/100-reflect2/src/Main.java b/test/100-reflect2/src/Main.java
index 91ba307..5f6ffa8 100644
--- a/test/100-reflect2/src/Main.java
+++ b/test/100-reflect2/src/Main.java
@@ -292,7 +292,7 @@
       // Expected.
     } catch (Exception e) {
       // Error.
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
@@ -304,7 +304,7 @@
       cons.newInstance();
     } catch (Exception e) {
       // Error.
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/101-fibonacci/src/Main.java b/test/101-fibonacci/src/Main.java
index c594edb..9c57ba7 100644
--- a/test/101-fibonacci/src/Main.java
+++ b/test/101-fibonacci/src/Main.java
@@ -51,7 +51,7 @@
             y = fibonacci(x + 1);
             System.out.printf("fibonacci(%d)=%d\n", x + 1, y);
         } catch (NumberFormatException ex) {
-            System.err.println(ex);
+            System.out.println(ex);
             System.exit(1);
         }
     }
diff --git a/test/109-suspend-check/src/Main.java b/test/109-suspend-check/src/Main.java
index 3c3353b..e140a59 100644
--- a/test/109-suspend-check/src/Main.java
+++ b/test/109-suspend-check/src/Main.java
@@ -55,7 +55,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException ie) {
-            System.err.println("sleep was interrupted");
+            System.out.println("sleep was interrupted");
         }
     }
 }
diff --git a/test/114-ParallelGC/src/Main.java b/test/114-ParallelGC/src/Main.java
index 159dd5c..2199872 100644
--- a/test/114-ParallelGC/src/Main.java
+++ b/test/114-ParallelGC/src/Main.java
@@ -82,7 +82,7 @@
             // Any exception or error getting here is bad.
             try {
                 // May need allocations...
-                t.printStackTrace(System.err);
+                t.printStackTrace(System.out);
             } catch (Throwable tInner) {
             }
             System.exit(1);
diff --git a/test/987-stack-trace-dumping/run b/test/115-native-bridge/check
similarity index 65%
copy from test/987-stack-trace-dumping/run
copy to test/115-native-bridge/check
index dee3e8b..1ecf334 100755
--- a/test/987-stack-trace-dumping/run
+++ b/test/115-native-bridge/check
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2017 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,5 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ask for stack traces to be dumped to a file rather than to stdout.
-./default-run "$@" --set-stack-trace-dump-dir
+# ASAN prints a warning here.
+
+sed -e '/WARNING: ASan is ignoring requested __asan_handle_no_return/,+2d' "$2" | \
+    diff --strip-trailing-cr -q "$1" - >/dev/null
diff --git a/test/120-hashcode/src/Main.java b/test/120-hashcode/src/Main.java
index d2435ce..0955f50 100644
--- a/test/120-hashcode/src/Main.java
+++ b/test/120-hashcode/src/Main.java
@@ -30,7 +30,7 @@
         // Make sure that all the hashes agree.
         if (hashOrig != hashInflated || hashOrig != hashSystemOrig ||
             hashSystemOrig != hashSystemInflated) {
-            System.err.println("hash codes dont match: " + hashOrig + " " + hashInflated + " " +
+            System.out.println("hash codes dont match: " + hashOrig + " " + hashInflated + " " +
             hashSystemOrig + " " + hashSystemInflated);
         }
         System.out.println("Done.");
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 5899dd1..a8597f1 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -140,7 +140,7 @@
             allocator.join();
             dumper.join();
         } catch (InterruptedException e) {
-            System.err.println("join interrupted");
+            System.out.println("join interrupted");
         }
     }
 
@@ -178,7 +178,7 @@
         try {
             Thread.sleep(ms);
         } catch (InterruptedException e) {
-            System.err.println("sleep interrupted");
+            System.out.println("sleep interrupted");
         }
     }
 
@@ -223,7 +223,7 @@
         try {
             meth = vmdClass.getMethod("dumpHprofData", String.class);
         } catch (NoSuchMethodException nsme) {
-            System.err.println("Found VMDebug but not dumpHprofData method");
+            System.out.println("Found VMDebug but not dumpHprofData method");
             return null;
         }
 
diff --git a/test/1337-gc-coverage/gc_coverage.cc b/test/1337-gc-coverage/gc_coverage.cc
index 1cb2fb0..ac959f6 100644
--- a/test/1337-gc-coverage/gc_coverage.cc
+++ b/test/1337-gc-coverage/gc_coverage.cc
@@ -18,7 +18,7 @@
 #include "jni.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/135-MirandaDispatch/src/Main.java b/test/135-MirandaDispatch/src/Main.java
index ada8cef..ab2a90b 100644
--- a/test/135-MirandaDispatch/src/Main.java
+++ b/test/135-MirandaDispatch/src/Main.java
@@ -53,7 +53,7 @@
         } catch (VerifyError expected) {
             System.out.println("b/21646347");
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         System.out.println("Finishing");
     }
diff --git a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
index b729301..7d40f57 100644
--- a/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
+++ b/test/136-daemon-jni-shutdown/daemon_jni_shutdown.cc
@@ -21,7 +21,7 @@
 #include "base/macros.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/137-cfi/cfi.cc b/test/137-cfi/cfi.cc
index 3b237f4..1ed1f5a 100644
--- a/test/137-cfi/cfi.cc
+++ b/test/137-cfi/cfi.cc
@@ -33,6 +33,7 @@
 #include "gc/heap.h"
 #include "gc/space/image_space.h"
 #include "oat_file.h"
+#include "runtime.h"
 #include "utils.h"
 
 namespace art {
diff --git a/test/138-duplicate-classes-check/src/Main.java b/test/138-duplicate-classes-check/src/Main.java
index 5ffceb9..b32f0bc 100644
--- a/test/138-duplicate-classes-check/src/Main.java
+++ b/test/138-duplicate-classes-check/src/Main.java
@@ -42,7 +42,7 @@
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
-            exc.printStackTrace();
+            exc.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/138-duplicate-classes-check2/src/Main.java b/test/138-duplicate-classes-check2/src/Main.java
index a0d6977..faf8b5d 100644
--- a/test/138-duplicate-classes-check2/src/Main.java
+++ b/test/138-duplicate-classes-check2/src/Main.java
@@ -37,7 +37,7 @@
             Method test = testEx.getDeclaredMethod("test");
             test.invoke(null);
         } catch (Exception exc) {
-            exc.printStackTrace();
+            exc.printStackTrace(System.out);
         }
     }
 }
diff --git a/test/141-class-unload/jni_unload.cc b/test/141-class-unload/jni_unload.cc
index 9b7e171..355457d 100644
--- a/test/141-class-unload/jni_unload.cc
+++ b/test/141-class-unload/jni_unload.cc
@@ -20,7 +20,7 @@
 
 #include "jit/jit.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 7e8431f..9072c8b 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -50,7 +50,7 @@
             // Test that objects keep class loader live for sticky GC.
             testStickyUnload(constructor);
         } catch (Exception e) {
-            e.printStackTrace();
+            e.printStackTrace(System.out);
         }
     }
 
diff --git a/test/142-classloader2/src/Main.java b/test/142-classloader2/src/Main.java
index a0c7764..193fd5d 100644
--- a/test/142-classloader2/src/Main.java
+++ b/test/142-classloader2/src/Main.java
@@ -91,7 +91,7 @@
           if (e.getCause() instanceof VerifyError) {
             System.out.println("Caught wrapped VerifyError.");
           } else {
-            e.printStackTrace();
+            e.printStackTrace(System.out);
           }
         }
 
diff --git a/test/146-bad-interface/src/Main.java b/test/146-bad-interface/src/Main.java
index 5534bb4..958ec7c 100644
--- a/test/146-bad-interface/src/Main.java
+++ b/test/146-bad-interface/src/Main.java
@@ -37,7 +37,7 @@
     } catch (Throwable t) {
       System.out.println("Error occurred");
       System.out.println(t);
-      t.printStackTrace();
+      t.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/148-multithread-gc-annotations/gc_coverage.cc b/test/148-multithread-gc-annotations/gc_coverage.cc
index 4862b87..f48493c 100644
--- a/test/148-multithread-gc-annotations/gc_coverage.cc
+++ b/test/148-multithread-gc-annotations/gc_coverage.cc
@@ -18,7 +18,7 @@
 #include "jni.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/155-java-set-resolved-type/src/Main.java b/test/155-java-set-resolved-type/src/Main.java
index 8f79bd7..44278a1 100644
--- a/test/155-java-set-resolved-type/src/Main.java
+++ b/test/155-java-set-resolved-type/src/Main.java
@@ -61,7 +61,7 @@
             // to be resolved and found through simple lookup.
             timpl.newInstance();
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
diff --git a/test/156-register-dex-file-multi-loader/src/Main.java b/test/156-register-dex-file-multi-loader/src/Main.java
index ff5a2bd..6aa1d78 100644
--- a/test/156-register-dex-file-multi-loader/src/Main.java
+++ b/test/156-register-dex-file-multi-loader/src/Main.java
@@ -81,7 +81,7 @@
                      !message.endsWith(" with multiple class loaders");
       }
       if (unexpected) {
-        cnfe.getCause().printStackTrace();
+        cnfe.getCause().printStackTrace(System.out);
       }
     }
   }
diff --git a/test/158-app-image-class-table/src/Main.java b/test/158-app-image-class-table/src/Main.java
index 804468f..97aa14d 100644
--- a/test/158-app-image-class-table/src/Main.java
+++ b/test/158-app-image-class-table/src/Main.java
@@ -39,7 +39,7 @@
             // to be resolved and found through simple lookup.
             timpl.newInstance();
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
diff --git a/test/159-app-image-fields/src/Main.java b/test/159-app-image-fields/src/Main.java
index d06a502..47d0116 100644
--- a/test/159-app-image-fields/src/Main.java
+++ b/test/159-app-image-fields/src/Main.java
@@ -57,7 +57,7 @@
                 System.out.println("another_value: " + another_value);
             }
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
     }
 
diff --git a/test/301-abstract-protected/src/Main.java b/test/301-abstract-protected/src/Main.java
index 9b19a9d..f120267 100644
--- a/test/301-abstract-protected/src/Main.java
+++ b/test/301-abstract-protected/src/Main.java
@@ -16,7 +16,7 @@
 
 public class Main {
   public static void main(String args[]) throws Exception {
-    System.err.println(new C().m());
+    System.out.println(new C().m());
   }
 }
 
diff --git a/test/409-materialized-condition/src/Main.java b/test/409-materialized-condition/src/Main.java
index 0c179a9..5f21bc3 100644
--- a/test/409-materialized-condition/src/Main.java
+++ b/test/409-materialized-condition/src/Main.java
@@ -50,6 +50,49 @@
     return b;
   }
 
+  public static boolean $noinline$intEq0(int x) {
+    return x == 0;
+  }
+
+  public static boolean $noinline$intNe0(int x) {
+    return x != 0;
+  }
+
+  public static boolean $noinline$longEq0(long x) {
+    return x == 0;
+  }
+
+  public static boolean $noinline$longNe0(long x) {
+    return x != 0;
+  }
+
+  public static boolean $noinline$longEqCst(long x) {
+    return x == 0x0123456789ABCDEFL;
+  }
+
+  public static boolean $noinline$longNeCst(long x) {
+    return x != 0x0123456789ABCDEFL;
+  }
+
+  public static void assertEqual(boolean expected, boolean actual) {
+    if (expected != actual) {
+      throw new Error("Assertion failed: " + expected + " != " + actual);
+    }
+  }
+
+  // The purpose of this method is to test code generation for a materialized
+  // HCondition that is not equality or inequality, and that has one boolean
+  // input. That can't be done directly, so we have to rely on the instruction
+  // simplifier to transform the control-flow graph appropriately.
+  public static boolean $noinline$booleanCondition(boolean in) {
+    int value = in ? 1 : 0;
+
+    // Calling a non-inlineable method that uses `value` as well prevents a
+    // transformation of the return value into `false`.
+    $noinline$intNe0(value);
+    return value > 127;
+  }
+
   public static void main(String[] args) {
     System.out.println("foo1");
     int res = foo1();
@@ -62,5 +105,49 @@
     if (res != 42) {
       throw new Error("Unexpected return value for foo2: " + res + ", expected 42.");
     }
+
+    assertEqual($noinline$booleanCondition(false), false);
+    assertEqual($noinline$booleanCondition(true), false);
+
+    int[] int_inputs = {0, 1, -1, Integer.MIN_VALUE, Integer.MAX_VALUE, 42, -9000};
+    long[] long_inputs = {
+        0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 0x100000000L,
+        0x100000001L, -9000L, 0x0123456789ABCDEFL};
+
+    boolean[] int_eq_0_expected = {true, false, false, false, false, false, false};
+
+    for (int i = 0; i < int_inputs.length; i++) {
+      assertEqual(int_eq_0_expected[i], $noinline$intEq0(int_inputs[i]));
+    }
+
+    boolean[] int_ne_0_expected = {false, true, true, true, true, true, true};
+
+    for (int i = 0; i < int_inputs.length; i++) {
+      assertEqual(int_ne_0_expected[i], $noinline$intNe0(int_inputs[i]));
+    }
+
+    boolean[] long_eq_0_expected = {true, false, false, false, false, false, false, false, false};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_eq_0_expected[i], $noinline$longEq0(long_inputs[i]));
+    }
+
+    boolean[] long_ne_0_expected = {false, true, true, true, true, true, true, true, true};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_ne_0_expected[i], $noinline$longNe0(long_inputs[i]));
+    }
+
+    boolean[] long_eq_cst_expected = {false, false, false, false, false, false, false, false, true};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_eq_cst_expected[i], $noinline$longEqCst(long_inputs[i]));
+    }
+
+    boolean[] long_ne_cst_expected = {true, true, true, true, true, true, true, true, false};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(long_ne_cst_expected[i], $noinline$longNeCst(long_inputs[i]));
+    }
   }
 }
diff --git a/test/476-checker-ctor-memory-barrier/src/Main.java b/test/476-checker-ctor-memory-barrier/src/Main.java
index 70c5121..e887cd3 100644
--- a/test/476-checker-ctor-memory-barrier/src/Main.java
+++ b/test/476-checker-ctor-memory-barrier/src/Main.java
@@ -261,7 +261,7 @@
 
   /// CHECK-START: void Main.testNewString() inliner (after)
   /// CHECK-NOT:  ConstructorFence
-  /// CHECK:      InvokeStaticOrDirect method_load_kind:string_init
+  /// CHECK:      InvokeStaticOrDirect method_load_kind:StringInit
   /// CHECK-NOT:  ConstructorFence
   /// CHECK-NOT:  InvokeStaticOrDirect
   public static void testNewString() {
diff --git a/test/487-checker-inline-calls/src/Main.java b/test/487-checker-inline-calls/src/Main.java
index 70384d5..00694f3 100644
--- a/test/487-checker-inline-calls/src/Main.java
+++ b/test/487-checker-inline-calls/src/Main.java
@@ -20,7 +20,7 @@
     try {
       doTopCall();
     } catch (Error e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
diff --git a/test/488-checker-inline-recursive-calls/src/Main.java b/test/488-checker-inline-recursive-calls/src/Main.java
index 87ff3f7..1137837 100644
--- a/test/488-checker-inline-recursive-calls/src/Main.java
+++ b/test/488-checker-inline-recursive-calls/src/Main.java
@@ -20,15 +20,15 @@
     try {
       doTopCall(true);
     } catch (Error e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (before)
-  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:recursive
+  /// CHECK-NOT:   InvokeStaticOrDirect method_load_kind:Recursive
 
   /// CHECK-START: void Main.doTopCall(boolean) inliner (after)
-  /// CHECK:       InvokeStaticOrDirect method_load_kind:recursive
+  /// CHECK:       InvokeStaticOrDirect method_load_kind:Recursive
   public static void doTopCall(boolean first_call) {
     if (first_call) {
       inline1();
diff --git a/test/492-checker-inline-invoke-interface/src/Main.java b/test/492-checker-inline-invoke-interface/src/Main.java
index a919690..785c0db 100644
--- a/test/492-checker-inline-invoke-interface/src/Main.java
+++ b/test/492-checker-inline-invoke-interface/src/Main.java
@@ -21,7 +21,7 @@
 class ForceStatic {
   static {
     System.out.println("Hello from clinit");
-    new Exception().printStackTrace();
+    new Exception().printStackTrace(System.out);
   }
   static int field;
 }
diff --git a/test/493-checker-inline-invoke-interface/src/Main.java b/test/493-checker-inline-invoke-interface/src/Main.java
index 171405c..0570b20 100644
--- a/test/493-checker-inline-invoke-interface/src/Main.java
+++ b/test/493-checker-inline-invoke-interface/src/Main.java
@@ -21,7 +21,7 @@
 class ForceStatic {
   static {
     System.out.println("Hello from clinit");
-    new Exception().printStackTrace();
+    new Exception().printStackTrace(System.out);
   }
   static int field;
 }
diff --git a/test/497-inlining-and-class-loader/src/Main.java b/test/497-inlining-and-class-loader/src/Main.java
index 1e27e77..01b4bcd 100644
--- a/test/497-inlining-and-class-loader/src/Main.java
+++ b/test/497-inlining-and-class-loader/src/Main.java
@@ -121,7 +121,7 @@
     // Because we cleared dex cache entries, we will have to find
     // classes again, which require to use the correct class loader
     // in the presence of inlining.
-    new Exception().printStackTrace();
+    new Exception().printStackTrace(System.out);
   }
   static Object savedResolvedMethods;
 
diff --git a/test/522-checker-regression-monitor-exit/src/Main.java b/test/522-checker-regression-monitor-exit/src/Main.java
index c4f80fc..5c26f36 100644
--- a/test/522-checker-regression-monitor-exit/src/Main.java
+++ b/test/522-checker-regression-monitor-exit/src/Main.java
@@ -43,8 +43,8 @@
         Method m = c.getMethod("synchronizedHashCode", Object.class);
         result = (Integer) m.invoke(null, m_obj);
       } catch (Exception e) {
-        System.err.println("Hash code query exception");
-        e.printStackTrace();
+        System.out.println("Hash code query exception");
+        e.printStackTrace(System.out);
         result = -1;
       }
       return result;
@@ -77,7 +77,7 @@
       }
       pool.shutdown();
     } catch (CancellationException ex) {
-      System.err.println("Job timeout");
+      System.out.println("Job timeout");
       System.exit(1);
     }
   }
diff --git a/test/551-checker-shifter-operand/src/Main.java b/test/551-checker-shifter-operand/src/Main.java
index bf09a6a..951889a 100644
--- a/test/551-checker-shifter-operand/src/Main.java
+++ b/test/551-checker-shifter-operand/src/Main.java
@@ -234,8 +234,8 @@
   /// CHECK-START-ARM: void Main.$opt$noinline$testAnd(long, long) disassembly (after)
   /// CHECK:                            and lsl
   /// CHECK:                            sbfx
-  /// CHECK:                            asr
-  /// CHECK:                            and
+  /// CHECK:                            asr{{s?}}
+  /// CHECK:                            and{{s?}}
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testAnd(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:                            DataProcWithShifterOp
@@ -259,7 +259,7 @@
   /// CHECK-START-ARM: void Main.$opt$noinline$testOr(int, int) disassembly (after)
   /// CHECK:                            orr asr
   /// CHECK:                            ubfx
-  /// CHECK:                            orr
+  /// CHECK:                            orr{{s?}}
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testOr(int, int) instruction_simplifier_arm64 (after)
   /// CHECK:                            DataProcWithShifterOp
@@ -282,9 +282,8 @@
 
   /// CHECK-START-ARM: void Main.$opt$noinline$testXor(long, long) disassembly (after)
   /// CHECK:                            eor lsr
-  /// CHECK:                            mov
-  /// CHECK:                            asr
-  /// CHECK:                            eor
+  /// CHECK:                            asr{{s?}}
+  /// CHECK:                            eor{{s?}}
 
   /// CHECK-START-ARM64: void Main.$opt$noinline$testXor(long, long) instruction_simplifier_arm64 (after)
   /// CHECK:                            DataProcWithShifterOp
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index dd77423..7408e6d 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -42,31 +42,30 @@
   }
 
   /// CHECK-START: int Main.testSimple(int) sharpening (before)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:RuntimeCall
 
   /// CHECK-START-ARM: int Main.testSimple(int) sharpening (after)
-  /// CHECK-NOT:            ArmDexCacheArraysBase
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-ARM64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-MIPS: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-MIPS64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-X86: int Main.testSimple(int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-X86_64: int Main.testSimple(int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
-  /// CHECK-START-ARM: int Main.testSimple(int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                ArmDexCacheArraysBase
-  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK-START-MIPS: int Main.testSimple(int) pc_relative_fixups_mips (after)
+  /// CHECK:                MipsComputeBaseMethodAddress
+  /// CHECK-NOT:            MipsComputeBaseMethodAddress
 
   /// CHECK-START-X86: int Main.testSimple(int) pc_relative_fixups_x86 (after)
   /// CHECK:                X86ComputeBaseMethodAddress
@@ -78,40 +77,39 @@
   }
 
   /// CHECK-START: int Main.testDiamond(boolean, int) sharpening (before)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_via_method
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:RuntimeCall
 
   /// CHECK-START-ARM: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK-NOT:            ArmDexCacheArraysBase
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-ARM64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-MIPS64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) sharpening (after)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   /// CHECK-START-X86_64: int Main.testDiamond(boolean, int) sharpening (after)
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
-  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                ArmDexCacheArraysBase
-  /// CHECK-NOT:            ArmDexCacheArraysBase
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) pc_relative_fixups_mips (after)
+  /// CHECK:                MipsComputeBaseMethodAddress
+  /// CHECK-NOT:            MipsComputeBaseMethodAddress
 
-  /// CHECK-START-ARM: int Main.testDiamond(boolean, int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                ArmDexCacheArraysBase
+  /// CHECK-START-MIPS: int Main.testDiamond(boolean, int) pc_relative_fixups_mips (after)
+  /// CHECK:                MipsComputeBaseMethodAddress
   /// CHECK-NEXT:           If
 
   /// CHECK-START-X86: int Main.testDiamond(boolean, int) pc_relative_fixups_x86 (after)
@@ -123,8 +121,8 @@
   /// CHECK-NEXT:           If
 
   public static int testDiamond(boolean negate, int x) {
-    // These calls should use PC-relative dex cache array loads to retrieve the target method.
-    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the If.
+    // These calls should use PC-relative loads to retrieve the target method.
+    // PC-relative bases used by MIPS and X86 should be pulled before the If.
     if (negate) {
       return $noinline$foo(-x);
     } else {
@@ -132,6 +130,24 @@
     }
   }
 
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (before)
+  /// CHECK-NOT:            MipsComputeBaseMethodAddress
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (after)
+  /// CHECK:                MipsComputeBaseMethodAddress
+  /// CHECK-NOT:            MipsComputeBaseMethodAddress
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (after)
+  /// CHECK:                InvokeStaticOrDirect
+  /// CHECK-NOT:            InvokeStaticOrDirect
+
+  /// CHECK-START-MIPS: int Main.testLoop(int[], int) pc_relative_fixups_mips (after)
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+  /// CHECK:                begin_block
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
+
   /// CHECK-START-X86: int Main.testLoop(int[], int) pc_relative_fixups_x86 (before)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
 
@@ -148,34 +164,26 @@
   /// CHECK-NEXT:           X86ComputeBaseMethodAddress
   /// CHECK-NEXT:           Goto
   /// CHECK:                begin_block
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
-
-  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (before)
-  /// CHECK-NOT:            ArmDexCacheArraysBase
-
-  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                ArmDexCacheArraysBase
-  /// CHECK-NOT:            ArmDexCacheArraysBase
-
-  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                InvokeStaticOrDirect
-  /// CHECK-NOT:            InvokeStaticOrDirect
-
-  /// CHECK-START-ARM: int Main.testLoop(int[], int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                ArrayLength
-  /// CHECK-NEXT:           ArmDexCacheArraysBase
-  /// CHECK-NEXT:           Goto
-  /// CHECK:                begin_block
-  /// CHECK:                InvokeStaticOrDirect method_load_kind:dex_cache_pc_relative
+  /// CHECK:                InvokeStaticOrDirect method_load_kind:BssEntry
 
   public static int testLoop(int[] array, int x) {
-    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop.
+    // PC-relative bases used by MIPS and X86 should be pulled before the loop.
     for (int i : array) {
       x += $noinline$foo(i);
     }
     return x;
   }
 
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_mips (before)
+  /// CHECK-NOT:            MipsComputeBaseMethodAddress
+
+  /// CHECK-START-MIPS: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_mips (after)
+  /// CHECK:                If
+  /// CHECK:                begin_block
+  /// CHECK:                ArrayLength
+  /// CHECK-NEXT:           MipsComputeBaseMethodAddress
+  /// CHECK-NEXT:           Goto
+
   /// CHECK-START-X86: int Main.testLoopWithDiamond(int[], boolean, int) pc_relative_fixups_x86 (before)
   /// CHECK-NOT:            X86ComputeBaseMethodAddress
 
@@ -186,18 +194,8 @@
   /// CHECK-NEXT:           X86ComputeBaseMethodAddress
   /// CHECK-NEXT:           Goto
 
-  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (before)
-  /// CHECK-NOT:            ArmDexCacheArraysBase
-
-  /// CHECK-START-ARM: int Main.testLoopWithDiamond(int[], boolean, int) dex_cache_array_fixups_arm (after)
-  /// CHECK:                If
-  /// CHECK:                begin_block
-  /// CHECK:                ArrayLength
-  /// CHECK-NEXT:           ArmDexCacheArraysBase
-  /// CHECK-NEXT:           Goto
-
   public static int testLoopWithDiamond(int[] array, boolean negate, int x) {
-    // PC-relative bases used by ARM, MIPS and X86 should be pulled before the loop
+    // PC-relative bases used by MIPS and X86 should be pulled before the loop
     // but not outside the if.
     if (array != null) {
       for (int i : array) {
@@ -212,37 +210,31 @@
   }
 
   /// CHECK-START: java.lang.String Main.$noinline$getBootImageString() sharpening (before)
-  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+  /// CHECK:                LoadString load_kind:RuntimeCall
 
   /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   /// CHECK-START-MIPS64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry}}
 
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
@@ -252,7 +244,7 @@
   }
 
   /// CHECK-START: java.lang.String Main.$noinline$getNonBootImageString() sharpening (before)
-  /// CHECK:                LoadString load_kind:DexCacheViaMethod
+  /// CHECK:                LoadString load_kind:RuntimeCall
 
   /// CHECK-START-X86: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:BssEntry
@@ -285,33 +277,27 @@
 
   /// CHECK-START-X86: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-X86_64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-ARM: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-ARM64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-MIPS: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   /// CHECK-START-MIPS64: java.lang.Class Main.$noinline$getStringClass() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
-  // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}} class_name:java.lang.String
+  /// CHECK:                LoadClass load_kind:{{BootImageAddress|BssEntry}} class_name:java.lang.String
 
   public static Class<?> $noinline$getStringClass() {
     // Prevent inlining to avoid the string comparison being optimized away.
diff --git a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
index e4bf236..5f73bbe 100644
--- a/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/564-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -18,8 +18,8 @@
 
 ## CHECK-START-X86: int IrreducibleLoop.simpleLoop(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
-## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>] loop:{{B\d+}} irreducible:true
-## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>{{(,[ij]\d+)?}}] loop:{{B\d+}} irreducible:true
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>{{(,[ij]\d+)?}}] loop:none
 .method public static simpleLoop(I)I
    .registers 3
    const/16 v0, 42
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
index 8eca6b2..45ead6b 100644
--- a/test/570-checker-osr/osr.cc
+++ b/test/570-checker-osr/osr.cc
@@ -21,6 +21,7 @@
 #include "oat_quick_method_header.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
+#include "stack.h"
 #include "stack_map.h"
 
 namespace art {
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
index 907d133..e272607 100644
--- a/test/570-checker-osr/src/DeoptimizationController.java
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -53,7 +53,7 @@
         throw new IllegalStateException("Not tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     } finally {
       if (tempFile != null) {
         tempFile.delete();
@@ -68,7 +68,7 @@
         throw new IllegalStateException("Still tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     }
   }
 
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index 3ac6f89..2dad14c 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -414,6 +414,46 @@
     return a > 0x7FFFFFFFFFFFFFFFL ? x : y;
   }
 
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar4(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            orrs ip, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar4(long a, long x, long y) {
+    return a == 0 ? x : y;  // Selects x when a equals zero, otherwise y.
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar5(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            orrs ip, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar5(long a, long x, long y) {
+    return a != 0 ? x : y;  // Selects x when a is non-zero, otherwise y.
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar6(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, #0
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar6(long a, long x, long y) {
+    return a >= 0 ? x : y;  // Selects x when a is non-negative, otherwise y.
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar7(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, #0
+  /// CHECK-NOT:             cmp
+  /// CHECK-NOT:             sbcs
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar7(long a, long x, long y) {
+    return a < 0 ? x : y;  // Selects x when a is negative, otherwise y.
+  }
+
   /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -688,6 +728,37 @@
 
     assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar3(2L, 5L, 7L));
 
+    long[] long_inputs = {
+        0L, 1L, -1L, Long.MIN_VALUE, Long.MAX_VALUE, 2L, 0x100000000L, 0xFFFFFFFF00000000L, -9000L};
+
+    long[] expected_1 = {5L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_1[i], $noinline$LongNonmatCondCst_LongVarVar4(long_inputs[i], 5L, 7L));
+    }
+
+    long[] expected_2 = {7L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_2[i], $noinline$LongNonmatCondCst_LongVarVar5(long_inputs[i], 5L, 7L));
+    }
+
+    long[] expected_3 = {5L, 5L, 7L, 7L, 5L, 5L, 5L, 7L, 7L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_3[i], $noinline$LongNonmatCondCst_LongVarVar6(long_inputs[i], 5L, 7L));
+    }
+
+    long[] expected_4 = {7L, 7L, 5L, 5L, 7L, 7L, 7L, 5L, 5L};
+
+    for (int i = 0; i < long_inputs.length; i++) {
+      assertEqual(expected_4[i], $noinline$LongNonmatCondCst_LongVarVar7(long_inputs[i], 5L, 7L));
+    }
+
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(0L, 5L, 7L));
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar7(2L, 5L, 7L));
+    assertEqual(5L, $noinline$LongNonmatCondCst_LongVarVar7(-9000L, 5L, 7L));
+
     assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
diff --git a/test/588-checker-irreducib-lifetime-hole/smali/IrreducibleLoop.smali b/test/588-checker-irreducib-lifetime-hole/smali/IrreducibleLoop.smali
index 9b8aa51..3058358 100644
--- a/test/588-checker-irreducib-lifetime-hole/smali/IrreducibleLoop.smali
+++ b/test/588-checker-irreducib-lifetime-hole/smali/IrreducibleLoop.smali
@@ -19,8 +19,8 @@
 ## CHECK-START-X86: int IrreducibleLoop.simpleLoop1(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     Goto irreducible:true
-## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>] loop:none
-## CHECK-DAG:                     InvokeStaticOrDirect [{{i\d+}}] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>{{(,[ij]\d+)?}}] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [{{i\d+}}{{(,[ij]\d+)?}}] loop:none
 .method public static simpleLoop1(I)I
    .registers 3
    const/16 v0, 42
@@ -59,8 +59,8 @@
 ## CHECK-START-X86: int IrreducibleLoop.simpleLoop2(int) dead_code_elimination$initial (before)
 ## CHECK-DAG: <<Constant:i\d+>>   IntConstant 42
 ## CHECK-DAG:                     Goto irreducible:true
-## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>] loop:none
-## CHECK-DAG:                     InvokeStaticOrDirect [{{i\d+}}] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [<<Constant>>{{(,[ij]\d+)?}}] loop:none
+## CHECK-DAG:                     InvokeStaticOrDirect [{{i\d+}}{{(,[ij]\d+)?}}] loop:none
 .method public static simpleLoop2(I)I
    .registers 3
    const/16 v0, 42
diff --git a/test/595-profile-saving/profile-saving.cc b/test/595-profile-saving/profile-saving.cc
index 0f8dd57..019ddad 100644
--- a/test/595-profile-saving/profile-saving.cc
+++ b/test/595-profile-saving/profile-saving.cc
@@ -26,6 +26,7 @@
 #include "oat_file_manager.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
+#include "stack.h"
 #include "thread.h"
 
 namespace art {
diff --git a/test/596-app-images/app_images.cc b/test/596-app-images/app_images.cc
index 42211f7..fa9c902 100644
--- a/test/596-app-images/app_images.cc
+++ b/test/596-app-images/app_images.cc
@@ -63,6 +63,12 @@
   return JNI_FALSE;
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkInitialized(JNIEnv*, jclass, jclass c) {
+  ScopedObjectAccess soa(Thread::Current());  // Scoped access for the calling thread.
+  ObjPtr<mirror::Class> klass_ptr = soa.Decode<mirror::Class>(c);  // jclass -> mirror::Class.
+  return klass_ptr->IsInitialized();  // Result is returned to Java as a jboolean.
+}
+
 }  // namespace
 
 }  // namespace art
diff --git a/test/596-app-images/src/Main.java b/test/596-app-images/src/Main.java
index 75b31b8..8ee3c88 100644
--- a/test/596-app-images/src/Main.java
+++ b/test/596-app-images/src/Main.java
@@ -16,7 +16,11 @@
 
 class Main {
   static class Inner {
-    public static int abc = 0;
+    final public static int abc = 10;
+  }
+
+  static class Nested {  // No members; main() reports an error if it is not initialized.
+
+  }
 
   public static void main(String[] args) {
@@ -26,8 +30,44 @@
     } else if (!checkAppImageContains(Inner.class)) {
       System.out.println("App image does not contain Inner!");
     }
+
+    if (!checkInitialized(Inner.class))
+      System.out.println("Inner class is not initialized!");
+
+    if (!checkInitialized(Nested.class))
+      System.out.println("Nested class is not initialized!");
+
+    if (!checkInitialized(StaticFields.class))
+      System.out.println("StaticFields class is not initialized!");
+
+    if (!checkInitialized(StaticFieldsInitSub.class))
+      System.out.println("StaticFieldsInitSub class is not initialized!");
+
+    if (!checkInitialized(StaticFieldsInit.class))
+      System.out.println("StaticFieldsInit class is not initialized!");
+
+    if (checkInitialized(StaticInternString.class))
+      System.out.println("StaticInternString class is initialized!");
   }
 
   public static native boolean checkAppImageLoaded();
   public static native boolean checkAppImageContains(Class<?> klass);
+  public static native boolean checkInitialized(Class<?> klass);
 }
+
+class StaticFields{
+  public static int abc;  // No initializer; main() expects this class to be initialized.
+}
+
+class StaticFieldsInitSub extends StaticFieldsInit {
+  final public static int def = 10;  // Constant field; main() expects this class initialized.
+}
+
+class StaticFieldsInit{
+  final public static int abc = 10;  // Constant field; main() expects this class initialized.
+}
+
+class StaticInternString {
+  final public static String intern = "java.abc.Action";  // main() expects NOT initialized.
+}
+
diff --git a/test/596-monitor-inflation/monitor_inflation.cc b/test/596-monitor-inflation/monitor_inflation.cc
index fb4275b..07d1ddb 100644
--- a/test/596-monitor-inflation/monitor_inflation.cc
+++ b/test/596-monitor-inflation/monitor_inflation.cc
@@ -18,7 +18,7 @@
 #include "jni.h"
 #include "monitor.h"
 #include "runtime.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 namespace {
diff --git a/test/597-deopt-new-string/deopt.cc b/test/597-deopt-new-string/deopt.cc
index 844a786..0f02efe 100644
--- a/test/597-deopt-new-string/deopt.cc
+++ b/test/597-deopt-new-string/deopt.cc
@@ -21,6 +21,7 @@
 #include "thread_state.h"
 #include "gc/gc_cause.h"
 #include "gc/scoped_gc_critical_section.h"
+#include "scoped_thread_state_change-inl.h"
 
 namespace art {
 
diff --git a/test/602-deoptimizeable/src/Main.java b/test/602-deoptimizeable/src/Main.java
index 743a579..d995923 100644
--- a/test/602-deoptimizeable/src/Main.java
+++ b/test/602-deoptimizeable/src/Main.java
@@ -99,7 +99,7 @@
                         System.exit(0);
                     }
                 } catch (Exception e) {
-                    e.printStackTrace();
+                    e.printStackTrace(System.out);
                 }
             }
         });
@@ -127,7 +127,7 @@
                     map.put(new DummyObject(10), Long.valueOf(100));
                     assertIsInterpreted();  // Every deoptimizeable method is deoptimized.
                 } catch (Exception e) {
-                    e.printStackTrace();
+                    e.printStackTrace(System.out);
                 }
             }
         });
diff --git a/test/617-clinit-oome/src/Main.java b/test/617-clinit-oome/src/Main.java
index 749a232..94cb7ce 100644
--- a/test/617-clinit-oome/src/Main.java
+++ b/test/617-clinit-oome/src/Main.java
@@ -37,7 +37,7 @@
         Other.print();
     } catch (OutOfMemoryError e) {
     } catch (Exception e) {
-        System.err.println(e);
+        System.out.println(e);
     }
   }
 }
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 520e7c3..af205b0 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -291,6 +291,9 @@
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
   // NOTE: should correctly deal with compressed and uncompressed cases.
+  //
+  /// CHECK-START-MIPS64: void Main.string2Bytes(char[], java.lang.String) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
   private static void string2Bytes(char[] a, String b) {
     int min = Math.min(a.length, b.length());
     for (int i = 0; i < min; i++) {
@@ -333,6 +336,13 @@
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.oneBoth(short[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]         loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Repl>>] loop:<<Loop>>      outer_loop:none
+  //
   // Bug b/37764324: integral same-length packed types can be mixed freely.
   private static void oneBoth(short[] a, char[] b) {
     for (int i = 0; i < Math.min(a.length, b.length); i++) {
@@ -351,6 +361,48 @@
     }
   }
 
+  /// CHECK-START: void Main.typeConv(byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                       loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi>>]         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.typeConv(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]          loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi                                   loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>]           loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Vadd:d\d+>> VecAdd [<<Load>>,<<Repl>>]            loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi1>>,<<Vadd>>] loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi                                   loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi2>>]          loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]                 loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]              loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>]  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.typeConv(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<One:i\d+>>  IntConstant 1                         loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<One>>]          loop:none
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi                                   loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Load:d\d+>> VecLoad [{{l\d+}},<<Phi1>>]           loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Vadd:d\d+>> VecAdd [<<Load>>,<<Repl>>]            loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi1>>,<<Vadd>>] loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi                                   loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:b\d+>>  ArrayGet [{{l\d+}},<<Phi2>>]          loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Add:i\d+>>  Add [<<Get>>,<<One>>]                 loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Add>>]              loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi2>>,<<Cnv>>]  loop:<<Loop2>>      outer_loop:none
+  //
+  // Scalar code in cleanup loop uses correct byte type on array get and type conversion.
+  private static void typeConv(byte[] a, byte[] b) {
+    int len = Math.min(a.length, b.length);  // Iterate only over the common prefix.
+    for (int i = 0; i < len; i++) {
+      a[i] = (byte) (b[i] + 1);  // int addition narrowed back to byte.
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -453,6 +505,17 @@
       expectEquals(40, bt[i]);
     }
 
+    byte[] b1 = new byte[259];  // few extra iterations
+    byte[] b2 = new byte[259];
+    for (int i = 0; i < 259; i++) {
+      b1[i] = 0;
+      b2[i] = (byte) i;
+    }
+    typeConv(b1, b2);
+    for (int i = 0; i < 259; i++) {
+      expectEquals((byte)(i + 1), b1[i]);
+    }
+
     System.out.println("passed");
   }
 
diff --git a/test/626-const-class-linking/src/RacyMisbehavingHelper.java b/test/626-const-class-linking/src/RacyMisbehavingHelper.java
index 4525278..9acd3c3 100644
--- a/test/626-const-class-linking/src/RacyMisbehavingHelper.java
+++ b/test/626-const-class-linking/src/RacyMisbehavingHelper.java
@@ -26,7 +26,7 @@
             Method reportAfterLoading = loader.getClass().getDeclaredMethod("reportAfterLoading");
             reportAfterLoading.invoke(loader);
         } catch (Throwable t) {
-            t.printStackTrace();
+            t.printStackTrace(System.out);
         }
         return new ClassPair(helper1_class, test_class);
     }
diff --git a/test/638-checker-inline-caches/src/Main.java b/test/638-checker-inline-caches/src/Main.java
index 680bd14..f104e6a 100644
--- a/test/638-checker-inline-caches/src/Main.java
+++ b/test/638-checker-inline-caches/src/Main.java
@@ -36,16 +36,17 @@
   /// CHECK:       InvokeVirtual method_name:Super.getValue
 
   /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (after)
-  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
-
-  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (after)
   /// CHECK:  <<SubARet:i\d+>>      IntConstant 42
   /// CHECK:  <<Obj:l\d+>>          NullCheck
   /// CHECK:  <<ObjClass:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
   /// CHECK:  <<InlineClass:l\d+>>  LoadClass class_name:SubA
   /// CHECK:  <<Test:z\d+>>         NotEqual [<<InlineClass>>,<<ObjClass>>]
-  /// CHECK:                        Deoptimize [<<Test>>,<<Obj>>]
-  /// CHECK:                        Return [<<SubARet>>]
+  /// CHECK:  <<DefaultRet:i\d+>>   InvokeVirtual [<<Obj>>] method_name:Super.getValue
+
+  /// CHECK:  <<Ret:i\d+>>          Phi [<<SubARet>>,<<DefaultRet>>]
+  /// CHECK:                        Return [<<Ret>>]
+
+  /// CHECK-NOT:                    Deoptimize
   public static int inlineMonomorphicSubA(Super a) {
     return a.getValue();
   }
@@ -53,27 +54,27 @@
   /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (before)
   /// CHECK:       InvokeVirtual method_name:Super.getValue
 
-  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (after)
-  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
-
   // Note that the order in which the types are added to the inline cache in the profile matters.
 
   /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (after)
   /// CHECK-DAG:  <<SubARet:i\d+>>          IntConstant 42
   /// CHECK-DAG:  <<SubBRet:i\d+>>          IntConstant 38
-  /// CHECK:      <<Obj:l\d+>>              NullCheck
-  /// CHECK:      <<ObjClassSubA:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
-  /// CHECK:      <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
-  /// CHECK:      <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
-  /// CHECK:                                If [<<TestSubA>>]
+  /// CHECK-DAG:   <<Obj:l\d+>>             NullCheck
+  /// CHECK-DAG:   <<ObjClassSubA:l\d+>>    InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
+  /// CHECK-DAG:   <<InlineClassSubA:l\d+>> LoadClass class_name:SubA
+  /// CHECK-DAG:   <<TestSubA:z\d+>>        NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
+  /// CHECK-DAG:                            If [<<TestSubA>>]
 
-  /// CHECK:      <<ObjClassSubB:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
-  /// CHECK:      <<InlineClassSubB:l\d+>>  LoadClass class_name:SubB
-  /// CHECK:      <<TestSubB:z\d+>>         NotEqual [<<InlineClassSubB>>,<<ObjClassSubB>>]
-  /// CHECK:                                Deoptimize [<<TestSubB>>,<<Obj>>]
+  /// CHECK-DAG:   <<ObjClassSubB:l\d+>>    InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK-DAG:   <<InlineClassSubB:l\d+>> LoadClass class_name:SubB
+  /// CHECK-DAG:   <<TestSubB:z\d+>>        NotEqual [<<InlineClassSubB>>,<<ObjClassSubB>>]
+  /// CHECK-DAG:   <<DefaultRet:i\d+>>      InvokeVirtual [<<Obj>>] method_name:Super.getValue
 
-  /// CHECK:      <<Ret:i\d+>>              Phi [<<SubARet>>,<<SubBRet>>]
-  /// CHECK:                                Return [<<Ret>>]
+  /// CHECK-DAG:  <<FirstMerge:i\d+>>       Phi [<<SubBRet>>,<<DefaultRet>>]
+  /// CHECK-DAG:  <<Ret:i\d+>>              Phi [<<SubARet>>,<<FirstMerge>>]
+  /// CHECK-DAG:                            Return [<<Ret>>]
+
+  /// CHECK-NOT:                            Deoptimize
   public static int inlinePolymophicSubASubB(Super a) {
     return a.getValue();
   }
@@ -81,27 +82,27 @@
   /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (before)
   /// CHECK:       InvokeVirtual method_name:Super.getValue
 
-  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (after)
-  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
-
   // Note that the order in which the types are added to the inline cache in the profile matters.
 
   /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (after)
   /// CHECK-DAG:  <<SubARet:i\d+>>          IntConstant 42
   /// CHECK-DAG:  <<SubCRet:i\d+>>          IntConstant 24
-  /// CHECK:      <<Obj:l\d+>>              NullCheck
-  /// CHECK:      <<ObjClassSubA:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
-  /// CHECK:      <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
-  /// CHECK:      <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
-  /// CHECK:                                If [<<TestSubA>>]
+  /// CHECK-DAG:  <<Obj:l\d+>>              NullCheck
+  /// CHECK-DAG:  <<ObjClassSubA:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
+  /// CHECK-DAG:  <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
+  /// CHECK-DAG:  <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
+  /// CHECK-DAG:                            If [<<TestSubA>>]
 
-  /// CHECK:      <<ObjClassSubC:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
-  /// CHECK:      <<InlineClassSubC:l\d+>>  LoadClass class_name:SubC
-  /// CHECK:      <<TestSubC:z\d+>>         NotEqual [<<InlineClassSubC>>,<<ObjClassSubC>>]
-  /// CHECK:                                Deoptimize [<<TestSubC>>,<<Obj>>]
+  /// CHECK-DAG:  <<ObjClassSubC:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK-DAG:  <<InlineClassSubC:l\d+>>  LoadClass class_name:SubC
+  /// CHECK-DAG:  <<TestSubC:z\d+>>         NotEqual [<<InlineClassSubC>>,<<ObjClassSubC>>]
+  /// CHECK-DAG:  <<DefaultRet:i\d+>>       InvokeVirtual [<<Obj>>] method_name:Super.getValue
 
-  /// CHECK:      <<Ret:i\d+>>              Phi [<<SubARet>>,<<SubCRet>>]
-  /// CHECK:                                Return [<<Ret>>]
+  /// CHECK-DAG:  <<FirstMerge:i\d+>>       Phi [<<SubCRet>>,<<DefaultRet>>]
+  /// CHECK-DAG:  <<Ret:i\d+>>              Phi [<<SubARet>>,<<FirstMerge>>]
+  /// CHECK-DAG:                            Return [<<Ret>>]
+
+  /// CHECK-NOT:                            Deoptimize
   public static int inlinePolymophicCrossDexSubASubC(Super a) {
     return a.getValue();
   }
diff --git a/test/638-no-line-number/src/Main.java b/test/638-no-line-number/src/Main.java
index 7fe0404..851f049 100644
--- a/test/638-no-line-number/src/Main.java
+++ b/test/638-no-line-number/src/Main.java
@@ -19,12 +19,12 @@
     try {
       doThrow(new Error());
     } catch (Error e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
     try {
       doThrow(null);
     } catch (Throwable t) {
-      t.printStackTrace();
+      t.printStackTrace(System.out);
     }
   }
 
diff --git a/test/640-checker-boolean-simd/src/Main.java b/test/640-checker-boolean-simd/src/Main.java
index f8239fa..64b76f8 100644
--- a/test/640-checker-boolean-simd/src/Main.java
+++ b/test/640-checker-boolean-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.and(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void and(boolean x) {
     for (int i = 0; i < 128; i++)
       a[i] &= x;  // NOTE: bitwise and, not the common &&
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.or(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void or(boolean x) {
     for (int i = 0; i < 128; i++)
       a[i] |= x;  // NOTE: bitwise or, not the common ||
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.xor(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void xor(boolean x) {
     for (int i = 0; i < 128; i++)
       a[i] ^= x;  // NOTE: bitwise xor
@@ -80,6 +98,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = !a[i];
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
index 10b20b8..283c2c9 100644
--- a/test/640-checker-byte-simd/src/Main.java
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -94,6 +112,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = (byte) -a[i];
@@ -109,6 +133,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = (byte) ~a[i];
@@ -124,6 +154,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -135,8 +171,16 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,9 +191,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need signedness flip.
+  /// CHECK-START: void Main.shr2() loop_optimization (after)
+  /// CHECK-NOT: VecUShr
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java
index 0628b36..dd879b4 100644
--- a/test/640-checker-char-simd/src/Main.java
+++ b/test/640-checker-char-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -94,6 +112,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = (char) -a[i];
@@ -109,6 +133,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = (char) ~a[i];
@@ -124,6 +154,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -134,9 +170,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need signedness flip.
+  /// CHECK-START: void Main.sar2() loop_optimization (after)
+  /// CHECK-NOT: VecShr
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -148,8 +184,16 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java
index 0d4f87a..f7492d5 100644
--- a/test/640-checker-double-simd/src/Main.java
+++ b/test/640-checker-double-simd/src/Main.java
@@ -36,6 +36,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(double x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -51,6 +57,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(double x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -66,6 +78,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(double x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -81,6 +99,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.div(double) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void div(double x) {
     for (int i = 0; i < 128; i++)
       a[i] /= x;
@@ -96,6 +120,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -111,6 +141,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.abs() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void abs() {
     for (int i = 0; i < 128; i++)
       a[i] = Math.abs(a[i]);
@@ -122,8 +158,14 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecStore
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.conv(long[]) loop_optimization (after)
+  /// CHECK-NOT: VecLoad
+  /// CHECK-NOT: VecStore
+  //
+  // TODO: fill in when long2double is supported
   static void conv(long[] b) {
     for (int i = 0; i < 128; i++)
       a[i] = b[i];
diff --git a/test/640-checker-float-simd/src/Main.java b/test/640-checker-float-simd/src/Main.java
index 4bcb7e2..4fe9675 100644
--- a/test/640-checker-float-simd/src/Main.java
+++ b/test/640-checker-float-simd/src/Main.java
@@ -36,6 +36,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(float x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -51,6 +57,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(float x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -66,6 +78,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(float x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -81,6 +99,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.div(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void div(float x) {
     for (int i = 0; i < 128; i++)
       a[i] /= x;
@@ -96,6 +120,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -106,6 +136,12 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.abs() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
   /// CHECK-START-ARM64: void Main.abs() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
@@ -126,6 +162,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.conv(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void conv(int[] b) {
     for (int i = 0; i < 128; i++)
       a[i] = b[i];
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
index 97048eb..9abf60d 100644
--- a/test/640-checker-int-simd/src/Main.java
+++ b/test/640-checker-int-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -95,6 +113,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -110,6 +134,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = ~a[i];
@@ -125,6 +155,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -134,10 +170,18 @@
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -149,8 +193,16 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
@@ -181,6 +233,11 @@
   /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr32() loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr32() {
     // TODO: remove a[i] = a[i] altogether?
     for (int i = 0; i < 128; i++)
@@ -207,6 +264,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr33() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shr33() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstant33();  // 1, since & 31
@@ -232,6 +296,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shrMinus254() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstantMinus254();  // 2, since & 31
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
index e42c716..05dcae6 100644
--- a/test/640-checker-long-simd/src/Main.java
+++ b/test/640-checker-long-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(long) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(long x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(long) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(long x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -60,6 +72,12 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.mul(long) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
   //  Not supported for longs.
   /// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after)
   /// CHECK-NOT: VecMul
@@ -93,6 +111,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = -a[i];
@@ -108,6 +132,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = ~a[i];
@@ -123,6 +153,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -134,8 +170,16 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,8 +191,16 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.shr2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecUShr  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
@@ -179,6 +231,11 @@
   /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr64() loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              VecStore [{{l\d+}},<<Phi>>,<<Get>>] loop:<<Loop>>      outer_loop:none
   static void shr64() {
     // TODO: remove a[i] = a[i] altogether?
     for (int i = 0; i < 128; i++)
@@ -205,6 +262,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shr65() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 1                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shr65() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstant65();  // 1, since & 63
@@ -230,6 +294,13 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shrMinus254() loop_optimization (after)
+  /// CHECK-DAG: <<Dist:i\d+>> IntConstant 2                        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<UShr:d\d+>> VecUShr [<<Get>>,<<Dist>>]           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<UShr>>] loop:<<Loop>>      outer_loop:none
   static void shrMinus254() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= $opt$inline$IntConstantMinus254();  // 2, since & 63
diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java
index 241f8e6..4cca837 100644
--- a/test/640-checker-short-simd/src/Main.java
+++ b/test/640-checker-short-simd/src/Main.java
@@ -35,6 +35,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void add(int x) {
     for (int i = 0; i < 128; i++)
       a[i] += x;
@@ -50,6 +56,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sub(int x) {
     for (int i = 0; i < 128; i++)
       a[i] -= x;
@@ -65,6 +77,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void mul(int x) {
     for (int i = 0; i < 128; i++)
       a[i] *= x;
@@ -94,6 +112,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void neg() {
     for (int i = 0; i < 128; i++)
       a[i] = (short) -a[i];
@@ -109,6 +133,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void not() {
     for (int i = 0; i < 128; i++)
       a[i] = (short) ~a[i];
@@ -124,6 +154,12 @@
   /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void shl4() {
     for (int i = 0; i < 128; i++)
       a[i] <<= 4;
@@ -135,8 +171,16 @@
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   //
-  // TODO: fill in when supported
+  /// CHECK-START-MIPS64: void Main.sar2() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShr   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
   static void sar2() {
     for (int i = 0; i < 128; i++)
       a[i] >>= 2;
@@ -147,9 +191,9 @@
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
-  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
-  //
-  // TODO: fill in when supported
+  // TODO: would need signedness flip.
+  /// CHECK-START: void Main.shr2() loop_optimization (after)
+  /// CHECK-NOT: VecUShr
   static void shr2() {
     for (int i = 0; i < 128; i++)
       a[i] >>>= 2;
diff --git a/test/644-checker-deopt/info.txt b/test/644-checker-deopt/info.txt
deleted file mode 100644
index c5fb12c..0000000
--- a/test/644-checker-deopt/info.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Regression test for making sure HDeoptimize is executed before
-the code it should have prevented executing.
diff --git a/test/644-checker-deopt/profile b/test/644-checker-deopt/profile
deleted file mode 100644
index cb261cc..0000000
--- a/test/644-checker-deopt/profile
+++ /dev/null
@@ -1,2 +0,0 @@
-LMain;->inlineMonomorphic(LMain;)I+LMain;
-LMain;->inlinePolymorphic(LMain;)I+LMain;,LSubMain;
diff --git a/test/644-checker-deopt/src/Main.java b/test/644-checker-deopt/src/Main.java
deleted file mode 100644
index 17c80a6..0000000
--- a/test/644-checker-deopt/src/Main.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-public class Main {
-
-  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (before)
-  /// CHECK:       InvokeVirtual method_name:Main.getValue
-
-  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (after)
-  /// CHECK-NOT:   InvokeVirtual method_name:Main.getValue
-
-  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (before)
-  /// CHECK:   <<Deopt:l\d+>> Deoptimize
-  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
-
-  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (after)
-  /// CHECK:   <<Deopt:l\d+>> Deoptimize
-  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
-
-  public static int inlineMonomorphic(Main a) {
-    if (a == null) {
-      return 42;
-    }
-    int i = 0;
-    while (i < 100) {
-      i += a.getValue();
-    }
-    return i;
-  }
-
-  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (before)
-  /// CHECK:       InvokeVirtual method_name:Main.getValue
-
-  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (after)
-  /// CHECK-NOT:   InvokeVirtual method_name:Main.getValue
-
-  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (before)
-  /// CHECK:   <<Deopt:l\d+>> Deoptimize
-  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
-
-  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (after)
-  /// CHECK:   <<Deopt:l\d+>> Deoptimize
-  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
-  public static int inlinePolymorphic(Main a) {
-    return a.getValue();
-  }
-
-  public int getValue() {
-    return value;
-  }
-
-  public static void main(String[] args) {
-    inlineMonomorphic(new Main());
-  }
-
-  int value = 1;
-}
-
-// Add a subclass of 'Main' to write the polymorphic inline cache in the profile.
-class SubMain extends Main {
-}
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
index 76850ab..9714a46 100644
--- a/test/645-checker-abs-simd/src/Main.java
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -22,6 +22,91 @@
   private static final int SPQUIET = 1 << 22;
   private static final long DPQUIET = 1L << 51;
 
+  /// CHECK-START: void Main.doitByte(byte[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitByte(byte[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitByte(byte[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  private static void doitByte(byte[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (byte) Math.abs(x[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitChar(char[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.doitChar(char[]) loop_optimization (after)
+  /// CHECK-NOT: VecAbs
+  private static void doitChar(char[] x) {
+    // Basically a nop due to zero extension.
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (char) Math.abs(x[i]);
+    }
+  }
+
+  /// CHECK-START: void Main.doitShort(short[]) loop_optimization (before)
+  /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitShort(short[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitShort(short[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  private static void doitShort(short[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = (short) Math.abs(x[i]);
+    }
+  }
+
   /// CHECK-START: void Main.doitInt(int[]) loop_optimization (before)
   /// CHECK-DAG: Phi                                       loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: ArrayGet                                  loop:<<Loop>>      outer_loop:none
@@ -39,6 +124,18 @@
   /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitInt(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                       loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                  loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                       loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                  loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsInt loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                  loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitInt(int[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -52,8 +149,28 @@
   /// CHECK-DAG: ArraySet                                   loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitLong(long[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                        loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                        loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                   loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
   //
-  // TODO: Not supported yet.
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitLong(long[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                        loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                   loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                        loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                   loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsLong loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                   loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitLong(long[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -77,6 +194,18 @@
   /// CHECK-DAG: ArraySet                                    loop:<<Loop2>>      outer_loop:none
   //
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitFloat(float[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                         loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                    loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                         loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                    loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsFloat loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                    loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitFloat(float[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -90,8 +219,28 @@
   /// CHECK-DAG: ArraySet                                     loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START-ARM64: void Main.doitDouble(double[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                          loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                          loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                     loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
   //
-  // TODO: Not supported yet.
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START-MIPS64: void Main.doitDouble(double[]) loop_optimization (after)
+  /// CHECK-DAG: Phi                                          loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad                                      loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecAbs                                       loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: VecStore                                     loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi                                          loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet                                     loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: InvokeStaticOrDirect intrinsic:MathAbsDouble loop:<<Loop2>>      outer_loop:none
+  /// CHECK-DAG: ArraySet                                     loop:<<Loop2>>      outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   private static void doitDouble(double[] x) {
     for (int i = 0; i < x.length; i++) {
       x[i] = Math.abs(x[i]);
@@ -99,6 +248,31 @@
   }
 
   public static void main(String[] args) {
+    // Bytes, chars, shorts.
+    byte[] xb = new byte[256];
+    for (int i = 0; i < 256; i++) {
+      xb[i] = (byte) i;
+    }
+    doitByte(xb);
+    for (int i = 0; i < 256; i++) {
+      expectEquals32((byte) Math.abs((byte) i), xb[i]);
+    }
+    char[] xc = new char[1024 * 64];
+    for (int i = 0; i < 1024 * 64; i++) {
+      xc[i] = (char) i;
+    }
+    doitChar(xc);
+    for (int i = 0; i < 1024 *64; i++) {
+      expectEquals32((char) Math.abs((char) i), xc[i]);
+    }
+    short[] xs = new short[1024 * 64];
+    for (int i = 0; i < 1024 * 64; i++) {
+      xs[i] = (short) i;
+    }
+    doitShort(xs);
+    for (int i = 0; i < 1024 * 64; i++) {
+      expectEquals32((short) Math.abs((short) i), xs[i]);
+    }
     // Set up minint32, maxint32 and some others.
     int[] xi = new int[8];
     xi[0] = 0x80000000;
diff --git a/test/646-checker-hadd-alt-byte/src/Main.java b/test/646-checker-hadd-alt-byte/src/Main.java
index d1b33ea..9cc6828 100644
--- a/test/646-checker-hadd-alt-byte/src/Main.java
+++ b/test/646-checker-hadd-alt-byte/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -71,6 +78,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -95,6 +109,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -122,6 +143,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -146,6 +174,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -171,6 +207,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/646-checker-hadd-alt-char/src/Main.java b/test/646-checker-hadd-alt-char/src/Main.java
index 1ea8d3f..3f81299 100644
--- a/test/646-checker-hadd-alt-char/src/Main.java
+++ b/test/646-checker-hadd-alt-char/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -72,6 +79,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -98,6 +112,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -126,6 +147,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void rounding_halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -152,6 +180,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -178,6 +214,14 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned_constant(char[] b1, char[] bo) {
diff --git a/test/646-checker-hadd-alt-short/src/Main.java b/test/646-checker-hadd-alt-short/src/Main.java
index 269e618..150626c 100644
--- a/test/646-checker-hadd-alt-short/src/Main.java
+++ b/test/646-checker-hadd-alt-short/src/Main.java
@@ -45,6 +45,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -71,6 +78,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -95,6 +109,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -122,6 +143,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -146,6 +174,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -171,6 +207,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/646-checker-hadd-byte/src/Main.java b/test/646-checker-hadd-byte/src/Main.java
index 7e29a7e..5a615a4 100644
--- a/test/646-checker-hadd-byte/src/Main.java
+++ b/test/646-checker-hadd-byte/src/Main.java
@@ -42,6 +42,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -68,6 +75,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -92,6 +106,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -119,6 +140,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>]  unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(byte[] b1, byte[] b2, byte[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -143,6 +171,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I127:i\d+>> IntConstant 127                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I127>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -168,6 +204,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                      loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<I255>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(byte[] b1, byte[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/646-checker-hadd-char/src/Main.java b/test/646-checker-hadd-char/src/Main.java
index d24608f..bb8a01f 100644
--- a/test/646-checker-hadd-char/src/Main.java
+++ b/test/646-checker-hadd-char/src/Main.java
@@ -42,6 +42,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -69,6 +76,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -95,6 +109,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(char[] b1, char[] b2, char[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -123,6 +144,13 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_also_unsigned(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void rounding_halving_add_also_unsigned(char[] b1, char[] b2, char[] bo) {
@@ -149,6 +177,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(char[] b1, char[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -175,6 +211,14 @@
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   //
+  /// CHECK-START-MIPS64: void Main.halving_add_also_unsigned_constant(char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
   // Note: HAnd has no impact (already a zero extension).
   //
   private static void halving_add_also_unsigned_constant(char[] b1, char[] bo) {
diff --git a/test/646-checker-hadd-short/src/Main.java b/test/646-checker-hadd-short/src/Main.java
index 4e6b4bd..07845a6 100644
--- a/test/646-checker-hadd-short/src/Main.java
+++ b/test/646-checker-hadd-short/src/Main.java
@@ -42,6 +42,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -69,6 +76,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -96,6 +110,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -120,6 +141,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -144,6 +172,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -172,6 +207,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_signed_alt2(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:false rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_signed_alt2(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -200,6 +242,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -227,6 +276,13 @@
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.rounding_halving_add_unsigned_alt(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get1>>,<<Get2>>] unsigned:true rounded:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void rounding_halving_add_unsigned_alt(short[] b1, short[] b2, short[] bo) {
     int min_length = Math.min(bo.length, Math.min(b1.length, b2.length));
     for (int i = 0; i < min_length; i++) {
@@ -252,6 +308,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_signed_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<SMAX:i\d+>> IntConstant 32767                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<SMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:false rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_signed_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
@@ -277,6 +341,14 @@
   /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.halving_add_unsigned_constant(short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<UMAX:i\d+>> IntConstant 65535                    loop:none
+  /// CHECK-DAG: <<Repl:d\d+>> VecReplicateScalar [<<UMAX>>]        loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get:d\d+>>  VecLoad                              loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<HAdd:d\d+>> VecHalvingAdd [<<Get>>,<<Repl>>] unsigned:true rounded:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<HAdd>>] loop:<<Loop>>      outer_loop:none
   private static void halving_add_unsigned_constant(short[] b1, short[] bo) {
     int min_length = Math.min(bo.length, b1.length);
     for (int i = 0; i < min_length; i++) {
diff --git a/test/651-checker-byte-simd-minmax/src/Main.java b/test/651-checker-byte-simd-minmax/src/Main.java
index 8211ace..4711214 100644
--- a/test/651-checker-byte-simd-minmax/src/Main.java
+++ b/test/651-checker-byte-simd-minmax/src/Main.java
@@ -27,9 +27,19 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -37,6 +47,37 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMinUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMinUnsigned(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Common (shortest) length.
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.min(y[i] & 0xff, z[i] & 0xff);  // & 0xff: compare as unsigned 0..255.
+    }
+  }
+
   /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (before)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
@@ -45,9 +86,19 @@
   /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(byte[] x, byte[] y, byte[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -55,6 +106,37 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (before)
+  /// CHECK-DAG: <<I255:i\d+>> IntConstant 255                     loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:b\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<I255>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:b\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(byte[], byte[], byte[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMaxUnsigned(byte[] x, byte[] y, byte[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Common (shortest) length.
+    for (int i = 0; i < min; i++) {
+      x[i] = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);  // & 0xff: compare as unsigned 0..255.
+    }
+  }
+
   public static void main(String[] args) {
     // Initialize cross-values for all possible values.
     int total = 256 * 256;
@@ -77,11 +159,21 @@
       byte expected = (byte) Math.min(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMinUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.min(y[i] & 0xff, z[i] & 0xff);
+      expectEquals(expected, x[i]);
+    }
     doitMax(x, y, z);
     for (int i = 0; i < total; i++) {
       byte expected = (byte) Math.max(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMaxUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      byte expected = (byte) Math.max(y[i] & 0xff, z[i] & 0xff);
+      expectEquals(expected, x[i]);
+    }
 
     System.out.println("passed");
   }
diff --git a/test/651-checker-char-simd-minmax/src/Main.java b/test/651-checker-char-simd-minmax/src/Main.java
index 5ce7b94..79795ee 100644
--- a/test/651-checker-char-simd-minmax/src/Main.java
+++ b/test/651-checker-char-simd-minmax/src/Main.java
@@ -27,9 +27,19 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -45,9 +55,19 @@
   /// CHECK-DAG: <<Cnv:c\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(char[], char[], char[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(char[] x, char[] y, char[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-double-simd-minmax/src/Main.java b/test/651-checker-double-simd-minmax/src/Main.java
index e1711ae..23a6d54 100644
--- a/test/651-checker-double-simd-minmax/src/Main.java
+++ b/test/651-checker-double-simd-minmax/src/Main.java
@@ -27,6 +27,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: min(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMin(double[], double[], double[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
@@ -48,7 +49,8 @@
   /// CHECK-DAG: <<Max:d\d+>>  InvokeStaticOrDirect [<<Get1>>,<<Get2>>] intrinsic:MathMaxDoubleDouble loop:<<Loop>> outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO-x86: 0.0 vs -0.0?
+  // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: max(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMax(double[], double[], double[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-float-simd-minmax/src/Main.java b/test/651-checker-float-simd-minmax/src/Main.java
index bd412e0..3959c82 100644
--- a/test/651-checker-float-simd-minmax/src/Main.java
+++ b/test/651-checker-float-simd-minmax/src/Main.java
@@ -27,6 +27,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: min(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMin(float[], float[], float[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
@@ -49,6 +50,7 @@
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   //
   // TODO x86: 0.0 vs -0.0?
+  // TODO MIPS64: max(x, NaN)?
   //
   /// CHECK-START-ARM64: void Main.doitMax(float[], float[], float[]) loop_optimization (after)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
diff --git a/test/651-checker-int-simd-minmax/src/Main.java b/test/651-checker-int-simd-minmax/src/Main.java
index 4e05a9d..2a97009 100644
--- a/test/651-checker-int-simd-minmax/src/Main.java
+++ b/test/651-checker-int-simd-minmax/src/Main.java
@@ -30,6 +30,13 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(int[] x, int[] y, int[] z) {
@@ -50,6 +57,13 @@
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(int[], int[], int[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(int[] x, int[] y, int[] z) {
diff --git a/test/651-checker-long-simd-minmax/src/Main.java b/test/651-checker-long-simd-minmax/src/Main.java
index 51cf67e..6289a1e 100644
--- a/test/651-checker-long-simd-minmax/src/Main.java
+++ b/test/651-checker-long-simd-minmax/src/Main.java
@@ -28,8 +28,16 @@
   //
   // Not directly supported for longs.
   //
-  /// CHECK-START: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-START-ARM64: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
   /// CHECK-NOT: VecMin
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+
   private static void doitMin(long[] x, long[] y, long[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -46,8 +54,15 @@
   //
   // Not directly supported for longs.
   //
-  /// CHECK-START: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-START-ARM64: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
   /// CHECK-NOT: VecMax
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(long[], long[], long[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>]          loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(long[] x, long[] y, long[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
diff --git a/test/651-checker-short-simd-minmax/src/Main.java b/test/651-checker-short-simd-minmax/src/Main.java
index f34f526..3bd1305 100644
--- a/test/651-checker-short-simd-minmax/src/Main.java
+++ b/test/651-checker-short-simd-minmax/src/Main.java
@@ -27,9 +27,19 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-NOT: VecMin
+  /// CHECK-START-ARM64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMin(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
   private static void doitMin(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -37,6 +47,37 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMinIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Min>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMinUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Min:d\d+>>  VecMin [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Min>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMinUnsigned(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array length.
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff);  // Min of the shorts as 0..65535.
+    }
+  }
+
   /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (before)
   /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
@@ -45,9 +86,19 @@
   /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
   //
-  // TODO: narrow type vectorization.
-  /// CHECK-START: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
-  /// CHECK-NOT: VecMax
+  /// CHECK-START-ARM64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMax(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:false loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
   private static void doitMax(short[] x, short[] y, short[] z) {
     int min = Math.min(x.length, Math.min(y.length, z.length));
     for (int i = 0; i < min; i++) {
@@ -55,6 +106,37 @@
     }
   }
 
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (before)
+  /// CHECK-DAG: <<IMAX:i\d+>> IntConstant 65535                   loop:none
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:s\d+>> ArrayGet                            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And1:i\d+>> And [<<Get1>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<And2:i\d+>> And [<<Get2>>,<<IMAX>>]             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:i\d+>>  InvokeStaticOrDirect [<<And1>>,<<And2>>] intrinsic:MathMaxIntInt loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG: <<Cnv:s\d+>>  TypeConversion [<<Max>>]            loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               ArraySet [{{l\d+}},<<Phi>>,<<Cnv>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-MIPS64: void Main.doitMaxUnsigned(short[], short[], short[]) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>>  Phi                                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Get1:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get2:d\d+>> VecLoad                             loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Max:d\d+>>  VecMax [<<Get1>>,<<Get2>>] unsigned:true loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:               VecStore [{{l\d+}},<<Phi>>,<<Max>>] loop:<<Loop>>      outer_loop:none
+  private static void doitMaxUnsigned(short[] x, short[] y, short[] z) {
+    int min = Math.min(x.length, Math.min(y.length, z.length));  // Shortest array length.
+    for (int i = 0; i < min; i++) {
+      x[i] = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);  // Max of the shorts as 0..65535.
+    }
+  }
+
   public static void main(String[] args) {
     short[] interesting = {
       (short) 0x0000, (short) 0x0001, (short) 0x007f,
@@ -91,11 +173,21 @@
       short expected = (short) Math.min(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMinUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.min(y[i] & 0xffff, z[i] & 0xffff);
+      expectEquals(expected, x[i]);
+    }
     doitMax(x, y, z);
     for (int i = 0; i < total; i++) {
       short expected = (short) Math.max(y[i], z[i]);
       expectEquals(expected, x[i]);
     }
+    doitMaxUnsigned(x, y, z);
+    for (int i = 0; i < total; i++) {
+      short expected = (short) Math.max(y[i] & 0xffff, z[i] & 0xffff);
+      expectEquals(expected, x[i]);
+    }
 
     System.out.println("passed");
   }
diff --git a/test/652-deopt-intrinsic/expected.txt b/test/652-deopt-intrinsic/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/652-deopt-intrinsic/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/652-deopt-intrinsic/info.txt b/test/652-deopt-intrinsic/info.txt
new file mode 100644
index 0000000..58a90fa
--- /dev/null
+++ b/test/652-deopt-intrinsic/info.txt
@@ -0,0 +1,2 @@
+Regression test for the interpreter/JIT: the interpreter previously did not
+record inline caches when it encountered an intrinsic.
diff --git a/test/652-deopt-intrinsic/src/Main.java b/test/652-deopt-intrinsic/src/Main.java
new file mode 100644
index 0000000..a82580c
--- /dev/null
+++ b/test/652-deopt-intrinsic/src/Main.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);  // The native library name is the first argument.
+    loop();  // First pass, run before ensureJitCompiled is called.
+    ensureJitCompiled(Main.class, "$noinline$doCall");
+    loop();  // Second pass; loop() throws if any deoptimization is reported.
+  }
+
+  public static void loop() {
+    Main m = new Main();
+    for (int i = 0; i < 5000; i++) {
+      $noinline$doCall("foo");  // String receiver.
+      $noinline$doCall(m);      // Main receiver.
+      if (numberOfDeoptimizations() != 0) {
+        throw new Error("Unexpected deoptimizations");
+      }
+    }
+  }
+
+  public static boolean $noinline$doCall(Object foo) {
+    return foo.equals(Main.class);  // Called with String and Main receivers.
+  }
+  // Native helpers implemented outside this file; behavior presumed from their names.
+  public static native int numberOfDeoptimizations();
+  public static native void ensureJitCompiled(Class<?> cls, String methodName);
+}
diff --git a/test/654-checker-periodic/expected.txt b/test/654-checker-periodic/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/654-checker-periodic/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/654-checker-periodic/info.txt b/test/654-checker-periodic/info.txt
new file mode 100644
index 0000000..7c8a777
--- /dev/null
+++ b/test/654-checker-periodic/info.txt
@@ -0,0 +1 @@
+Checker tests for periodic sequences on integer and floating-point values.
diff --git a/test/654-checker-periodic/src/Main.java b/test/654-checker-periodic/src/Main.java
new file mode 100644
index 0000000..7a0c98c
--- /dev/null
+++ b/test/654-checker-periodic/src/Main.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker tests for the last value of a few periodic (alternating) sequences
+ * computed in up- and down-counting loops, on int and float (found by fuzz testing).
+ */
+public class Main {
+
+  /// CHECK-START: int Main.doitUpInt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doitUpInt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static int doitUpInt(int n) {
+    // The CHECK-NOT above expects no Phi to remain: the loop is replaced by its last value.
+    int lI = 1;
+    for (int i1 = 0; i1  < n; i1++) {
+      lI = (1486662021 - lI);  // Alternates between 1486662020 and 1.
+    }
+    return lI;
+  }
+
+  /// CHECK-START: int Main.doitDownInt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doitDownInt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static int doitDownInt(int n) {
+    // Down-counting variant of doitUpInt; the loop is likewise replaced by its last value.
+    int lI = 1;
+    for (int i1 = n - 1; i1 >= 0; i1--) {
+      lI = (1486662021 - lI);
+    }
+    return lI;
+  }
+
+  /// CHECK-START: float Main.doitUpFloat(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitUpFloat(int) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  static float doitUpFloat(int n) {
+    // Float subtraction is not precise enough for an exact alternation (see main);
+    // the CHECK-DAG lines above expect both Phis, i.e. the loop, to remain.
+    float lF = 1.0f;
+    for (int i1 = 0; i1  < n; i1++) {
+      lF = (1486662021.0f - lF);
+    }
+    return lF;
+  }
+
+  /// CHECK-START: float Main.doitDownFloat(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitDownFloat(int) loop_optimization (after)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  static float doitDownFloat(int n) {
+    // Down-counting variant of doitUpFloat: float subtraction is not precise enough,
+    // so the loop is expected to remain.
+    float lF = 1.0f;
+    for (int i1 = n - 1; i1 >= 0; i1--) {
+      lF = (1486662021.0f - lF);
+    }
+    return lF;
+  }
+
+  /// CHECK-START: float Main.doitUpFloatAlt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitUpFloatAlt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static float doitUpFloatAlt(int n) {
+    // Swapping two floats involves no arithmetic, so the values are precise and
+    // the complete loop is replaced by its last value (no Phi remains).
+    float lF = 1.0f;
+    float l2 = 1486662020.0f;
+    for (int i1 = 0; i1  < n; i1++) {
+      float old = lF;
+      lF = l2;
+      l2 = old;
+    }
+    return lF;
+  }
+
+  /// CHECK-START: float Main.doitDownFloatAlt(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi  loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              Phi  loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: float Main.doitDownFloatAlt(int) loop_optimization (after)
+  /// CHECK-NOT: Phi
+  static float doitDownFloatAlt(int n) {
+    // Down-counting variant of doitUpFloatAlt: the swap is precise, so the
+    // complete loop is replaced by its last value (no Phi remains).
+    float lF = 1.0f;
+    float l2 = 1486662020.0f;
+    for (int i1 = n - 1; i1 >= 0; i1--) {
+      float old = lF;
+      lF = l2;
+      l2 = old;
+    }
+    return lF;
+  }
+
+  // Main driver: checks each method against its expected last value for n = 0..9.
+  public static void main(String[] args) {
+    for (int i = 0; i < 10; i++) {
+      int ei = (i & 1) == 0 ? 1 : 1486662020;
+      int ci = doitUpInt(i);
+      expectEquals(ei, ci);
+    }
+    for (int i = 0; i < 10; i++) {
+      int ei = (i & 1) == 0 ? 1 : 1486662020;
+      int ci = doitDownInt(i);
+      expectEquals(ei, ci);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = i == 0 ? 1.0f : ((i & 1) == 0 ? 0.0f : 1486662021.0f);
+      float cf = doitUpFloat(i);
+      expectEquals(ef, cf);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = i == 0 ? 1.0f : ((i & 1) == 0 ? 0.0f : 1486662021.0f);
+      float cf = doitDownFloat(i);
+      expectEquals(ef, cf);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = (i & 1) == 0 ? 1.0f : 1486662020.0f;
+      float cf = doitUpFloatAlt(i);
+      expectEquals(ef, cf);
+    }
+    for (int i = 0; i < 10; i++) {
+      float ef = (i & 1) == 0 ? 1.0f : 1486662020.0f;
+      float cf = doitDownFloatAlt(i);
+      expectEquals(ef, cf);
+    }
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
+
+
diff --git a/test/655-checker-simd-arm-opt/expected.txt b/test/655-checker-simd-arm-opt/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/655-checker-simd-arm-opt/info.txt b/test/655-checker-simd-arm-opt/info.txt
new file mode 100644
index 0000000..198cc95
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/info.txt
@@ -0,0 +1 @@
+Checker test for arm and arm64 simd optimizations.
diff --git a/test/655-checker-simd-arm-opt/src/Main.java b/test/655-checker-simd-arm-opt/src/Main.java
new file mode 100644
index 0000000..7b61dd7
--- /dev/null
+++ b/test/655-checker-simd-arm-opt/src/Main.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Checker test for arm and arm64 simd optimizations.
+ */
+public class Main {
+
+  private static void expectEquals(int expected, int result) {  // Throws Error on mismatch.
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  /// CHECK-START-ARM64: void Main.encodableConstants(byte[], short[], char[], int[], long[], float[], double[]) disassembly (after)
+  /// CHECK-DAG: <<C1:i\d+>>   IntConstant 1
+  /// CHECK-DAG: <<C2:i\d+>>   IntConstant 2
+  /// CHECK-DAG: <<C3:i\d+>>   IntConstant 3
+  /// CHECK-DAG: <<C4:i\d+>>   IntConstant 4
+  /// CHECK-DAG: <<L5:j\d+>>   LongConstant 5
+  /// CHECK-DAG: <<F2:f\d+>>   FloatConstant 2
+  /// CHECK-DAG: <<D20:d\d+>>  DoubleConstant 20
+  //
+  /// CHECK-DAG:               VecReplicateScalar [<<C1>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.16b, #0x1
+  /// CHECK-DAG:               VecReplicateScalar [<<C2>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.8h, #0x2, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<C3>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.8h, #0x3, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<C4>>]
+  /// CHECK-DAG:               movi v{{[0-9]+}}.4s, #0x4, lsl #0
+  /// CHECK-DAG:               VecReplicateScalar [<<L5>>]
+  /// CHECK-DAG:               dup v{{[0-9]+}}.2d, x{{[0-9]+}}
+  /// CHECK-DAG:               VecReplicateScalar [<<F2>>]
+  /// CHECK-DAG:               fmov v{{[0-9]+}}.4s, #0x0
+  /// CHECK-DAG:               VecReplicateScalar [<<D20>>]
+  /// CHECK-DAG:               fmov v{{[0-9]+}}.2d, #0x34
+  private static void encodableConstants(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+    for (int i = 0; i < ARRAY_SIZE; i++) {  // One loop per array; each adds its own constant.
+      b[i] += 1;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      s[i] += 2;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      c[i] += 3;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      a[i] += 4;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      l[i] += 5;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      f[i] += 2.0f;
+    }
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      d[i] += 20.0;
+    }
+  }
+
+  private static int sumArray(byte[] b, short[] s, char[] c, int[] a, long[] l, float[] f, double[] d) {
+    int sum = 0;  // The += below implicitly narrows the double-typed expression back to int.
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+      sum += b[i] + s[i] + c[i] + a[i] + l[i] + f[i] + d[i];
+    }
+    return sum;
+  }
+
+  public static final int ARRAY_SIZE = 100;  // Length of every array allocated in main().
+
+  public static void main(String[] args) {
+    byte[] b = new byte[ARRAY_SIZE];
+    short[] s = new short[ARRAY_SIZE];
+    char[] c = new char[ARRAY_SIZE];
+    int[] a = new int[ARRAY_SIZE];
+    long[] l = new long[ARRAY_SIZE];
+    float[] f = new float[ARRAY_SIZE];
+    double[] d = new double[ARRAY_SIZE];
+
+    encodableConstants(b, s, c, a, l, f, d);  // Arrays start zeroed, so each holds its constant.
+    expectEquals(3700, sumArray(b, s, c, a, l, f, d));  // 100 * (1 + 2 + 3 + 4 + 5 + 2 + 20)
+
+    System.out.println("passed");
+  }
+}
diff --git a/test/655-jit-clinit/expected.txt b/test/655-jit-clinit/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/655-jit-clinit/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/655-jit-clinit/info.txt b/test/655-jit-clinit/info.txt
new file mode 100644
index 0000000..5c81d9b
--- /dev/null
+++ b/test/655-jit-clinit/info.txt
@@ -0,0 +1,3 @@
+Regression test for the JIT compiler, which used to wait
+on a class object, meaning application code could just block
+all JIT compilations.
diff --git a/test/655-jit-clinit/src/Main.java b/test/655-jit-clinit/src/Main.java
new file mode 100644
index 0000000..44b3154
--- /dev/null
+++ b/test/655-jit-clinit/src/Main.java
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[0]);
+    // Only exercise Foo when a JIT is present; without one the test ends right here.
+    if (hasJit()) {
+      Foo.hotMethod();
+    }
+  }
+
+  public native static boolean isJitCompiled(Class<?> cls, String methodName);
+  private native static boolean hasJit();
+}
+
+class Foo {
+  static void hotMethod() {  // Stores a reference to `array` itself into every slot of `array`.
+    for (int i = 0; i < array.length; ++i) {
+      array[i] = array;
+    }
+  }
+
+  static {  // Class initializer: loops until `hotMethod` is reported as JIT-compiled.
+    array = new Object[10000];
+    while (!Main.isJitCompiled(Foo.class, "hotMethod")) {
+      Foo.hotMethod();
+      try {
+        // Pause 100 ms between polls to give the JIT a chance to compile `hotMethod`.
+        Thread.sleep(100);
+      } catch (Exception e) {
+        // Ignore any exception from the sleep and keep polling.
+      }
+    }
+  }
+
+  static Object[] array;  // Assigned by the static initializer above.
+}
diff --git a/test/644-checker-deopt/expected.txt b/test/707-checker-invalid-profile/expected.txt
similarity index 100%
rename from test/644-checker-deopt/expected.txt
rename to test/707-checker-invalid-profile/expected.txt
diff --git a/test/707-checker-invalid-profile/info.txt b/test/707-checker-invalid-profile/info.txt
new file mode 100644
index 0000000..4b59eff
--- /dev/null
+++ b/test/707-checker-invalid-profile/info.txt
@@ -0,0 +1,2 @@
+Verify the compiler can handle an invalid profile with methods
+and classes exceeding the dex file limits.
diff --git a/test/707-checker-invalid-profile/profile b/test/707-checker-invalid-profile/profile
new file mode 100644
index 0000000..5979dd2
--- /dev/null
+++ b/test/707-checker-invalid-profile/profile
@@ -0,0 +1,4 @@
+LMain;->attemptInlineMonomorphic(LMain;)I+invalid_class
+LMain;->attemptInlinePolymorphic(LMain;)I+LMain;,invalid_class
+LMain;->invalid_method
+invalid_class
\ No newline at end of file
diff --git a/test/644-checker-deopt/run b/test/707-checker-invalid-profile/run
similarity index 100%
rename from test/644-checker-deopt/run
rename to test/707-checker-invalid-profile/run
diff --git a/test/707-checker-invalid-profile/src/Main.java b/test/707-checker-invalid-profile/src/Main.java
new file mode 100644
index 0000000..003f0e8
--- /dev/null
+++ b/test/707-checker-invalid-profile/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Unrelated {  // Empty class; nothing else in this file refers to it.
+}
+
+public class Main {
+
+  /// CHECK-START: int Main.attemptInlineMonomorphic(Main) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+  public static int attemptInlineMonomorphic(Main a) {
+    return a.getValue();  // The CHECK above expects this call to still be an InvokeVirtual.
+  }
+
+  /// CHECK-START: int Main.attemptInlinePolymorphic(Main) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+  public static int attemptInlinePolymorphic(Main a) {
+    return a.getValue();  // Same body as attemptInlineMonomorphic; same expectation applies.
+  }
+
+  public int getValue() {  // Virtual call target of both attemptInline* methods.
+    return 42;
+  }
+
+  public static void main(String[] args) {
+    attemptInlineMonomorphic(new Main());  // Return value is discarded.
+    attemptInlinePolymorphic(new Main());  // Return value is discarded.
+  }
+
+}
diff --git a/test/802-deoptimization/src/DeoptimizationController.java b/test/802-deoptimization/src/DeoptimizationController.java
index d6e662d..88579de 100644
--- a/test/802-deoptimization/src/DeoptimizationController.java
+++ b/test/802-deoptimization/src/DeoptimizationController.java
@@ -50,7 +50,7 @@
         throw new IllegalStateException("Not tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     } finally {
       if (tempFile != null) {
         tempFile.delete();
@@ -65,7 +65,7 @@
         throw new IllegalStateException("Still tracing.");
       }
     } catch (Exception exc) {
-      exc.printStackTrace(System.err);
+      exc.printStackTrace(System.out);
     }
   }
 
diff --git a/test/906-iterate-heap/expected.txt b/test/906-iterate-heap/expected.txt
index b6af843..73b7129 100644
--- a/test/906-iterate-heap/expected.txt
+++ b/test/906-iterate-heap/expected.txt
@@ -18,14 +18,14 @@
 2
 1@0 (32, 2xD '0000000000000000000000000000f03f')
 2
+doTestPrimitiveFieldsClasses
 10000@0 (static, int, index=3) 0000000000000000
 10001
 10000@0 (static, int, index=11) 0000000000000000
 10001
-10000@0 (static, int, index=0) 0000000000000000
 10001
-10000@0 (static, int, index=1) 0000000000000000
 10001
+doTestPrimitiveFieldsIntegral
 10000@0 (instance, int, index=2) 0000000000000000
 10001@0 (instance, byte, index=4) 0000000000000001
 10002@0 (instance, char, index=5) 0000000000000061
@@ -33,6 +33,7 @@
 10004@0 (instance, long, index=7) 0000000000000004
 10005@0 (instance, short, index=9) 0000000000000002
 10006
+doTestPrimitiveFieldsFloat
 10000@0 (instance, int, index=3) 0000000000000000
 10001@0 (instance, byte, index=5) 0000000000000001
 10002@0 (instance, char, index=6) 0000000000000061
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
index 6534b4c..02ac699 100644
--- a/test/906-iterate-heap/iterate_heap.cc
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -408,5 +408,15 @@
   return env->NewStringUTF(ffc.data.c_str());
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test906_checkInitialized(
+    JNIEnv* env, jclass, jclass c) {
+  // Query the JVMTI status bits of `c`; a truthy JvmtiErrorToException result means failure.
+  jint class_status;
+  const jvmtiError result = jvmti_env->GetClassStatus(c, &class_status);
+  const bool failed = JvmtiErrorToException(env, jvmti_env, result);
+  // On failure report false without looking at the (possibly unset) status bits.
+  return !failed && (class_status & JVMTI_CLASS_STATUS_INITIALIZED) != 0;
+}
+
 }  // namespace Test906IterateHeap
 }  // namespace art
diff --git a/test/906-iterate-heap/src/art/Test906.java b/test/906-iterate-heap/src/art/Test906.java
index fe18e38..65c2c8c 100644
--- a/test/906-iterate-heap/src/art/Test906.java
+++ b/test/906-iterate-heap/src/art/Test906.java
@@ -142,6 +142,7 @@
   }
 
   private static void doTestPrimitiveFieldsClasses() {
+    System.out.println("doTestPrimitiveFieldsClasses");
     setTag(IntObject.class, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
     System.out.println(getTag(IntObject.class));
@@ -152,18 +153,40 @@
     System.out.println(getTag(FloatObject.class));
     setTag(FloatObject.class, 0);
 
+    boolean correctHeapValue = false;
     setTag(Inf1.class, 10000);
-    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    String heapTrace = iterateThroughHeapPrimitiveFields(10000);
+
+    if (!checkInitialized(Inf1.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf1 is not as expected:\n" + heapTrace);
+
     System.out.println(getTag(Inf1.class));
     setTag(Inf1.class, 0);
 
     setTag(Inf2.class, 10000);
-    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    heapTrace = iterateThroughHeapPrimitiveFields(10000);
+
+    if (!checkInitialized(Inf2.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf2 is not as expected:\n" + heapTrace);
     System.out.println(getTag(Inf2.class));
+
     setTag(Inf2.class, 0);
   }
 
   private static void doTestPrimitiveFieldsIntegral() {
+    System.out.println("doTestPrimitiveFieldsIntegral");
     IntObject intObject = new IntObject();
     setTag(intObject, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
@@ -171,6 +194,7 @@
   }
 
   private static void doTestPrimitiveFieldsFloat() {
+    System.out.println("doTestPrimitiveFieldsFloat");
     FloatObject floatObject = new FloatObject();
     setTag(floatObject, 10000);
     System.out.println(iterateThroughHeapPrimitiveFields(10000));
@@ -265,6 +289,7 @@
     return Main.getTag(o);
   }
 
+  private static native boolean checkInitialized(Class<?> klass);
   private static native int iterateThroughHeapCount(int heapFilter,
       Class<?> klassFilter, int stopAfter);
   private static native int iterateThroughHeapData(int heapFilter,
diff --git a/test/909-attach-agent/attach.cc b/test/909-attach-agent/attach.cc
index 0150e09..3a6788a 100644
--- a/test/909-attach-agent/attach.cc
+++ b/test/909-attach-agent/attach.cc
@@ -27,18 +27,22 @@
 namespace art {
 namespace Test909AttachAgent {
 
+static void Println(const char* c) {
+  puts(c);  // Writes `c` followed by a newline to stdout.
+  fflush(stdout);  // Flush right away so the line does not sit in the stdio buffer.
+}
+
 jint OnAttach(JavaVM* vm,
             char* options ATTRIBUTE_UNUSED,
             void* reserved ATTRIBUTE_UNUSED) {
-  fprintf(stderr, "Attached Agent for test 909-attach-agent\n");
-  fsync(1);
+  Println("Attached Agent for test 909-attach-agent");
   jvmtiEnv* env = nullptr;
   jvmtiEnv* env2 = nullptr;
 
 #define CHECK_CALL_SUCCESS(c) \
   do { \
     if ((c) != JNI_OK) { \
-      fprintf(stderr, "call " #c " did not succeed\n"); \
+      Println("call " #c " did not succeed"); \
       return -1; \
     } \
   } while (false)
@@ -46,7 +50,7 @@
   CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env), JVMTI_VERSION_1_0));
   CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env2), JVMTI_VERSION_1_0));
   if (env == env2) {
-    fprintf(stderr, "GetEnv returned same environment twice!\n");
+    Println("GetEnv returned same environment twice!");
     return -1;
   }
   unsigned char* local_data = nullptr;
@@ -56,19 +60,19 @@
   unsigned char* get_data = nullptr;
   CHECK_CALL_SUCCESS(env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
   if (get_data != local_data) {
-    fprintf(stderr, "Got different data from local storage then what was set!\n");
+    Println("Got different data from local storage then what was set!");
     return -1;
   }
   CHECK_CALL_SUCCESS(env2->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
   if (get_data != nullptr) {
-    fprintf(stderr, "env2 did not have nullptr local storage.\n");
+    Println("env2 did not have nullptr local storage.");
     return -1;
   }
   CHECK_CALL_SUCCESS(env->Deallocate(local_data));
   jint version = 0;
   CHECK_CALL_SUCCESS(env->GetVersionNumber(&version));
   if ((version & JVMTI_VERSION_1) != JVMTI_VERSION_1) {
-    fprintf(stderr, "Unexpected version number!\n");
+    Println("Unexpected version number!");
     return -1;
   }
   CHECK_CALL_SUCCESS(env->DisposeEnvironment());
diff --git a/test/909-attach-agent/src/Main.java b/test/909-attach-agent/src/Main.java
index 569b89a..25ebd57 100644
--- a/test/909-attach-agent/src/Main.java
+++ b/test/909-attach-agent/src/Main.java
@@ -19,17 +19,17 @@
 
 public class Main {
   public static void main(String[] args) {
-    System.err.println("Hello, world!");
+    System.out.println("Hello, world!");
     for(String a : args) {
       if(a.startsWith("agent:")) {
         String agent = a.substring(6);
         try {
           VMDebug.attachAgent(agent);
         } catch(IOException e) {
-          e.printStackTrace();
+          e.printStackTrace(System.out);
         }
       }
     }
-    System.err.println("Goodbye!");
+    System.out.println("Goodbye!");
   }
 }
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index b128d1c..80f8b9e 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -140,9 +140,7 @@
 10001
 10000@0 (static, int, index=11) 0000000000000000
 10001
-10000@0 (static, int, index=0) 0000000000000000
 10001
-10000@0 (static, int, index=1) 0000000000000000
 10001
 10000@0 (instance, int, index=2) 0000000000000000
 10001@0 (instance, byte, index=4) 0000000000000001
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index ec36ceb..bf3f7b6 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -1078,5 +1078,14 @@
   CHECK(gFoundExt);
 }
 
+extern "C" JNIEXPORT jboolean JNICALL Java_art_Test913_checkInitialized(
+    JNIEnv* env, jclass, jclass c) {
+  jint class_status;
+  const jvmtiError result = jvmti_env->GetClassStatus(c, &class_status);
+  // A truthy JvmtiErrorToException result means the status query failed: report false.
+  const bool failed = JvmtiErrorToException(env, jvmti_env, result);
+  return !failed && (class_status & JVMTI_CLASS_STATUS_INITIALIZED) != 0;
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
diff --git a/test/913-heaps/src/art/Test913.java b/test/913-heaps/src/art/Test913.java
index 97f48ee..b999001 100644
--- a/test/913-heaps/src/art/Test913.java
+++ b/test/913-heaps/src/art/Test913.java
@@ -195,13 +195,33 @@
     System.out.println(getTag(FloatObject.class));
     setTag(FloatObject.class, 0);
 
+    boolean correctHeapValue = false;
     setTag(Inf1.class, 10000);
-    System.out.println(followReferencesPrimitiveFields(Inf1.class));
+    String heapTrace = followReferencesPrimitiveFields(Inf1.class);
+
+    if (!checkInitialized(Inf1.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=0) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf1 is not as expected:\n" + heapTrace);
+
     System.out.println(getTag(Inf1.class));
     setTag(Inf1.class, 0);
 
     setTag(Inf2.class, 10000);
-    System.out.println(followReferencesPrimitiveFields(Inf2.class));
+    heapTrace = followReferencesPrimitiveFields(Inf2.class);
+
+    if (!checkInitialized(Inf2.class)) {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000000");
+    } else {
+      correctHeapValue = heapTrace.equals("10000@0 (static, int, index=1) 0000000000000001");
+    }
+
+    if (!correctHeapValue)
+      System.out.println("Heap Trace for Inf2 is not as expected:\n" + heapTrace);
     System.out.println(getTag(Inf2.class));
     setTag(Inf2.class, 0);
   }
@@ -712,6 +732,7 @@
     return Main.getTag(o);
   }
 
+  private static native boolean checkInitialized(Class<?> klass);
   private static native void setupGcCallback();
   private static native void enableGcTracking(boolean enable);
   private static native int getGcStarts();
diff --git a/test/916-obsolete-jit/src/Main.java b/test/916-obsolete-jit/src/Main.java
index 17a7a86..d7b32ba 100644
--- a/test/916-obsolete-jit/src/Main.java
+++ b/test/916-obsolete-jit/src/Main.java
@@ -132,7 +132,7 @@
           "sayHi", Runnable.class, Consumer.class);
     } catch (Exception e) {
       System.out.println("Unable to find methods!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     // Makes sure the stack is the way we want it for the test and does the redefinition. It will
diff --git a/test/934-load-transform/src/Main.java b/test/934-load-transform/src/Main.java
index 1401b7d..2d0c297 100644
--- a/test/934-load-transform/src/Main.java
+++ b/test/934-load-transform/src/Main.java
@@ -86,7 +86,7 @@
       run_test.invoke(null);
     } catch (Exception e) {
       System.out.println(e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/935-non-retransformable/src/Main.java b/test/935-non-retransformable/src/Main.java
index f240224..5098712 100644
--- a/test/935-non-retransformable/src/Main.java
+++ b/test/935-non-retransformable/src/Main.java
@@ -97,7 +97,7 @@
       run_test.invoke(null);
     } catch (Exception e) {
       System.out.println(e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/938-load-transform-bcp/src-ex/TestMain.java b/test/938-load-transform-bcp/src-ex/TestMain.java
index 3757a0f..b60fe36 100644
--- a/test/938-load-transform-bcp/src-ex/TestMain.java
+++ b/test/938-load-transform-bcp/src-ex/TestMain.java
@@ -29,7 +29,7 @@
       System.out.println(
           "Exception occured (did something load OptionalLong before this test method!: "
           + e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/938-load-transform-bcp/src/Main.java b/test/938-load-transform-bcp/src/Main.java
index 69658c0..939bdbe 100644
--- a/test/938-load-transform-bcp/src/Main.java
+++ b/test/938-load-transform-bcp/src/Main.java
@@ -111,7 +111,7 @@
       run_test.invoke(null);
     } catch (Exception e) {
       System.out.println(e.toString());
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/941-recurive-obsolete-jit/src/Main.java b/test/941-recurive-obsolete-jit/src/Main.java
index 89d593b..e3065a7 100644
--- a/test/941-recurive-obsolete-jit/src/Main.java
+++ b/test/941-recurive-obsolete-jit/src/Main.java
@@ -116,7 +116,7 @@
           "sayHi", int.class, Consumer.class, Runnable.class);
     } catch (Exception e) {
       System.out.println("Unable to find methods!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     // Makes sure the stack is the way we want it for the test and does the redefinition. It will
diff --git a/test/943-private-recursive-jit/src/Main.java b/test/943-private-recursive-jit/src/Main.java
index 871c636..09337ba 100644
--- a/test/943-private-recursive-jit/src/Main.java
+++ b/test/943-private-recursive-jit/src/Main.java
@@ -129,7 +129,7 @@
           "privateSayHi", int.class, Consumer.class, Runnable.class);
     } catch (Exception e) {
       System.out.println("Unable to find methods!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     // Makes sure the stack is the way we want it for the test and does the redefinition. It will
diff --git a/test/947-reflect-method/src/art/Test947.java b/test/947-reflect-method/src/art/Test947.java
index 8cb515e..90e0f81 100644
--- a/test/947-reflect-method/src/art/Test947.java
+++ b/test/947-reflect-method/src/art/Test947.java
@@ -76,7 +76,7 @@
       Redefinition.doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
       say_hi_method.invoke(t);
     } catch (Exception e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/953-invoke-polymorphic-compiler/src/Main.java b/test/953-invoke-polymorphic-compiler/src/Main.java
index 20a8fec..ce3f4db 100644
--- a/test/953-invoke-polymorphic-compiler/src/Main.java
+++ b/test/953-invoke-polymorphic-compiler/src/Main.java
@@ -70,30 +70,30 @@
   }
 
   public static void fail() {
-    System.err.println("fail");
+    System.out.println("fail");
     Thread.dumpStack();
   }
 
   public static void fail(String message) {
-    System.err.println("fail: " + message);
+    System.out.println("fail: " + message);
     Thread.dumpStack();
   }
 
   public static int Min2Print2(int a, int b) {
     int[] values = new int[] { a, b };
-    System.err.println("Running Main.Min2Print2(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print2(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
   public static int Min2Print3(int a, int b, int c) {
     int[] values = new int[] { a, b, c };
-    System.err.println("Running Main.Min2Print3(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print3(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
   public static int Min2Print6(int a, int b, int c, int d, int e, int f) {
     int[] values = new int[] { a, b, c, d, e, f };
-    System.err.println("Running Main.Min2Print6(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print6(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
@@ -106,7 +106,7 @@
                                 int y, int z) {
     int[] values = new int[] { a, b, c, d, e, f, g, h, i, j, k, l, m,
                                n, o, p, q, r, s, t, u, v, w, x, y, z };
-    System.err.println("Running Main.Min2Print26(" + Arrays.toString(values) + ")");
+    System.out.println("Running Main.Min2Print26(" + Arrays.toString(values) + ")");
     return a > b ? a : b;
   }
 
@@ -176,7 +176,7 @@
         fail("No NPE for you");
     } catch (NullPointerException npe) {}
 
-    System.err.println("BasicTest done.");
+    System.out.println("BasicTest done.");
   }
 
   private static boolean And(boolean lhs, boolean rhs) {
@@ -248,7 +248,7 @@
     assertEquals(true, (boolean) mh.invoke(false, true));
     assertEquals(false, (boolean) mh.invoke(false, false));
 
-    System.err.println("$opt$ReturnBooleanTest done.");
+    System.out.println("$opt$ReturnBooleanTest done.");
   }
 
   public static void $opt$ReturnCharTest() throws Throwable {
@@ -257,7 +257,7 @@
                            MethodType.methodType(char.class, char.class));
     assertEquals('B', (char) mh.invokeExact('A'));
     assertEquals((char) -55, (char) mh.invokeExact((char) -56));
-    System.err.println("$opt$ReturnCharTest done.");
+    System.out.println("$opt$ReturnCharTest done.");
   }
 
   public static void $opt$ReturnByteTest() throws Throwable {
@@ -266,7 +266,7 @@
                                          MethodType.methodType(byte.class, byte.class, byte.class));
     assertEquals((byte) 30, (byte) mh.invokeExact((byte) 10, (byte) 3));
     assertEquals((byte) -90, (byte) mh.invoke((byte) -10, (byte) 9));
-    System.err.println("$opt$ReturnByteTest done.");
+    System.out.println("$opt$ReturnByteTest done.");
   }
 
   public static void $opt$ReturnShortTest() throws Throwable {
@@ -275,7 +275,7 @@
                            MethodType.methodType(short.class, short.class, short.class));
     assertEquals((short) 3000, (short) mh.invokeExact((short) 1000, (short) 3));
     assertEquals((short) -3000, (short) mh.invoke((short) -1000, (short) 3));
-    System.err.println("$opt$ReturnShortTest done.");
+    System.out.println("$opt$ReturnShortTest done.");
   }
 
   public static void $opt$ReturnIntTest() throws Throwable {
@@ -284,7 +284,7 @@
                            MethodType.methodType(int.class, int.class, int.class));
     assertEquals(3_000_000, (int) mh.invokeExact(1_000_000, 3));
     assertEquals(-3_000_000, (int) mh.invoke(-1_000, 3_000));
-    System.err.println("$opt$ReturnIntTest done.");
+    System.out.println("$opt$ReturnIntTest done.");
   }
 
   public static void $opt$ReturnLongTest() throws Throwable {
@@ -293,7 +293,7 @@
                            MethodType.methodType(long.class, long.class, long.class));
     assertEquals(4_294_967_295_000L, (long) mh.invokeExact(1000L, 4_294_967_295L));
     assertEquals(-4_294_967_295_000L, (long) mh.invoke(-1000L, 4_294_967_295L));
-    System.err.println("$opt$ReturnLongTest done.");
+    System.out.println("$opt$ReturnLongTest done.");
   }
 
   public static void $opt$ReturnFloatTest() throws Throwable {
@@ -302,7 +302,7 @@
                            MethodType.methodType(float.class, float.class, float.class));
     assertEquals(3.0F, (float) mh.invokeExact(1000.0F, 3e-3F));
     assertEquals(-3.0F, (float) mh.invoke(-1000.0F, 3e-3F));
-    System.err.println("$opt$ReturnFloatTest done.");
+    System.out.println("$opt$ReturnFloatTest done.");
   }
 
   public static void $opt$ReturnDoubleTest() throws Throwable {
@@ -311,7 +311,7 @@
                            MethodType.methodType(double.class, double.class, double.class));
     assertEquals(3033000.0, (double) mh.invokeExact(1000.0, 3.033e3));
     assertEquals(-3033000.0, (double) mh.invoke(-1000.0, 3.033e3));
-    System.err.println("$opt$ReturnDoubleTest done.");
+    System.out.println("$opt$ReturnDoubleTest done.");
   }
 
   public static void $opt$ReturnStringTest() throws Throwable {
@@ -320,7 +320,7 @@
                            MethodType.methodType(String.class, String.class, int.class));
     assertEquals("100010001000", (String) mh.invokeExact("1000", 3));
     assertEquals("100010001000", (String) mh.invoke("1000", 3));
-    System.err.println("$opt$ReturnStringTest done.");
+    System.out.println("$opt$ReturnStringTest done.");
   }
 
   public static void ReturnValuesTest() throws Throwable {
@@ -333,7 +333,7 @@
     $opt$ReturnFloatTest();
     $opt$ReturnDoubleTest();
     $opt$ReturnStringTest();
-    System.err.println("ReturnValuesTest done.");
+    System.out.println("ReturnValuesTest done.");
   }
 
   static class ValueHolder {
diff --git a/test/972-default-imt-collision/src/Main.java b/test/972-default-imt-collision/src/Main.java
index 6819e43..043cef1 100644
--- a/test/972-default-imt-collision/src/Main.java
+++ b/test/972-default-imt-collision/src/Main.java
@@ -24,7 +24,7 @@
       Method test = c.getMethod("testMe", iface);
       test.invoke(null, o);
     } catch (Exception e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       System.out.println("FAILED: could not run testMe!");
     }
   }
diff --git a/test/972-iface-super-multidex/src/Main.java b/test/972-iface-super-multidex/src/Main.java
index 3fb3f45..dea5f1d 100644
--- a/test/972-iface-super-multidex/src/Main.java
+++ b/test/972-iface-super-multidex/src/Main.java
@@ -22,7 +22,7 @@
       c = Class.forName("ConcreteClass");
     } catch (Exception e) {
       System.out.println("Could not load class");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       return;
     }
     try {
@@ -30,7 +30,7 @@
       System.out.println((String)m.invoke(c.newInstance(), new Object[0]));
     } catch (Exception e) {
       System.out.println("Unknown exception occurred");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
     try {
       Method m = c.getMethod("runConflict");
@@ -41,15 +41,15 @@
       }
     } catch (AbstractMethodError e) {
       System.out.println("Unexpected AME caught");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     } catch (NoSuchMethodError e) {
       System.out.println("Unexpected NSME caught");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     } catch (IncompatibleClassChangeError e) {
       System.out.println("Expected ICCE caught");
     } catch (Throwable e) {
       System.out.println("Unknown exception caught!");
-      e.printStackTrace();
+      e.printStackTrace(System.out);
     }
   }
 }
diff --git a/test/973-default-multidex/src/Main.java b/test/973-default-multidex/src/Main.java
index b93265a..c7dd6dc 100644
--- a/test/973-default-multidex/src/Main.java
+++ b/test/973-default-multidex/src/Main.java
@@ -23,7 +23,7 @@
       Method m = c.getMethod("callMethod");
       System.out.println(m.invoke(c.newInstance(), new Object[0]));
     } catch (Exception e) {
-      e.printStackTrace();
+      e.printStackTrace(System.out);
       System.out.println("FAILED: Could not call method");
       return;
     }
diff --git a/test/983-source-transform-verify/source_transform.cc b/test/983-source-transform-verify/source_transform.cc
index 3ef3c7c..a433dc9 100644
--- a/test/983-source-transform-verify/source_transform.cc
+++ b/test/983-source-transform-verify/source_transform.cc
@@ -34,7 +34,7 @@
 #include "jvmti.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 #include "thread_list.h"
 
 // Test infrastructure
diff --git a/test/987-stack-trace-dumping/info.txt b/test/987-stack-trace-dumping/info.txt
deleted file mode 100644
index e69de29..0000000
--- a/test/987-stack-trace-dumping/info.txt
+++ /dev/null
diff --git a/test/987-stack-trace-dumping/src/Main.java b/test/987-stack-trace-dumping/src/Main.java
deleted file mode 100644
index d1e8a1b..0000000
--- a/test/987-stack-trace-dumping/src/Main.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Copyright (C) 2017 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-
-public class Main {
-    public static void main(String[] args) throws Exception {
-        if (args.length != 3) {
-            throw new AssertionError("Unexpected number of args: " + args.length);
-        }
-
-        if (!"--stack-trace-dir".equals(args[1])) {
-            throw new AssertionError("Unexpected argument in position 1: " + args[1]);
-        }
-
-        // Send ourselves signal 3, which forces stack traces to be written to disk.
-        android.system.Os.kill(android.system.Os.getpid(), 3);
-
-        File[] files = null;
-        final String stackTraceDir = args[2];
-        for (int i = 0; i < 5; ++i) {
-            // Give the signal handler some time to run and dump traces - up to a maximum
-            // of 5 seconds. This is a kludge, but it's hard to do this without using things
-            // like inotify / WatchService and the like.
-            Thread.sleep(1000);
-
-            files = (new File(stackTraceDir)).listFiles();
-            if (files != null && files.length == 1) {
-                break;
-            }
-        }
-
-
-        if (files == null) {
-            throw new AssertionError("Gave up waiting for traces: " + java.util.Arrays.toString(files));
-        }
-
-        final String fileName = files[0].getName();
-        if (!fileName.startsWith("anr-pid")) {
-            throw new AssertionError("Unexpected prefix: " + fileName);
-        }
-
-        if (!fileName.contains(String.valueOf(android.system.Os.getpid()))) {
-            throw new AssertionError("File name does not contain process PID: " + fileName);
-        }
-    }
-}
diff --git a/test/988-method-trace/expected.txt b/test/988-method-trace/expected.txt
new file mode 100644
index 0000000..d3d9249
--- /dev/null
+++ b/test/988-method-trace/expected.txt
@@ -0,0 +1,276 @@
+<= public static native void art.Trace.enableMethodTracing(java.lang.Class,java.lang.reflect.Method,java.lang.reflect.Method,java.lang.Thread) -> <null: null>
+=> art.Test988$IterOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$IterOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$IterOp.applyAsInt(int)
+..=> static int art.Test988.iter_fibonacci(int)
+..<= static int art.Test988.iter_fibonacci(int) -> <class java.lang.Integer: 832040>
+.<= public int art.Test988$IterOp.applyAsInt(int) -> <class java.lang.Integer: 832040>
+.=> public art.Test988$FibResult(java.lang.String,int,int)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibResult(java.lang.String,int,int) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(30)=832040
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> art.Test988$RecurOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$RecurOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$RecurOp.applyAsInt(int)
+..=> static int art.Test988.fibonacci(int)
+...=> static int art.Test988.fibonacci(int)
+....=> static int art.Test988.fibonacci(int)
+.....=> static int art.Test988.fibonacci(int)
+......=> static int art.Test988.fibonacci(int)
+......<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+......=> static int art.Test988.fibonacci(int)
+......<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 0>
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 2>
+....=> static int art.Test988.fibonacci(int)
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 0>
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+...<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 3>
+...=> static int art.Test988.fibonacci(int)
+....=> static int art.Test988.fibonacci(int)
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+.....=> static int art.Test988.fibonacci(int)
+.....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 0>
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+....=> static int art.Test988.fibonacci(int)
+....<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 1>
+...<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 2>
+..<= static int art.Test988.fibonacci(int) -> <class java.lang.Integer: 5>
+.<= public int art.Test988$RecurOp.applyAsInt(int) -> <class java.lang.Integer: 5>
+.=> public art.Test988$FibResult(java.lang.String,int,int)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibResult(java.lang.String,int,int) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(5)=5
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> art.Test988$IterOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$IterOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$IterOp.applyAsInt(int)
+..=> static int art.Test988.iter_fibonacci(int)
+...=> public java.lang.StringBuilder()
+....=> java.lang.AbstractStringBuilder(int)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+....<= java.lang.AbstractStringBuilder(int) -> <null: null>
+...<= public java.lang.StringBuilder() -> <null: null>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(int)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int)
+.....=> static int java.lang.Integer.stringSize(int)
+.....<= static int java.lang.Integer.stringSize(int) -> <class java.lang.Integer: 2>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+......=> private int java.lang.AbstractStringBuilder.newCapacity(int)
+......<= private int java.lang.AbstractStringBuilder.newCapacity(int) -> <class java.lang.Integer: 34>
+......=> public static char[] java.util.Arrays.copyOf(char[],int)
+.......=> public static int java.lang.Math.min(int,int)
+.......<= public static int java.lang.Math.min(int,int) -> <class java.lang.Integer: 16>
+.......=> public static void java.lang.System.arraycopy(char[],int,char[],int,int)
+.......<= public static void java.lang.System.arraycopy(char[],int,char[],int,int) -> <null: null>
+......<= public static char[] java.util.Arrays.copyOf(char[],int) -> <class [C: [B, a, d,  , a, r, g, u, m, e, n, t, :,  , -, 1, 9,  , <,  , 0, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>]>
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> static void java.lang.Integer.getChars(int,int,char[])
+.....<= static void java.lang.Integer.getChars(int,int,char[]) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.String java.lang.StringBuilder.toString()
+....=> static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[])
+....<= static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[]) -> <class java.lang.String: Bad argument: -19 < 0>
+...<= public java.lang.String java.lang.StringBuilder.toString() -> <class java.lang.String: Bad argument: -19 < 0>
+...=> public java.lang.Error(java.lang.String)
+....=> public java.lang.Throwable(java.lang.String)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+.....=> public static final java.util.List java.util.Collections.emptyList()
+.....<= public static final java.util.List java.util.Collections.emptyList() -> <class java.util.Collections$EmptyList: []>
+.....=> public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace()
+......=> private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace()
+......<= private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace() -> <class [Ljava.lang.Object;: <non-deterministic>>
+.....<= public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace() -> <class java.lang.Error: java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.iter_fibonacci(Test988.java:207)
+	at art.Test988$IterOp.applyAsInt(Test988.java:202)
+	at art.Test988.doFibTest(Test988.java:295)
+	at art.Test988.run(Test988.java:265)
+	at Main.main(Main.java:19)
+>
+....<= public java.lang.Throwable(java.lang.String) -> <null: null>
+...<= public java.lang.Error(java.lang.String) -> <null: null>
+..<= static int art.Test988.iter_fibonacci(int) EXCEPTION
+.<= public int art.Test988$IterOp.applyAsInt(int) EXCEPTION
+.=> public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(-19) -> java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.iter_fibonacci(Test988.java:207)
+	at art.Test988$IterOp.applyAsInt(Test988.java:202)
+	at art.Test988.doFibTest(Test988.java:295)
+	at art.Test988.run(Test988.java:265)
+	at Main.main(Main.java:19)
+
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> art.Test988$RecurOp()
+.=> public java.lang.Object()
+.<= public java.lang.Object() -> <null: null>
+<= art.Test988$RecurOp() -> <null: null>
+=> public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator)
+.=> public int art.Test988$RecurOp.applyAsInt(int)
+..=> static int art.Test988.fibonacci(int)
+...=> public java.lang.StringBuilder()
+....=> java.lang.AbstractStringBuilder(int)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+....<= java.lang.AbstractStringBuilder(int) -> <null: null>
+...<= public java.lang.StringBuilder() -> <null: null>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 14>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(int)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int)
+.....=> static int java.lang.Integer.stringSize(int)
+.....<= static int java.lang.Integer.stringSize(int) -> <class java.lang.Integer: 2>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+......=> private int java.lang.AbstractStringBuilder.newCapacity(int)
+......<= private int java.lang.AbstractStringBuilder.newCapacity(int) -> <class java.lang.Integer: 34>
+......=> public static char[] java.util.Arrays.copyOf(char[],int)
+.......=> public static int java.lang.Math.min(int,int)
+.......<= public static int java.lang.Math.min(int,int) -> <class java.lang.Integer: 16>
+.......=> public static void java.lang.System.arraycopy(char[],int,char[],int,int)
+.......<= public static void java.lang.System.arraycopy(char[],int,char[],int,int) -> <null: null>
+......<= public static char[] java.util.Arrays.copyOf(char[],int) -> <class [C: [B, a, d,  , a, r, g, u, m, e, n, t, :,  , -, 1, 9,  , <,  , 0, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>, <control-0000>]>
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> static void java.lang.Integer.getChars(int,int,char[])
+.....<= static void java.lang.Integer.getChars(int,int,char[]) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(int) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String)
+....=> public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String)
+.....=> public int java.lang.String.length()
+.....<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+.....=> private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int)
+.....<= private void java.lang.AbstractStringBuilder.ensureCapacityInternal(int) -> <null: null>
+.....=> public void java.lang.String.getChars(int,int,char[],int)
+......=> public int java.lang.String.length()
+......<= public int java.lang.String.length() -> <class java.lang.Integer: 4>
+......=> native void java.lang.String.getCharsNoCheck(int,int,char[],int)
+......<= native void java.lang.String.getCharsNoCheck(int,int,char[],int) -> <null: null>
+.....<= public void java.lang.String.getChars(int,int,char[],int) -> <null: null>
+....<= public java.lang.AbstractStringBuilder java.lang.AbstractStringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...<= public java.lang.StringBuilder java.lang.StringBuilder.append(java.lang.String) -> <class java.lang.StringBuilder: Bad argument: -19 < 0>
+...=> public java.lang.String java.lang.StringBuilder.toString()
+....=> static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[])
+....<= static native java.lang.String java.lang.StringFactory.newStringFromChars(int,int,char[]) -> <class java.lang.String: Bad argument: -19 < 0>
+...<= public java.lang.String java.lang.StringBuilder.toString() -> <class java.lang.String: Bad argument: -19 < 0>
+...=> public java.lang.Error(java.lang.String)
+....=> public java.lang.Throwable(java.lang.String)
+.....=> public java.lang.Object()
+.....<= public java.lang.Object() -> <null: null>
+.....=> public static final java.util.List java.util.Collections.emptyList()
+.....<= public static final java.util.List java.util.Collections.emptyList() -> <class java.util.Collections$EmptyList: []>
+.....=> public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace()
+......=> private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace()
+......<= private static native java.lang.Object java.lang.Throwable.nativeFillInStackTrace() -> <class [Ljava.lang.Object;: <non-deterministic>>
+.....<= public synchronized java.lang.Throwable java.lang.Throwable.fillInStackTrace() -> <class java.lang.Error: java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.fibonacci(Test988.java:229)
+	at art.Test988$RecurOp.applyAsInt(Test988.java:224)
+	at art.Test988.doFibTest(Test988.java:295)
+	at art.Test988.run(Test988.java:266)
+	at Main.main(Main.java:19)
+>
+....<= public java.lang.Throwable(java.lang.String) -> <null: null>
+...<= public java.lang.Error(java.lang.String) -> <null: null>
+..<= static int art.Test988.fibonacci(int) EXCEPTION
+.<= public int art.Test988$RecurOp.applyAsInt(int) EXCEPTION
+.=> public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable)
+..=> public java.lang.Object()
+..<= public java.lang.Object() -> <null: null>
+.<= public art.Test988$FibThrow(java.lang.String,int,java.lang.Throwable) -> <null: null>
+.=> public boolean java.util.ArrayList.add(java.lang.Object)
+..=> private void java.util.ArrayList.ensureCapacityInternal(int)
+...=> private void java.util.ArrayList.ensureExplicitCapacity(int)
+...<= private void java.util.ArrayList.ensureExplicitCapacity(int) -> <null: null>
+..<= private void java.util.ArrayList.ensureCapacityInternal(int) -> <null: null>
+fibonacci(-19) -> java.lang.Error: Bad argument: -19 < 0
+	at art.Test988.fibonacci(Test988.java:229)
+	at art.Test988$RecurOp.applyAsInt(Test988.java:224)
+	at art.Test988.doFibTest(Test988.java:295)
+	at art.Test988.run(Test988.java:266)
+	at Main.main(Main.java:19)
+
+.<= public boolean java.util.ArrayList.add(java.lang.Object) -> <class java.lang.Boolean: true>
+<= public static void art.Test988.doFibTest(int,java.util.function.IntUnaryOperator) -> <null: null>
+=> public static native java.lang.Thread java.lang.Thread.currentThread()
+<= public static native java.lang.Thread java.lang.Thread.currentThread() -> <class java.lang.Thread: <non-deterministic>>
+=> public static native void art.Trace.disableMethodTracing(java.lang.Thread)
diff --git a/test/988-method-trace/info.txt b/test/988-method-trace/info.txt
new file mode 100644
index 0000000..f0a200d
--- /dev/null
+++ b/test/988-method-trace/info.txt
@@ -0,0 +1,15 @@
+Tests method tracing in JVMTI
+
+This test is sensitive to the internal implementations of:
+ * java.lang.Error
+ * java.lang.Integer
+ * java.lang.Math
+ * java.lang.String
+ * java.lang.System
+ * java.util.ArrayList
+ * java.util.Arrays
+ * java.lang.StringBuilder
+ * all super-classes and super-interfaces of the above types.
+
+Changes to the internal implementation of these classes might (or might not)
+change the output of this test.
diff --git a/test/987-stack-trace-dumping/run b/test/988-method-trace/run
similarity index 93%
rename from test/987-stack-trace-dumping/run
rename to test/988-method-trace/run
index dee3e8b..51875a7 100755
--- a/test/987-stack-trace-dumping/run
+++ b/test/988-method-trace/run
@@ -15,4 +15,4 @@
 # limitations under the License.
 
 # Ask for stack traces to be dumped to a file rather than to stdout.
-./default-run "$@" --set-stack-trace-dump-dir
+./default-run "$@" --jvmti
diff --git a/test/988-method-trace/src/Main.java b/test/988-method-trace/src/Main.java
new file mode 100644
index 0000000..9dd1142
--- /dev/null
+++ b/test/988-method-trace/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {  // Test entry point; main() only delegates to art.Test988.run().
+  public static void main(String[] args) throws Exception {
+    art.Test988.run();  // expected.txt lists this frame as Main.java:19.
+  }
+}
diff --git a/test/988-method-trace/src/art/Test988.java b/test/988-method-trace/src/art/Test988.java
new file mode 100644
index 0000000..37ff136
--- /dev/null
+++ b/test/988-method-trace/src/art/Test988.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import java.util.Arrays;
+import java.lang.reflect.Method;
+import java.util.List;
+import java.util.Set;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.function.IntUnaryOperator;
+import java.util.function.Function;
+
+public class Test988 {
+
+    // Methods with non-deterministic output that should not be printed.
+    static Set<Method> NON_DETERMINISTIC_OUTPUT_METHODS = new HashSet<>();
+
+    static {  // Registers methods whose return values are printed as "<non-deterministic>".
+      try {
+        NON_DETERMINISTIC_OUTPUT_METHODS.add(
+            Throwable.class.getDeclaredMethod("nativeFillInStackTrace"));
+      } catch (Exception e) {}  // NOTE(review): a failed lookup is silently ignored -- confirm intended.
+      try {
+        NON_DETERMINISTIC_OUTPUT_METHODS.add(Thread.class.getDeclaredMethod("currentThread"));
+      } catch (Exception e) {}  // Same silent, best-effort handling as the lookup above.
+    }
+
+    interface Printable {  // Implemented by every recorded item that can print itself.
+        void Print();
+    }
+
+    static final class MethodEntry implements Printable {  // Printed as an "=> method" line.
+        private final Object method;
+        private final int indent;
+        public MethodEntry(Object m, int cnt) {
+            method = m;
+            indent = cnt;
+        }
+        @Override
+        public void Print() {
+            System.out.println(whitespace(indent) + "=> " + method);
+        }
+    }
+
+    private static String genericToString(Object val) {
+      // Renders val as text: "null", array contents, a Throwable's stack trace, or toString().
+      if (val == null) {
+        return "null";
+      } else if (val.getClass().isArray()) {
+        return arrayToString(val);
+      } else if (!(val instanceof Throwable)) {
+        return val.toString();
+      }
+      StringWriter buffer = new StringWriter();
+      ((Throwable) val).printStackTrace(new PrintWriter(buffer));
+      return buffer.toString();
+    }
+
+    private static String charArrayToString(char[] src) {
+      // ISO control characters are replaced by their Character.getName() text; every other
+      // char is kept as a one-character string. The result uses Arrays.toString formatting.
+      String[] shown = new String[src.length];
+      int next = 0;
+      for (char c : src) {
+        shown[next++] = Character.isISOControl(c) ? Character.getName(c)
+                                                  : Character.toString(c);
+      }
+      return Arrays.toString(shown);
+    }
+
+    private static String arrayToString(Object val) {
+      // Formats an array of any element type. Reference arrays are rendered element by element
+      // via genericToString; char[] goes through charArrayToString; other primitives use Arrays.
+      if (val instanceof Object[]) {
+        return Arrays.toString(Arrays.stream((Object[])val).map(new Function<Object, String>() {
+          public String apply(Object o) { return Test988.genericToString(o); }
+        }).toArray());
+      } else if (val instanceof boolean[]) {
+        // boolean[] used to be missing here and ended in the "Unknown type" Error.
+        return Arrays.toString((boolean[])val);
+      } else if (val instanceof byte[]) {
+        return Arrays.toString((byte[])val);
+      } else if (val instanceof char[]) {
+        return charArrayToString((char[])val);
+      } else if (val instanceof short[]) {
+        return Arrays.toString((short[])val);
+      } else if (val instanceof int[]) {
+        return Arrays.toString((int[])val);
+      } else if (val instanceof long[]) {
+        return Arrays.toString((long[])val);
+      } else if (val instanceof float[]) {
+        return Arrays.toString((float[])val);
+      } else if (val instanceof double[]) {
+        return Arrays.toString((double[])val);
+      }
+      throw new Error("Unknown type " + val.getClass());
+    }
+
+    static final class MethodReturn implements Printable {
+        // Holds a method, the value it returned and the indent to print at; printed as a
+        // "<= method -> <class: value>" line.
+        private final Object method;
+        private final Object value;
+        private final int indent;
+        public MethodReturn(Object m, Object val, int cnt) {
+            method = m;
+            value = val;
+            indent = cnt;
+        }
+        @Override
+        public void Print() {
+            // Values returned by the methods in NON_DETERMINISTIC_OUTPUT_METHODS are masked
+            // with a fixed placeholder instead of being rendered.
+            String shown = NON_DETERMINISTIC_OUTPUT_METHODS.contains(method)
+                ? "<non-deterministic>"
+                : genericToString(value);
+            // The class is still printed for masked values; a null value prints as "null".
+            Class<?> type = (value == null) ? null : value.getClass();
+            String line =
+                whitespace(indent) + "<= " + method + " -> <" + type + ": " + shown + ">";
+            System.out.println(line);
+        }
+    }
+
+    static final class MethodThrownThrough implements Printable {  // "<= method EXCEPTION" line.
+        private final Object method;
+        private final int indent;
+        public MethodThrownThrough(Object m, int cnt) {
+            method = m;
+            indent = cnt;
+        }
+        @Override
+        public void Print() {
+            System.out.println(whitespace(indent) + "<= " + method + " EXCEPTION");
+        }
+    }
+
+    private static String whitespace(int n) {
+      // Returns n '.' characters ("" when n <= 0); StringBuilder avoids quadratic concatenation.
+      StringBuilder dots = new StringBuilder();
+      for (int i = 0; i < n; i++) {
+        dots.append('.');
+      }
+      return dots.toString();
+    }
+
+    static final class FibThrow implements Printable {
+        // Holds a format string, the argument and a Throwable; Print() hands them to printf.
+        private final String fmt;
+        private final int input;
+        private final Throwable thrown;
+        public FibThrow(String format, int arg, Throwable res) {
+            fmt = format;
+            input = arg;
+            thrown = res;
+        }
+        @Override
+        public void Print() {
+            System.out.printf(fmt, input, genericToString(thrown));
+        }
+    }
+
+    static final class FibResult implements Printable {  // successful fibonacci call record
+        private String format;  // printf-style pattern consumed by Print()
+        private int arg;  // input passed to the fibonacci operator
+        private int res;  // value the operator returned
+        public FibResult(String format, int arg, int res) {
+            this.format = format;
+            this.arg = arg;
+            this.res = res;
+        }
+        // Writes the stored argument and result to stdout using the stored format.
+        @Override
+        public void Print() {
+            System.out.printf(format, arg, res);
+        }
+    }
+
+    private static List<Printable> results = new ArrayList<>();  // records in the order added
+    private static int cnt = 1;  // indent depth: raised on method entry, lowered on exit
+
+    // Iterative version: IntUnaryOperator wrapper around iter_fibonacci, passed to doFibTest.
+    static final class IterOp implements IntUnaryOperator {
+      public int applyAsInt(int x) {
+        return iter_fibonacci(x);
+      }
+    }
+    // Loop-based Fibonacci: returns 0 for n == 0 and F(n) otherwise; throws Error for n < 0.
+    static int iter_fibonacci(int n) {
+        if (n < 0) {
+            throw new Error("Bad argument: " + n + " < 0");
+        }
+        int prev = 1;
+        int curr = (n == 0) ? 0 : 1;
+        // F(1) and F(2) are both 1, so the loop only has work to do from i == 3 onwards.
+        for (int i = 3; i <= n; i++) {
+            int next = prev + curr;
+            prev = curr;
+            curr = next;
+        }
+        return curr;
+    }
+
+    // Recursive version: IntUnaryOperator wrapper around fibonacci, passed to doFibTest.
+    static final class RecurOp implements IntUnaryOperator {
+      public int applyAsInt(int x) {
+        return fibonacci(x);
+      }
+    }
+    static int fibonacci(int n) {  // naive doubly-recursive Fibonacci; n < 0 throws Error
+        if (n < 0) {
+            throw new Error("Bad argument: " + n + " < 0");
+        }
+        if (n >= 2) {
+            return fibonacci(n - 1) + fibonacci(n - 2);
+        }
+        return n;
+    }
+
+    public static void notifyMethodEntry(Object m) {
+        // Called by native code when a method is entered. This method is ignored by the native
+        // entry and exit hooks.
+        results.add(new MethodEntry(m, cnt));
+        cnt++;  // records added until the matching exit are indented one level deeper
+    }
+
+    // Method-exit callback: steps the depth back out, then records a throw or a return.
+    public static void notifyMethodExit(Object m, boolean exception, Object result) {
+        cnt--;
+        Printable record = exception
+            ? new MethodThrownThrough(m, cnt)
+            : new MethodReturn(m, result, cnt);
+        results.add(record);
+    }
+
+    public static void run() throws Exception {  // traces four fibonacci runs, then prints
+        // Call this up front so it is linked; the call itself has no other effect here.
+        loadAllClasses();
+        Trace.disableMethodTracing(Thread.currentThread());
+        Trace.enableMethodTracing(
+            Test988.class,
+            Test988.class.getDeclaredMethod("notifyMethodEntry", Object.class),
+            Test988.class.getDeclaredMethod(
+                "notifyMethodExit", Object.class, Boolean.TYPE, Object.class),
+            Thread.currentThread());
+        doFibTest(30, new IterOp());
+        doFibTest(5, new RecurOp());
+        doFibTest(-19, new IterOp());  // negative input: iter_fibonacci throws Error
+        doFibTest(-19, new RecurOp());  // negative input: fibonacci throws Error
+        // Turn off method tracing so we don't have to deal with print internals.
+        Trace.disableMethodTracing(Thread.currentThread());
+        printResults();
+    }
+
+    // This ensures that all classes we touch are loaded before we start recording traces (run()
+    // calls it before enabling tracing). It removes a major source of RI/ART divergence.
+    public static void loadAllClasses() {
+      MethodThrownThrough.class.toString();  // toString() results are discarded throughout
+      MethodEntry.class.toString();
+      MethodReturn.class.toString();
+      FibResult.class.toString();
+      FibThrow.class.toString();
+      Printable.class.toString();
+      ArrayList.class.toString();
+      RecurOp.class.toString();
+      IterOp.class.toString();
+      StringBuilder.class.toString();
+    }
+
+    public static void printResults() {  // prints every record in the order it was added
+        for (int i = 0; i < results.size(); i++) {
+            results.get(i).Print();
+        }
+    }
+
+    public static void doFibTest(int x, IntUnaryOperator op) {  // records op(x) or what it threw
+      try {
+        int y = op.applyAsInt(x);
+        results.add(new FibResult("fibonacci(%d)=%d\n", x, y));
+      } catch (Throwable t) {  // Throwable, so the Error thrown for negative x is caught too
+        results.add(new FibThrow("fibonacci(%d) -> %s\n", x, t));
+      }
+    }
+}
diff --git a/test/988-method-trace/src/art/Trace.java b/test/988-method-trace/src/art/Trace.java
new file mode 100644
index 0000000..3370996
--- /dev/null
+++ b/test/988-method-trace/src/art/Trace.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+
+public class Trace {  // native methods Test988 uses to switch method tracing on and off
+  public static native void enableMethodTracing(
+      Class<?> methodClass, Method entryMethod, Method exitMethod, Thread thr);
+  public static native void disableMethodTracing(Thread thr);
+}
diff --git a/test/987-stack-trace-dumping/expected.txt b/test/988-redefine-use-after-free/expected.txt
similarity index 100%
rename from test/987-stack-trace-dumping/expected.txt
rename to test/988-redefine-use-after-free/expected.txt
diff --git a/test/988-redefine-use-after-free/info.txt b/test/988-redefine-use-after-free/info.txt
new file mode 100644
index 0000000..2b683dd
--- /dev/null
+++ b/test/988-redefine-use-after-free/info.txt
@@ -0,0 +1,13 @@
+Regression test for b/62237378
+
+It was possible for the JVMTI class redefinition to encounter a use-after-free
+bug if there had been an attempted redefinition that failed due to a
+verification error in the same class loader. Actually encountering the bug
+required that a later redefinition happen to get the same native pointer for its
+dex-file as the failed redefinition.
+
+Hitting this use-after-free can cause many strange outcomes, from CHECK failures
+to segfaults to incorrect redefinition failures (for example on buggy builds
+this test will fail a DCHECK on debug builds, segfault on x86_64 hosts and have
+redefinition of LDexCacheSmash$Transform; erroneously fail with
+JVMTI_ERROR_FAILS_VERIFICATION on 32 bit hosts).
diff --git a/test/987-stack-trace-dumping/run b/test/988-redefine-use-after-free/run
similarity index 77%
copy from test/987-stack-trace-dumping/run
copy to test/988-redefine-use-after-free/run
index dee3e8b..c6e62ae 100755
--- a/test/987-stack-trace-dumping/run
+++ b/test/988-redefine-use-after-free/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2017 The Android Open Source Project
+# Copyright 2016 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,5 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# Ask for stack traces to be dumped to a file rather than to stdout.
-./default-run "$@" --set-stack-trace-dump-dir
+./default-run "$@" --jvmti
diff --git a/test/988-redefine-use-after-free/src-ex/DexCacheSmash.java b/test/988-redefine-use-after-free/src-ex/DexCacheSmash.java
new file mode 100644
index 0000000..2193a63
--- /dev/null
+++ b/test/988-redefine-use-after-free/src-ex/DexCacheSmash.java
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import art.Redefinition;
+import java.util.Base64;
+
+public class DexCacheSmash {
+  static class Transform {  // redefinition target for TRANSFORM_INITIAL
+    public void foo() {}
+    public void bar() {}
+    public String getId() {
+      return "TRANSFORM_INITIAL";
+    }
+  }
+
+  static class Transform2 {  // redefinition target for TRANSFORM2_INVALID
+    public String getId() {
+      return "TRANSFORM2_INITIAL";
+    }
+  }
+
+  /**
+   * A base64 encoding of the dex/class file of the Transform class above.
+   */
+  static final  Redefinition.CommonClassDefinition TRANSFORM_INITIAL =
+      new Redefinition.CommonClassDefinition(Transform.class,
+          Base64.getDecoder().decode(
+            "yv66vgAAADQAFwoABAAPCAAQBwASBwAVAQAGPGluaXQ+AQADKClWAQAEQ29kZQEAD0xpbmVOdW1i" +
+            "ZXJUYWJsZQEAA2ZvbwEAA2JhcgEABWdldElkAQAUKClMamF2YS9sYW5nL1N0cmluZzsBAApTb3Vy" +
+            "Y2VGaWxlAQASRGV4Q2FjaGVTbWFzaC5qYXZhDAAFAAYBABFUUkFOU0ZPUk1fSU5JVElBTAcAFgEA" +
+            "F0RleENhY2hlU21hc2gkVHJhbnNmb3JtAQAJVHJhbnNmb3JtAQAMSW5uZXJDbGFzc2VzAQAQamF2" +
+            "YS9sYW5nL09iamVjdAEADURleENhY2hlU21hc2gAIAADAAQAAAAAAAQAAAAFAAYAAQAHAAAAHQAB" +
+            "AAEAAAAFKrcAAbEAAAABAAgAAAAGAAEAAAATAAEACQAGAAEABwAAABkAAAABAAAAAbEAAAABAAgA" +
+            "AAAGAAEAAAAUAAEACgAGAAEABwAAABkAAAABAAAAAbEAAAABAAgAAAAGAAEAAAAVAAEACwAMAAEA" +
+            "BwAAABsAAQABAAAAAxICsAAAAAEACAAAAAYAAQAAABcAAgANAAAAAgAOABQAAAAKAAEAAwARABMA" +
+            "CA=="),
+          Base64.getDecoder().decode(
+            "ZGV4CjAzNQDhg9CfghG1SRlLClguRuFYsqihr4F7NsGQAwAAcAAAAHhWNBIAAAAAAAAAAOQCAAAS" +
+            "AAAAcAAAAAcAAAC4AAAAAgAAANQAAAAAAAAAAAAAAAUAAADsAAAAAQAAABQBAABcAgAANAEAAKgB" +
+            "AACwAQAAxAEAAMcBAADiAQAA8wEAABcCAAA3AgAASwIAAF8CAAByAgAAfQIAAIACAACNAgAAkgIA" +
+            "AJcCAACeAgAApAIAAAMAAAAEAAAABQAAAAYAAAAHAAAACAAAAAsAAAACAAAABQAAAAAAAAALAAAA" +
+            "BgAAAAAAAAAAAAEAAAAAAAAAAQANAAAAAAABAA4AAAAAAAAADwAAAAQAAQAAAAAAAAAAAAAAAAAE" +
+            "AAAAAAAAAAEAAACYAQAAzgIAAAAAAAACAAAAvwIAAMUCAAABAAEAAQAAAKsCAAAEAAAAcBAEAAAA" +
+            "DgABAAEAAAAAALACAAABAAAADgAAAAEAAQAAAAAAtQIAAAEAAAAOAAAAAgABAAAAAAC6AgAAAwAA" +
+            "ABoACQARAAAANAEAAAAAAAAAAAAAAAAAAAY8aW5pdD4AEkRleENhY2hlU21hc2guamF2YQABTAAZ" +
+            "TERleENhY2hlU21hc2gkVHJhbnNmb3JtOwAPTERleENhY2hlU21hc2g7ACJMZGFsdmlrL2Fubm90" +
+            "YXRpb24vRW5jbG9zaW5nQ2xhc3M7AB5MZGFsdmlrL2Fubm90YXRpb24vSW5uZXJDbGFzczsAEkxq" +
+            "YXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABFUUkFOU0ZPUk1fSU5JVElBTAAJ" +
+            "VHJhbnNmb3JtAAFWAAthY2Nlc3NGbGFncwADYmFyAANmb28ABWdldElkAARuYW1lAAV2YWx1ZQAT" +
+            "AAcOABUABw4AFAAHDgAXAAcOAAICAREYAQIDAgwECBAXCgAAAQMAgIAEwAIBAdgCAQHsAgEBgAMO" +
+            "AAAAAAAAAAEAAAAAAAAAAQAAABIAAABwAAAAAgAAAAcAAAC4AAAAAwAAAAIAAADUAAAABQAAAAUA" +
+            "AADsAAAABgAAAAEAAAAUAQAAAxAAAAEAAAA0AQAAASAAAAQAAABAAQAABiAAAAEAAACYAQAAAiAA" +
+            "ABIAAACoAQAAAyAAAAQAAACrAgAABCAAAAIAAAC/AgAAACAAAAEAAADOAgAAABAAAAEAAADkAgAA"));
+
+  /**
+   * A base64 encoding of the following (invalid) class.
+   *
+   *  .class LDexCacheSmash$Transform2;
+   *  .super Ljava/lang/Object;
+   *  .source "DexCacheSmash.java"
+   *
+   *  # annotations
+   *  .annotation system Ldalvik/annotation/EnclosingClass;
+   *      value = LDexCacheSmash;
+   *  .end annotation
+   *
+   *  .annotation system Ldalvik/annotation/InnerClass;
+   *      accessFlags = 0x8
+   *      name = "Transform2"
+   *  .end annotation
+   *
+   *
+   *  # direct methods
+   *  .method constructor <init>()V
+   *      .registers 1
+   *
+   *      .prologue
+   *      .line 26
+   *      invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+   *
+   *      return-void
+   *  .end method
+   *
+   *
+   *  # virtual methods
+   *  .method public getId()Ljava/lang/String;
+   *      .registers 2
+   *
+   *      .prologue
+   *      .line 28
+   *      # NB Fails verification due to this function not returning a String.
+   *      return-void
+   *  .end method
+   */
+  static final  Redefinition.CommonClassDefinition TRANSFORM2_INVALID =
+      new Redefinition.CommonClassDefinition(Transform2.class,
+          Base64.getDecoder().decode(
+            "yv66vgAAADQAEwcAEgcAEQEABjxpbml0PgEAAygpVgEABENvZGUKAAIAEAEAD0xpbmVOdW1iZXJU" +
+            "YWJsZQEABWdldElkAQAUKClMamF2YS9sYW5nL1N0cmluZzsBAApTb3VyY2VGaWxlAQASRGV4Q2Fj" +
+            "aGVTbWFzaC5qYXZhAQAMSW5uZXJDbGFzc2VzBwAPAQAKVHJhbnNmb3JtMgEADURleENhY2hlU21h" +
+            "c2gMAAMABAEAEGphdmEvbGFuZy9PYmplY3QBABhEZXhDYWNoZVNtYXNoJFRyYW5zZm9ybTIAIAAB" +
+            "AAIAAAAAAAIAAAADAAQAAQAFAAAAHQABAAEAAAAFKrcABrEAAAABAAcAAAAGAAEAAAAaAAEACAAJ" +
+            "AAEABQAAABkAAQABAAAAAbEAAAABAAcAAAAGAAEAAAAcAAIACgAAAAIACwAMAAAACgABAAEADQAO" +
+            "AAg="),
+          Base64.getDecoder().decode(
+            "ZGV4CjAzNQCFcegr6Ns+I7iEF4uLRkUX4yGrLhP6soEgAwAAcAAAAHhWNBIAAAAAAAAAAHQCAAAP" +
+            "AAAAcAAAAAcAAACsAAAAAgAAAMgAAAAAAAAAAAAAAAMAAADgAAAAAQAAAPgAAAAIAgAAGAEAABgB" +
+            "AAAgAQAANAEAADcBAABTAQAAZAEAAIgBAACoAQAAvAEAANABAADcAQAA3wEAAOwBAADzAQAA+QEA" +
+            "AAMAAAAEAAAABQAAAAYAAAAHAAAACAAAAAoAAAACAAAABQAAAAAAAAAKAAAABgAAAAAAAAAAAAEA" +
+            "AAAAAAAAAAAMAAAABAABAAAAAAAAAAAAAAAAAAQAAAAAAAAAAQAAACACAABmAgAAAAAAAAY8aW5p" +
+            "dD4AEkRleENhY2hlU21hc2guamF2YQABTAAaTERleENhY2hlU21hc2gkVHJhbnNmb3JtMjsAD0xE" +
+            "ZXhDYWNoZVNtYXNoOwAiTGRhbHZpay9hbm5vdGF0aW9uL0VuY2xvc2luZ0NsYXNzOwAeTGRhbHZp" +
+            "ay9hbm5vdGF0aW9uL0lubmVyQ2xhc3M7ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xhbmcv" +
+            "U3RyaW5nOwAKVHJhbnNmb3JtMgABVgALYWNjZXNzRmxhZ3MABWdldElkAARuYW1lAAV2YWx1ZQAC" +
+            "AwILBAgNFwkCAgEOGAEAAAAAAAIAAAAJAgAAAAIAABQCAAAAAAAAAAAAAAAAAAAaAAcOABwABw4A" +
+            "AAABAAEAAQAAADACAAAEAAAAcBACAAAADgACAAEAAAAAADUCAAABAAAADgAAAAEBAICABLwEAQHU" +
+            "BA4AAAAAAAAAAQAAAAAAAAABAAAADwAAAHAAAAACAAAABwAAAKwAAAADAAAAAgAAAMgAAAAFAAAA" +
+            "AwAAAOAAAAAGAAAAAQAAAPgAAAACIAAADwAAABgBAAAEIAAAAgAAAAACAAADEAAAAgAAABACAAAG" +
+            "IAAAAQAAACACAAADIAAAAgAAADACAAABIAAAAgAAADwCAAAAIAAAAQAAAGYCAAAAEAAAAQAAAHQC" +
+            "AAA="));
+
+  /** Attempts an invalid Transform2 redefinition, then redefines Transform (b/62237378). */
+  public static void run() throws Exception {
+    try {
+      Redefinition.doMultiClassRedefinition(TRANSFORM2_INVALID);
+    } catch (Exception e) {
+      // String.valueOf turns a null message into "null", so it is reported instead of an NPE.
+      if (!String.valueOf(e.getMessage()).endsWith("JVMTI_ERROR_FAILS_VERIFICATION")) {
+        throw new Error(
+            "Unexpected error: Expected failure due to JVMTI_ERROR_FAILS_VERIFICATION", e);
+      }
+    }
+    // Doing this redefinition after a redefinition that failed due to FAILS_VERIFICATION could
+    // cause a use-after-free of the Transform2's DexCache by the redefinition code if it happens
+    // that the native pointer of the art::DexFile created for the Transform redefinition aliases
+    // the one created for Transform2's failed redefinition.
+    // Due to the order of checks performed by the redefinition code FAILS_VERIFICATION is the only
+    // failure mode that can cause Use-after-frees in this way.
+    // This should never throw any exceptions (except perhaps OOME in very strange circumstances).
+    Redefinition.doMultiClassRedefinition(TRANSFORM_INITIAL);
+  }
+}
diff --git a/test/988-redefine-use-after-free/src-ex/art/Redefinition.java b/test/988-redefine-use-after-free/src-ex/art/Redefinition.java
new file mode 100644
index 0000000..56d2938
--- /dev/null
+++ b/test/988-redefine-use-after-free/src-ex/art/Redefinition.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.util.ArrayList;
+// Common redefinition helpers shared between tests. Placed here for use by CTS.
+public class Redefinition {
+  // Pairs a target class with replacement bytes in class-file form and in dex form.
+  public static final class CommonClassDefinition {
+    public final Class<?> target;
+    public final byte[] class_file_bytes;
+    public final byte[] dex_file_bytes;
+    public CommonClassDefinition(Class<?> target, byte[] class_file_bytes, byte[] dex_file_bytes) {
+      this.target = target;
+      this.class_file_bytes = class_file_bytes;
+      this.dex_file_bytes = dex_file_bytes;
+    }
+  }
+
+  // The possible test configurations. A test should set one if it needs to.
+  // The values must be kept in sync with the defines in ti-agent/common_helper.cc
+  public static enum Config {
+    COMMON_REDEFINE(0),
+    COMMON_RETRANSFORM(1),
+    COMMON_TRANSFORM(2);
+
+    private final int val;
+    private Config(int val) {
+      this.val = val;
+    }
+  }
+
+  // Passes the numeric value of the chosen configuration to native code.
+  public static void setTestConfiguration(Config type) {
+    nativeSetTestConfiguration(type.val);
+  }
+  private static native void nativeSetTestConfiguration(int type);
+
+  // Transforms the class
+  public static native void doCommonClassRedefinition(Class<?> target,
+                                                      byte[] classfile,
+                                                      byte[] dexfile);
+
+  // Splits the definitions into three parallel arrays and passes them to one native call.
+  public static void doMultiClassRedefinition(CommonClassDefinition... defs) {
+    ArrayList<Class<?>> targets = new ArrayList<>();
+    ArrayList<byte[]> classBytes = new ArrayList<>();
+    ArrayList<byte[]> dexBytes = new ArrayList<>();
+    for (int i = 0; i < defs.length; i++) {
+      targets.add(defs[i].target);
+      classBytes.add(defs[i].class_file_bytes);
+      dexBytes.add(defs[i].dex_file_bytes);
+    }
+    doCommonMultiClassRedefinition(targets.toArray(new Class<?>[0]),
+                                   classBytes.toArray(new byte[0][]),
+                                   dexBytes.toArray(new byte[0][]));
+  }
+
+  // Passes each definition's bytes on together with the canonical name of its target class.
+  public static void addMultiTransformationResults(CommonClassDefinition... defs) {
+    for (int i = 0; i < defs.length; i++) {
+      String name = defs[i].target.getCanonicalName();
+      addCommonTransformationResult(name, defs[i].class_file_bytes, defs[i].dex_file_bytes);
+    }
+  }
+
+  public static native void doCommonMultiClassRedefinition(Class<?>[] targets,
+                                                           byte[][] classfiles,
+                                                           byte[][] dexfiles);
+  public static native void doCommonClassRetransformation(Class<?>... target);
+  public static native void setPopRetransformations(boolean pop);
+  public static native void popTransformationFor(String name);
+  public static native void enableCommonRetransformation(boolean enable);
+  public static native void addCommonTransformationResult(String target_name,
+                                                          byte[] class_bytes,
+                                                          byte[] dex_bytes);
+}
diff --git a/test/988-redefine-use-after-free/src/Main.java b/test/988-redefine-use-after-free/src/Main.java
new file mode 100644
index 0000000..d88c471
--- /dev/null
+++ b/test/988-redefine-use-after-free/src/Main.java
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.*;
+
+public class Main {
+  public static final String TEST_NAME = "988-redefine-use-after-free";
+  public static final int REPS = 1000;
+  public static final int STEP = 100;
+
+  // Runs REPS invocations in total, with a fresh class loader for each batch of STEP.
+  public static void main(String[] args) throws Exception {
+    for (int done = 0; done < REPS; done += STEP) {
+      runSeveralTimes(STEP);
+    }
+  }
+
+  // Returns a loader for the -ex classes: PathClassLoader on ART, URLClassLoader on the RI.
+  public static ClassLoader getClassLoaderFor(String location) throws Exception {
+    Class<?> pathLoader;
+    try {
+      pathLoader = Class.forName("dalvik.system.PathClassLoader");
+    } catch (ClassNotFoundException e) {
+      java.net.URL exDir = new java.net.URL("file://" + location + "/classes-ex/");
+      return new java.net.URLClassLoader(new java.net.URL[] { exDir });
+    }
+    String exJar = location + "/" + TEST_NAME + "-ex.jar";
+    Constructor<?> ctor = pathLoader.getConstructor(String.class, ClassLoader.class);
+    return (ClassLoader) ctor.newInstance(exJar, Main.class.getClassLoader());
+  }
+
+  // Run the redefinition several times on a single class-loader to try to trigger the
+  // Use-after-free bug b/62237378
+  public static void runSeveralTimes(int times) throws Exception {
+    ClassLoader loader = getClassLoaderFor(System.getenv("DEX_LOCATION"));
+    Method run = loader.loadClass("DexCacheSmash").getDeclaredMethod("run");
+    for (int remaining = times; remaining > 0; remaining--) {
+      run.invoke(null);
+    }
+  }
+}
diff --git a/test/989-method-trace-throw/expected.txt b/test/989-method-trace-throw/expected.txt
new file mode 100644
index 0000000..0911bc3
--- /dev/null
+++ b/test/989-method-trace-throw/expected.txt
@@ -0,0 +1,188 @@
+Normal: Entering public static void art.Test989.doNothing()
+Normal: Leaving public static void art.Test989.doNothing() returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$doNothingClass].
+Normal: Entering public static native void art.Test989.doNothingNative()
+Normal: Leaving public static native void art.Test989.doNothingNative() returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$doNothingNativeClass].
+Normal: Entering public static void art.Test989.throwA()
+Normal: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$NormalTracer, class art.Test989$throwAClass] - art.Test989$ErrorA: Throwing Error A
+Normal: Entering public static native void art.Test989.throwANative()
+Normal: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$NormalTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorA: Throwing Error A
+Normal: Entering public static java.lang.Object art.Test989.returnValue()
+Normal: Leaving public static java.lang.Object art.Test989.returnValue() returned TestObject(0)
+returnValue returned: TestObject(0)
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnValueClass].
+Normal: Entering public static native java.lang.Object art.Test989.returnValueNative()
+Normal: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned TestObject(1)
+returnValueNative returned: TestObject(1)
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnValueNativeClass].
+Normal: Entering public static void art.Test989.acceptValue(java.lang.Object)
+Recieved TestObject(2)
+Normal: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$acceptValueClass].
+Normal: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+Recieved TestObject(3)
+Normal: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$acceptValueNativeClass].
+Normal: Entering public static void art.Test989.tryCatchExit()
+Normal: Leaving public static void art.Test989.tryCatchExit() returned null
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$tryCatchExitClass].
+Normal: Entering public static float art.Test989.returnFloat()
+Normal: Leaving public static float art.Test989.returnFloat() returned 1.618
+returnFloat returned: 1.618
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnFloatClass].
+Normal: Entering public static native float art.Test989.returnFloatNative()
+Normal: Leaving public static native float art.Test989.returnFloatNative() returned 1.618
+returnFloatNative returned: 1.618
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnFloatNativeClass].
+Normal: Entering public static double art.Test989.returnDouble()
+Normal: Leaving public static double art.Test989.returnDouble() returned 3.14159628
+returnDouble returned: 3.14159628
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnDoubleClass].
+Normal: Entering public static native double art.Test989.returnDoubleNative()
+Normal: Leaving public static native double art.Test989.returnDoubleNative() returned 3.14159628
+returnDoubleNative returned: 3.14159628
+Received no exception as expected for test[class art.Test989$NormalTracer, class art.Test989$returnDoubleNativeClass].
+ThrowEnter: Entering public static void art.Test989.doNothing()
+ThrowEnter: Leaving public static void art.Test989.doNothing() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$doNothingClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.doNothing()
+ThrowEnter: Entering public static native void art.Test989.doNothingNative()
+ThrowEnter: Leaving public static native void art.Test989.doNothingNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$doNothingNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native void art.Test989.doNothingNative()
+ThrowEnter: Entering public static void art.Test989.throwA()
+ThrowEnter: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$throwAClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.throwA()
+ThrowEnter: Entering public static native void art.Test989.throwANative()
+ThrowEnter: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorB: Throwing error while entering public static native void art.Test989.throwANative()
+ThrowEnter: Entering public static java.lang.Object art.Test989.returnValue()
+ThrowEnter: Leaving public static java.lang.Object art.Test989.returnValue() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnValueClass] - art.Test989$ErrorB: Throwing error while entering public static java.lang.Object art.Test989.returnValue()
+ThrowEnter: Entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowEnter: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnValueNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowEnter: Entering public static void art.Test989.acceptValue(java.lang.Object)
+ThrowEnter: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$acceptValueClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.acceptValue(java.lang.Object)
+ThrowEnter: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+ThrowEnter: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$acceptValueNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+ThrowEnter: Entering public static void art.Test989.tryCatchExit()
+ThrowEnter: Leaving public static void art.Test989.tryCatchExit() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$tryCatchExitClass] - art.Test989$ErrorB: Throwing error while entering public static void art.Test989.tryCatchExit()
+ThrowEnter: Entering public static float art.Test989.returnFloat()
+ThrowEnter: Leaving public static float art.Test989.returnFloat() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnFloatClass] - art.Test989$ErrorB: Throwing error while entering public static float art.Test989.returnFloat()
+ThrowEnter: Entering public static native float art.Test989.returnFloatNative()
+ThrowEnter: Leaving public static native float art.Test989.returnFloatNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnFloatNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native float art.Test989.returnFloatNative()
+ThrowEnter: Entering public static double art.Test989.returnDouble()
+ThrowEnter: Leaving public static double art.Test989.returnDouble() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnDoubleClass] - art.Test989$ErrorB: Throwing error while entering public static double art.Test989.returnDouble()
+ThrowEnter: Entering public static native double art.Test989.returnDoubleNative()
+ThrowEnter: Leaving public static native double art.Test989.returnDoubleNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowEnterTracer, class art.Test989$returnDoubleNativeClass] - art.Test989$ErrorB: Throwing error while entering public static native double art.Test989.returnDoubleNative()
+ThrowExit: Entering public static void art.Test989.doNothing()
+ThrowExit: Leaving public static void art.Test989.doNothing() returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$doNothingClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.doNothing() returned null
+ThrowExit: Entering public static native void art.Test989.doNothingNative()
+ThrowExit: Leaving public static native void art.Test989.doNothingNative() returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$doNothingNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native void art.Test989.doNothingNative() returned null
+ThrowExit: Entering public static void art.Test989.throwA()
+ThrowExit: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$throwAClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.throwA() returned <exception>
+ThrowExit: Entering public static native void art.Test989.throwANative()
+ThrowExit: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorB: Throwing error while exit public static native void art.Test989.throwANative() returned <exception>
+ThrowExit: Entering public static java.lang.Object art.Test989.returnValue()
+ThrowExit: Leaving public static java.lang.Object art.Test989.returnValue() returned TestObject(7)
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnValueClass] - art.Test989$ErrorB: Throwing error while exit public static java.lang.Object art.Test989.returnValue() returned TestObject(7)
+ThrowExit: Entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowExit: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned TestObject(8)
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnValueNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native java.lang.Object art.Test989.returnValueNative() returned TestObject(8)
+ThrowExit: Entering public static void art.Test989.acceptValue(java.lang.Object)
+Recieved TestObject(9)
+ThrowExit: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$acceptValueClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.acceptValue(java.lang.Object) returned null
+ThrowExit: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+Recieved TestObject(10)
+ThrowExit: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$acceptValueNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native void art.Test989.acceptValueNative(java.lang.Object) returned null
+ThrowExit: Entering public static void art.Test989.tryCatchExit()
+ThrowExit: Leaving public static void art.Test989.tryCatchExit() returned null
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$tryCatchExitClass] - art.Test989$ErrorB: Throwing error while exit public static void art.Test989.tryCatchExit() returned null
+ThrowExit: Entering public static float art.Test989.returnFloat()
+ThrowExit: Leaving public static float art.Test989.returnFloat() returned 1.618
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnFloatClass] - art.Test989$ErrorB: Throwing error while exit public static float art.Test989.returnFloat() returned 1.618
+ThrowExit: Entering public static native float art.Test989.returnFloatNative()
+ThrowExit: Leaving public static native float art.Test989.returnFloatNative() returned 1.618
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnFloatNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native float art.Test989.returnFloatNative() returned 1.618
+ThrowExit: Entering public static double art.Test989.returnDouble()
+ThrowExit: Leaving public static double art.Test989.returnDouble() returned 3.14159628
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnDoubleClass] - art.Test989$ErrorB: Throwing error while exit public static double art.Test989.returnDouble() returned 3.14159628
+ThrowExit: Entering public static native double art.Test989.returnDoubleNative()
+ThrowExit: Leaving public static native double art.Test989.returnDoubleNative() returned 3.14159628
+Received expected error for test[class art.Test989$ThrowExitTracer, class art.Test989$returnDoubleNativeClass] - art.Test989$ErrorB: Throwing error while exit public static native double art.Test989.returnDoubleNative() returned 3.14159628
+ThrowBoth: Entering public static void art.Test989.doNothing()
+ThrowBoth: Leaving public static void art.Test989.doNothing() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$doNothingClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.doNothing() returned <exception>
+ThrowBoth: Entering public static native void art.Test989.doNothingNative()
+ThrowBoth: Leaving public static native void art.Test989.doNothingNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$doNothingNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native void art.Test989.doNothingNative() returned <exception>
+ThrowBoth: Entering public static void art.Test989.throwA()
+ThrowBoth: Leaving public static void art.Test989.throwA() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$throwAClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.throwA() returned <exception>
+ThrowBoth: Entering public static native void art.Test989.throwANative()
+ThrowBoth: Leaving public static native void art.Test989.throwANative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorC: Throwing error while exit public static native void art.Test989.throwANative() returned <exception>
+ThrowBoth: Entering public static java.lang.Object art.Test989.returnValue()
+ThrowBoth: Leaving public static java.lang.Object art.Test989.returnValue() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnValueClass] - art.Test989$ErrorC: Throwing error while exit public static java.lang.Object art.Test989.returnValue() returned <exception>
+ThrowBoth: Entering public static native java.lang.Object art.Test989.returnValueNative()
+ThrowBoth: Leaving public static native java.lang.Object art.Test989.returnValueNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnValueNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native java.lang.Object art.Test989.returnValueNative() returned <exception>
+ThrowBoth: Entering public static void art.Test989.acceptValue(java.lang.Object)
+ThrowBoth: Leaving public static void art.Test989.acceptValue(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$acceptValueClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.acceptValue(java.lang.Object) returned <exception>
+ThrowBoth: Entering public static native void art.Test989.acceptValueNative(java.lang.Object)
+ThrowBoth: Leaving public static native void art.Test989.acceptValueNative(java.lang.Object) returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$acceptValueNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native void art.Test989.acceptValueNative(java.lang.Object) returned <exception>
+ThrowBoth: Entering public static void art.Test989.tryCatchExit()
+ThrowBoth: Leaving public static void art.Test989.tryCatchExit() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$tryCatchExitClass] - art.Test989$ErrorC: Throwing error while exit public static void art.Test989.tryCatchExit() returned <exception>
+ThrowBoth: Entering public static float art.Test989.returnFloat()
+ThrowBoth: Leaving public static float art.Test989.returnFloat() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnFloatClass] - art.Test989$ErrorC: Throwing error while exit public static float art.Test989.returnFloat() returned <exception>
+ThrowBoth: Entering public static native float art.Test989.returnFloatNative()
+ThrowBoth: Leaving public static native float art.Test989.returnFloatNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnFloatNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native float art.Test989.returnFloatNative() returned <exception>
+ThrowBoth: Entering public static double art.Test989.returnDouble()
+ThrowBoth: Leaving public static double art.Test989.returnDouble() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnDoubleClass] - art.Test989$ErrorC: Throwing error while exit public static double art.Test989.returnDouble() returned <exception>
+ThrowBoth: Entering public static native double art.Test989.returnDoubleNative()
+ThrowBoth: Leaving public static native double art.Test989.returnDoubleNative() returned <exception>
+Received expected error for test[class art.Test989$ThrowBothTracer, class art.Test989$returnDoubleNativeClass] - art.Test989$ErrorC: Throwing error while exit public static native double art.Test989.returnDoubleNative() returned <exception>
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$doNothingClass].
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$doNothingNativeClass].
+Received expected error for test[class art.Test989$ForceGCTracer, class art.Test989$throwAClass] - art.Test989$ErrorA: Throwing Error A
+Received expected error for test[class art.Test989$ForceGCTracer, class art.Test989$throwANativeClass] - art.Test989$ErrorA: Throwing Error A
+returnValue returned: TestObject(14)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnValueClass].
+returnValueNative returned: TestObject(15)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnValueNativeClass].
+Recieved TestObject(16)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$acceptValueClass].
+Recieved TestObject(17)
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$acceptValueNativeClass].
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$tryCatchExitClass].
+returnFloat returned: 1.618
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnFloatClass].
+returnFloatNative returned: 1.618
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnFloatNativeClass].
+returnDouble returned: 3.14159628
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnDoubleClass].
+returnDoubleNative returned: 3.14159628
+Received no exception as expected for test[class art.Test989$ForceGCTracer, class art.Test989$returnDoubleNativeClass].
+Finished!
diff --git a/test/989-method-trace-throw/info.txt b/test/989-method-trace-throw/info.txt
new file mode 100644
index 0000000..f0a200d
--- /dev/null
+++ b/test/989-method-trace-throw/info.txt
@@ -0,0 +1,15 @@
+Tests method tracing in JVMTI
+
+This test is sensitive to the internal implementations of:
+ * java.lang.Error
+ * java.lang.Integer
+ * java.lang.Math
+ * java.lang.String
+ * java.lang.System
+ * java.util.ArrayList
+ * java.util.Arrays
+ * java.lang.StringBuilder
+ * all super-classes and super-interfaces of the above types.
+
+Changes to the internal implementation of these classes might (or might not)
+change the output of this test.
diff --git a/test/989-method-trace-throw/method_trace.cc b/test/989-method-trace-throw/method_trace.cc
new file mode 100644
index 0000000..554784e
--- /dev/null
+++ b/test/989-method-trace-throw/method_trace.cc
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <memory>
+#include <stdio.h>
+
+#include "android-base/logging.h"
+#include "android-base/stringprintf.h"
+
+#include "jni.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
+
+namespace art {
+namespace Test989StackTraceThrow {
+
+// Returns the float produced by the static Java method doGetFloat() looked up on klass.
+extern "C" JNIEXPORT jfloat JNICALL Java_art_Test989_returnFloatNative(JNIEnv* env, jclass klass) {
+  jmethodID get_float = env->GetStaticMethodID(klass, "doGetFloat", "()F");
+  return env->CallStaticFloatMethod(klass, get_float);
+}
+// Returns the double produced by the static Java method doGetDouble() looked up on klass.
+extern "C" JNIEXPORT jdouble JNICALL Java_art_Test989_returnDoubleNative(JNIEnv* env,
+                                                                         jclass klass) {
+  jmethodID get_double = env->GetStaticMethodID(klass, "doGetDouble", "()D");
+  return env->CallStaticDoubleMethod(klass, get_double);
+}
+
+// Returns the object produced by the static Java method mkTestObject() looked up on klass.
+extern "C" JNIEXPORT jobject JNICALL Java_art_Test989_returnValueNative(JNIEnv* env, jclass klass) {
+  jmethodID mk_object = env->GetStaticMethodID(klass, "mkTestObject", "()Ljava/lang/Object;");
+  return env->CallStaticObjectMethod(klass, mk_object);
+}
+
+// Empty native method: accepts the standard JNI arguments and does nothing with them.
+extern "C" JNIEXPORT void JNICALL Java_art_Test989_doNothingNative(JNIEnv*, jclass) {}
+
+// Calls the static Java method doThrowA() on klass; no exception check or clear is done here.
+extern "C" JNIEXPORT void JNICALL Java_art_Test989_throwANative(JNIEnv* env, jclass klass) {
+  jmethodID thrower = env->GetStaticMethodID(klass, "doThrowA", "()V");
+  env->CallStaticVoidMethod(klass, thrower);
+}
+
+// Passes arg to the static Java method printObject(Object) looked up on klass.
+extern "C" JNIEXPORT void JNICALL Java_art_Test989_acceptValueNative(JNIEnv* env, jclass klass,
+                                                                     jobject arg) {
+  jmethodID printer = env->GetStaticMethodID(klass, "printObject", "(Ljava/lang/Object;)V");
+  env->CallStaticVoidMethod(klass, printer, arg);
+}
+
+}  // namespace Test989StackTraceThrow
+}  // namespace art
+
diff --git a/test/987-stack-trace-dumping/run b/test/989-method-trace-throw/run
similarity index 93%
copy from test/987-stack-trace-dumping/run
copy to test/989-method-trace-throw/run
index dee3e8b..51875a7 100755
--- a/test/987-stack-trace-dumping/run
+++ b/test/989-method-trace-throw/run
@@ -15,4 +15,4 @@
 # limitations under the License.
 
 # Ask for stack traces to be dumped to a file rather than to stdout.
-./default-run "$@" --set-stack-trace-dump-dir
+./default-run "$@" --jvmti
diff --git a/test/989-method-trace-throw/src/Main.java b/test/989-method-trace-throw/src/Main.java
new file mode 100644
index 0000000..29b9de1
--- /dev/null
+++ b/test/989-method-trace-throw/src/Main.java
@@ -0,0 +1,21 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Launcher for this test: main() only delegates to art.Test989.run().
+public class Main {
+  // Anything thrown by art.Test989.run() propagates out of main unchanged.
+  public static void main(String[] args) throws Exception { art.Test989.run(); }
+}
diff --git a/test/989-method-trace-throw/src/art/Test989.java b/test/989-method-trace-throw/src/art/Test989.java
new file mode 100644
index 0000000..18421bd
--- /dev/null
+++ b/test/989-method-trace-throw/src/art/Test989.java
@@ -0,0 +1,465 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+import java.util.Set;
+import java.util.HashSet;
+
+public class Test989 {
+  static boolean PRINT_STACK_TRACE = false;
+  static Set<Method> testMethods = new HashSet<>();
+
+  static MethodTracer currentTracer = new MethodTracer() {
+    public void methodEntry(Object o) { return; }
+    public void methodExited(Object o, boolean e, Object r) { return; }
+  };
+
+  private static boolean DISABLE_TRACING = false;
+
+  static {
+    try {
+      testMethods.add(Test989.class.getDeclaredMethod("doNothing"));
+      testMethods.add(Test989.class.getDeclaredMethod("doNothingNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("throwA"));
+      testMethods.add(Test989.class.getDeclaredMethod("throwANative"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnFloat"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnFloatNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnDouble"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnDoubleNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnValue"));
+      testMethods.add(Test989.class.getDeclaredMethod("returnValueNative"));
+      testMethods.add(Test989.class.getDeclaredMethod("acceptValue", Object.class));
+      testMethods.add(Test989.class.getDeclaredMethod("acceptValueNative", Object.class));
+      testMethods.add(Test989.class.getDeclaredMethod("tryCatchExit"));
+    } catch (Exception e) {
+      throw new Error("Bad static!", e);
+    }
+  }
+
+  // Disables method tracing for the current thread when not running on Dalvik (i.e. on the RI).
+  // Works around RI behavior where throwing an exception from an exit hook causes the exit hook
+  // to be re-executed, which would otherwise loop forever.
+  private static void disableTraceForRI() {
+    if (!System.getProperty("java.vm.name").equals("Dalvik")) {
+      Trace.disableMethodTracing(Thread.currentThread());
+    }
+  }
+
+  private static String getInfo(Object m, boolean exception, Object result) {
+    String out = m.toString() + " returned ";
+    if (exception) {
+      out += "<exception>";
+    } else {
+      out += result;
+    }
+    return out;
+  }
+
+  public static interface MethodTracer {
+    public void methodEntry(Object m);
+    public void methodExited(Object m, boolean exception, Object result);
+    public default Class<?> entryException() { return null; }
+    public default Class<?> exitException() { return null; }
+  }
+
+  public static class NormalTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("Normal: Entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        System.out.println("Normal: Leaving " + getInfo(m, exception, result));
+      }
+    }
+  }
+
+  public static class ThrowEnterTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowEnter: Entering " + m);
+        throw new ErrorB("Throwing error while entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowEnter: Leaving " + getInfo(m, exception, result));
+      }
+    }
+    public Class<?> entryException() { return ErrorB.class; }
+  }
+
+  public static class ThrowExitTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowExit: Entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        // The RI goes into an infinite loop if we throw exceptions in an ExitHook. See
+        // disableTraceForRI for explanation.
+        disableTraceForRI();
+        System.out.println("ThrowExit: Leaving " + getInfo(m, exception, result));
+        throw new ErrorB("Throwing error while exit " + getInfo(m, exception, result));
+      }
+    }
+    public Class<?> exitException() { return ErrorB.class; }
+  }
+
+  public static class ThrowBothTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (testMethods.contains(m)) {
+        System.out.println("ThrowBoth: Entering " + m);
+        throw new ErrorB("Throwing error while entering " + m);
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (testMethods.contains(m)) {
+        // The RI goes into an infinite loop if we throw exceptions in an ExitHook. See
+        // disableTraceForRI for explanation.
+        disableTraceForRI();
+        System.out.println("ThrowBoth: Leaving " + getInfo(m, exception, result));
+        throw new ErrorC("Throwing error while exit " + getInfo(m, exception, result));
+      }
+    }
+    public Class<?> entryException() { return ErrorB.class; }
+    public Class<?> exitException() { return ErrorC.class; }
+  }
+
+  public static class ForceGCTracer implements MethodTracer {
+    public void methodEntry(Object m) {
+      if (System.getProperty("java.vm.name").equals("Dalvik")) {
+        System.gc();
+      }
+    }
+    public void methodExited(Object m, boolean exception, Object result) {
+      if (System.getProperty("java.vm.name").equals("Dalvik")) {
+        System.gc();
+      }
+    }
+  }
+
+  private static void maybeDisableTracing() throws Exception {
+    if (DISABLE_TRACING) {
+      Trace.disableMethodTracing(Thread.currentThread());
+    }
+  }
+
+  public static void baseNotifyMethodEntry(Object o) {
+    currentTracer.methodEntry(o);
+  }
+  public static void baseNotifyMethodExit(Object o, boolean exception, Object res) {
+    currentTracer.methodExited(o, exception, res);
+  }
+
+  private static void setupTracing() throws Exception {
+    Trace.enableMethodTracing(
+        Test989.class,
+        Test989.class.getDeclaredMethod("baseNotifyMethodEntry", Object.class),
+        Test989.class.getDeclaredMethod(
+            "baseNotifyMethodExit", Object.class, Boolean.TYPE, Object.class),
+        Thread.currentThread());
+  }
+  private static void setEntry(MethodTracer type) throws Exception {
+    if (DISABLE_TRACING || !System.getProperty("java.vm.name").equals("Dalvik")) {
+      Trace.disableMethodTracing(Thread.currentThread());
+      setupTracing();
+    }
+    currentTracer = type;
+  }
+
+  private static String testDescription(MethodTracer type, Runnable test) {
+    return "test[" + type.getClass() + ", " + test.getClass() + "]";
+  }
+
+  private static Class<?> getExpectedError(MethodTracer t, MyRunnable r) {
+    if (t.exitException() != null) {
+      return t.exitException();
+    } else if (t.entryException() != null) {
+      return t.entryException();
+    } else {
+      return r.expectedThrow();
+    }
+  }
+
+  private static void doTest(MethodTracer type, MyRunnable test) throws Exception {
+    Class<?> expected = getExpectedError(type, test);
+
+    setEntry(type);
+    try {
+      test.run();
+      // No error was thrown. Tracing is only turned off here when DISABLE_TRACING is set.
+      maybeDisableTracing();
+      if (expected == null) {
+        System.out.println(
+            "Received no exception as expected for " + testDescription(type, test) + ".");
+        return;
+      }
+    } catch (Error t) {
+      // An Error was thrown. Tracing is only turned off here when DISABLE_TRACING is set.
+      maybeDisableTracing();
+      if (expected == null) {
+        throw new Error("Unexpected error occured: " + t + " for " + testDescription(type, test), t);
+      } else if (!expected.isInstance(t)) {
+        throw new Error("Expected error of type " + expected + " not " + t +
+            " for " + testDescription(type, test), t);
+      } else {
+        System.out.println(
+            "Received expected error for " + testDescription(type, test) + " - " + t);
+        if (PRINT_STACK_TRACE) {
+          t.printStackTrace();
+        }
+        return;
+      }
+    }
+    System.out.println("Expected an error of type " + expected + " but got no exception for "
+        + testDescription(type, test));
+    // NOTE(review): the mismatch above is only reported via println; the equivalent
+    // 'throw new Error(...)' was left disabled. Confirm that not failing here is intentional.
+  }
+
+  public static interface MyRunnable extends Runnable {
+    public default Class<?> expectedThrow() {
+      return null;
+    }
+  }
+
+  public static void run() throws Exception {
+    MyRunnable[] testCases = new MyRunnable[] {
+      new doNothingClass(),
+      new doNothingNativeClass(),
+      new throwAClass(),
+      new throwANativeClass(),
+      new returnValueClass(),
+      new returnValueNativeClass(),
+      new acceptValueClass(),
+      new acceptValueNativeClass(),
+      new tryCatchExitClass(),
+      new returnFloatClass(),
+      new returnFloatNativeClass(),
+      new returnDoubleClass(),
+      new returnDoubleNativeClass(),
+    };
+    MethodTracer[] tracers = new MethodTracer[] {
+      new NormalTracer(),
+      new ThrowEnterTracer(),
+      new ThrowExitTracer(),
+      new ThrowBothTracer(),
+      new ForceGCTracer(),
+    };
+
+    setupTracing();
+    for (MethodTracer t : tracers) {
+      for (MyRunnable r : testCases) {
+        doTest(t, r);
+      }
+    }
+
+    maybeDisableTracing();
+    System.out.println("Finished!");
+    Trace.disableMethodTracing(Thread.currentThread());
+  }
+
+  private static final class throwAClass implements MyRunnable {
+    public void run() {
+      throwA();
+    }
+    @Override
+    public Class<?> expectedThrow() {
+      return ErrorA.class;
+    }
+  }
+
+  private static final class throwANativeClass implements MyRunnable {
+    public void run() {
+      throwANative();
+    }
+    @Override
+    public Class<?> expectedThrow() {
+      return ErrorA.class;
+    }
+  }
+
+  private static final class tryCatchExitClass implements MyRunnable {
+    public void run() {
+      tryCatchExit();
+    }
+  }
+
+  private static final class doNothingClass implements MyRunnable {
+    public void run() {
+      doNothing();
+    }
+  }
+
+  private static final class doNothingNativeClass implements MyRunnable {
+    public void run() {
+      doNothingNative();
+    }
+  }
+
+  private static final class acceptValueClass implements MyRunnable {
+    public void run() {
+      acceptValue(mkTestObject());
+    }
+  }
+
+  private static final class acceptValueNativeClass implements MyRunnable {
+    public void run() {
+      acceptValueNative(mkTestObject());
+    }
+  }
+
+  private static final class returnValueClass implements MyRunnable {
+    public void run() {
+      Object o = returnValue();
+      System.out.println("returnValue returned: " + o);
+    }
+  }
+
+  private static final class returnValueNativeClass implements MyRunnable {
+    public void run() {
+      Object o = returnValueNative();
+      System.out.println("returnValueNative returned: " + o);
+    }
+  }
+
+  private static final class returnFloatClass implements MyRunnable {
+    public void run() {
+      float d = returnFloat();
+      System.out.println("returnFloat returned: " + d);
+    }
+  }
+
+  private static final class returnFloatNativeClass implements MyRunnable {
+    public void run() {
+      float d = returnFloatNative();
+      System.out.println("returnFloatNative returned: " + d);
+    }
+  }
+
+  private static final class returnDoubleClass implements MyRunnable {
+    public void run() {
+      double d = returnDouble();
+      System.out.println("returnDouble returned: " + d);
+    }
+  }
+
+  private static final class returnDoubleNativeClass implements MyRunnable {
+    public void run() {
+      double d = returnDoubleNative();
+      System.out.println("returnDoubleNative returned: " + d);
+    }
+  }
+
+  private static class ErrorA extends Error {
+    private static final long serialVersionUID = 0;
+    public ErrorA(String s) { super(s); }
+  }
+
+  private static class ErrorB extends Error {
+    private static final long serialVersionUID = 1;
+    public ErrorB(String s) { super(s); }
+  }
+
+  private static class ErrorC extends Error {
+    private static final long serialVersionUID = 2;
+    public ErrorC(String s) { super(s); }
+  }
+
+  // Does nothing.
+  public static void doNothing() { }
+
+  public static void tryCatchExit() {
+    try {
+      Object o = mkTestObject();
+      return;
+    } catch (ErrorB b) {
+      System.out.println("ERROR: Caught " + b);
+      b.printStackTrace();
+    } catch (ErrorC c) {
+      System.out.println("ERROR: Caught " + c);
+      c.printStackTrace();
+    }
+  }
+
+  public static float returnFloat() {
+    return doGetFloat();
+  }
+
+  public static double returnDouble() {
+    return doGetDouble();
+  }
+
+  // Throws an ErrorA.
+  public static void throwA() {
+    doThrowA();
+  }
+
+  public static void doThrowA() {
+    throw new ErrorA("Throwing Error A");
+  }
+
+  static final class TestObject {
+    private int idx;
+    public TestObject(int v) {
+      this.idx = v;
+    }
+    @Override
+    public String toString() {
+      return "TestObject(" + idx + ")";
+    }
+  }
+
+  static int counter = 0;
+  public static Object mkTestObject() {
+    return new TestObject(counter++);
+  }
+
+  public static void printObject(Object o) {
+    System.out.println("Recieved " + o);
+  }
+
+  // Returns a newly allocated value.
+  public static Object returnValue() {
+    return mkTestObject();
+  }
+
+  public static void acceptValue(Object o) {
+    printObject(o);
+  }
+
+  public static float doGetFloat() {
+    return 1.618f;
+  }
+
+  public static double doGetDouble() {
+    return 3.14159628;
+  }
+
+  // Calls mkTestObject from native code and returns it.
+  public static native Object returnValueNative();
+  // Calls printObject from native code.
+  public static native void acceptValueNative(Object t);
+  public static native void doNothingNative();
+  public static native void throwANative();
+  public static native float returnFloatNative();
+  public static native double returnDoubleNative();
+}
diff --git a/test/989-method-trace-throw/src/art/Trace.java b/test/989-method-trace-throw/src/art/Trace.java
new file mode 100644
index 0000000..3370996
--- /dev/null
+++ b/test/989-method-trace-throw/src/art/Trace.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art;
+
+import java.lang.reflect.Method;
+
+public class Trace {
+  public static native void enableMethodTracing(
+      Class<?> methodClass, Method entryMethod, Method exitMethod, Thread thr);
+  public static native void disableMethodTracing(Thread thr);
+}
diff --git a/test/Android.bp b/test/Android.bp
index 1679669..35c3d9c 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -51,9 +51,9 @@
     // These really are gtests, but the gtest library comes from libart-gtest.so
     gtest: false,
     defaults: [
-        "art_defaults",
-        "art_debug_defaults",
         "art_test_defaults",
+        "art_debug_defaults",
+        "art_defaults",
     ],
 
     shared_libs: [
@@ -128,8 +128,8 @@
     name: "libart-gtest-defaults",
     host_supported: true,
     defaults: [
-        "art_defaults",
         "art_debug_defaults",
+        "art_defaults",
     ],
     shared_libs: [
         "libartd",
@@ -202,8 +202,8 @@
 cc_defaults {
     name: "libartagent-defaults",
     defaults: [
-        "art_defaults",
         "art_test_defaults",
+        "art_defaults",
     ],
     shared_libs: [
         "libbacktrace",
@@ -234,8 +234,8 @@
     name: "libartagentd",
     srcs: ["900-hello-plugin/load_unload.cc"],
     defaults: [
-        "libartagent-defaults",
         "art_debug_defaults",
+        "libartagent-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -278,6 +278,7 @@
         "984-obsolete-invoke/obsolete_invoke.cc",
         "986-native-method-bind/native_bind.cc",
         "987-agent-bind/agent_bind.cc",
+        "989-method-trace-throw/method_trace.cc",
     ],
     shared_libs: [
         "libbase",
@@ -313,8 +314,8 @@
 art_cc_test_library {
     name: "libtiagentd",
     defaults: [
-        "libtiagent-defaults",
         "art_debug_defaults",
+        "libtiagent-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -340,8 +341,8 @@
 art_cc_test_library {
     name: "libtistressd",
     defaults: [
-        "libtistress-defaults",
         "art_debug_defaults",
+        "libtistress-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -355,8 +356,8 @@
 cc_defaults {
     name: "libarttest-defaults",
     defaults: [
-        "art_defaults",
         "art_test_defaults",
+        "art_defaults",
     ],
     srcs: [
         "common/runtime_state.cc",
@@ -421,8 +422,8 @@
 art_cc_test_library {
     name: "libarttestd",
     defaults: [
-        "libarttest-defaults",
         "art_debug_defaults",
+        "libarttest-defaults",
     ],
     shared_libs: ["libartd"],
 }
@@ -431,9 +432,9 @@
     name: "libnativebridgetest",
     shared_libs: ["libart"],
     defaults: [
-        "art_defaults",
-        "art_debug_defaults",
         "art_test_defaults",
+        "art_debug_defaults",
+        "art_defaults",
     ],
     srcs: ["115-native-bridge/nativebridge.cc"],
     target: {
diff --git a/test/Android.run-test-jvmti-java-library.mk b/test/Android.run-test-jvmti-java-library.mk
index c480be5..da28b4c 100644
--- a/test/Android.run-test-jvmti-java-library.mk
+++ b/test/Android.run-test-jvmti-java-library.mk
@@ -151,4 +151,8 @@
   $(eval $(call GEN_JVMTI_RUN_TEST_GENERATED_FILE,$(NR))))
 LOCAL_JAVA_RESOURCE_FILES := $(JVMTI_RUN_TEST_GENERATED_FILES)
 
+# We only want to depend on libcore.
+LOCAL_NO_STANDARD_LIBRARIES := true
+LOCAL_JAVA_LIBRARIES := core-all
+
 include $(BUILD_JAVA_LIBRARY)
diff --git a/test/Instrumentation/Instrumentation.java b/test/Instrumentation/Instrumentation.java
index 09d4342..b44f78f 100644
--- a/test/Instrumentation/Instrumentation.java
+++ b/test/Instrumentation/Instrumentation.java
@@ -15,8 +15,21 @@
  */
 
 public class Instrumentation {
+  private static int primitiveField;
+  private static Object referenceField;
+
   // Direct method
   private void instanceMethod() {
     System.out.println("instanceMethod");
   }
+
+  private Object returnReference() {
+    System.out.println("returnReference");
+    return null;
+  }
+
+  private int returnPrimitive() {
+    System.out.println("returnPrimitive");
+    return 0;
+  }
 }
diff --git a/test/ManyMethods/ManyMethods.java b/test/ManyMethods/ManyMethods.java
new file mode 100644
index 0000000..b3a57f6
--- /dev/null
+++ b/test/ManyMethods/ManyMethods.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class ManyMethods {
+  static class Strings {
+    public static String msg0 = "Hello World";
+    public static String msg1 = "Hello World1";
+    public static String msg2 = "Hello World2";
+    public static String msg3 = "Hello World3";
+    public static String msg4 = "Hello World4";
+    public static String msg5 = "Hello World5";
+    public static String msg6 = "Hello World6";
+    public static String msg7 = "Hello World7";
+    public static String msg8 = "Hello World8";
+    public static String msg9 = "Hello World9";
+  }
+
+  static class Printer {
+    static void Print(String s) {
+      System.out.println(s);
+    }
+  }
+
+  static class Printer2 {
+    static void Print(String s) {
+      System.out.println("AAA" + s);
+    }
+  }
+
+  public static void Print0() {
+    Printer.Print(Strings.msg0);
+  }
+
+  public static void Print1() {
+    Printer.Print(Strings.msg1);
+  }
+
+  public static void Print2() {
+    Printer.Print(Strings.msg2);
+  }
+
+  public static void Print3() {
+    Printer.Print(Strings.msg1);
+  }
+
+  public static void Print4() {
+    Printer.Print(Strings.msg2);
+  }
+
+  public static void Print5() {
+    Printer.Print(Strings.msg3);
+  }
+
+  public static void Print6() {
+    Printer2.Print(Strings.msg4);
+  }
+
+  public static void Print7() {
+    Printer.Print(Strings.msg5);
+  }
+
+  public static void Print8() {
+    Printer.Print(Strings.msg6);
+  }
+
+  public static void Print9() {
+    Printer2.Print(Strings.msg7);
+  }
+
+  public static void Print10() {
+    Printer2.Print(Strings.msg8);
+  }
+
+  public static void Print11() {
+    Printer.Print(Strings.msg9);
+  }
+
+  public static void main(String args[]) {
+    Print0();
+    Print1();
+    Print2();
+    Print3();
+    Print4();
+    Print5();
+    Print6();
+    Print7();
+    Print8();
+    Print9();
+    Print10();
+    Print11();
+  }
+}
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index b683a27..d8e5b57 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -29,7 +29,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedUtfChars.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
@@ -148,12 +148,24 @@
   CHECK(chars.c_str() != nullptr);
   ArtMethod* method = soa.Decode<mirror::Class>(cls)->FindDeclaredDirectMethodByName(
         chars.c_str(), kRuntimePointerSize);
-  const void* code = method->GetOatMethodQuickCode(kRuntimePointerSize);
-  jit::Jit* jit = Runtime::Current()->GetJit();
-  if (jit != nullptr && jit->GetCodeCache()->ContainsPc(code)) {
-    return true;
+  return method->GetOatMethodQuickCode(kRuntimePointerSize) != nullptr;
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_isJitCompiled(JNIEnv* env,
+                                                              jclass,
+                                                              jclass cls,
+                                                              jstring method_name) {
+  jit::Jit* jit = GetJitIfEnabled();
+  if (jit == nullptr) {
+    return false;
   }
-  return code != nullptr;
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  ScopedUtfChars chars(env, method_name);
+  CHECK(chars.c_str() != nullptr);
+  ArtMethod* method = soa.Decode<mirror::Class>(cls)->FindDeclaredDirectMethodByName(
+        chars.c_str(), kRuntimePointerSize);
+  return jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode());
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_ensureJitCompiled(JNIEnv* env,
@@ -238,4 +250,8 @@
   return method->GetCounter();
 }
 
+extern "C" JNIEXPORT jint JNICALL Java_Main_numberOfDeoptimizations(JNIEnv*, jclass) {
+  return Runtime::Current()->GetNumberOfDeoptimizations();  // Counter kept by the Runtime.
+}
+
 }  // namespace art
diff --git a/test/common/stack_inspect.cc b/test/common/stack_inspect.cc
index ceb4ba2..80a2780 100644
--- a/test/common/stack_inspect.cc
+++ b/test/common/stack_inspect.cc
@@ -25,7 +25,7 @@
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "stack.h"
-#include "thread-inl.h"
+#include "thread-current-inl.h"
 
 namespace art {
 
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index a89fe5b..8aacc8c 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -63,6 +63,8 @@
 TEST_IS_NDEBUG="n"
 APP_IMAGE="y"
 JVMTI_STRESS="n"
+JVMTI_TRACE_STRESS="n"
+JVMTI_REDEFINE_STRESS="n"
 VDEX_FILTER=""
 PROFILE="n"
 RANDOM_PROFILE="n"
@@ -151,10 +153,15 @@
     elif [ "x$1" = "x--prebuild" ]; then
         PREBUILD="y"
         shift
-    elif [ "x$1" = "x--jvmti-stress" ]; then
-        # APP_IMAGE doesn't really work with jvmti-torture
+    elif [ "x$1" = "x--jvmti-redefine-stress" ]; then
+        # APP_IMAGE doesn't really work with jvmti redefine stress
         APP_IMAGE="n"
         JVMTI_STRESS="y"
+        JVMTI_REDEFINE_STRESS="y"
+        shift
+    elif [ "x$1" = "x--jvmti-trace-stress" ]; then
+        JVMTI_STRESS="y"
+        JVMTI_TRACE_STRESS="y"
         shift
     elif [ "x$1" = "x--no-app-image" ]; then
         APP_IMAGE="n"
@@ -397,13 +404,25 @@
     plugin=libopenjdkjvmti.so
   fi
 
-  file_1=$(mktemp --tmpdir=${DEX_LOCATION})
-  file_2=$(mktemp --tmpdir=${DEX_LOCATION})
+  # Just give it a default start so we can always add ',' to it.
+  agent_args="jvmti-stress"
+  if [[ "$JVMTI_REDEFINE_STRESS" = "y" ]]; then
+    # We really cannot do this on RI so don't bother passing it in that case.
+    if [[ "$USE_JVM" = "n" ]]; then
+      file_1=$(mktemp --tmpdir=${DEX_LOCATION})
+      file_2=$(mktemp --tmpdir=${DEX_LOCATION})
+      # TODO Remove need for DEXTER_BINARY!
+      agent_args="${agent_args},redefine,${DEXTER_BINARY},${file_1},${file_2}"
+    fi
+  fi
+  if [[ "$JVMTI_TRACE_STRESS" = "y" ]]; then
+    agent_args="${agent_args},trace"
+  fi
+  # Future agent options should be appended to agent_args here.
   if [[ "$USE_JVM" = "y" ]]; then
-    FLAGS="${FLAGS} -agentpath:${ANDROID_HOST_OUT}/nativetest64/${agent}=/bin/false,${file_1},${file_2}"
+    FLAGS="${FLAGS} -agentpath:${ANDROID_HOST_OUT}/nativetest64/${agent}=${agent_args}"
   else
-    # TODO Remove need for DEXTER_BINARY!
-    FLAGS="${FLAGS} -agentpath:${agent}=${DEXTER_BINARY},${file_1},${file_2}"
+    FLAGS="${FLAGS} -agentpath:${agent}=${agent_args}"
     if [ "$IS_JVMTI_TEST" = "n" ]; then
       FLAGS="${FLAGS} -Xplugin:${plugin}"
       FLAGS="${FLAGS} -Xcompiler-option --debuggable"
@@ -610,7 +629,7 @@
   if [ "$HOST" != "n" ]; then
     # Use SIGRTMIN+2 to try to dump threads.
     # Use -k 1m to SIGKILL it a minute later if it hasn't ended.
-    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 1m ${dex2oat_cmdline}"
+    dex2oat_cmdline="timeout -k 1m -s SIGRTMIN+2 90s ${dex2oat_cmdline} --watchdog-timeout=60000"
   fi
   if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
     vdex_cmdline="${dex2oat_cmdline} ${VDEX_FILTER} --input-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex --output-vdex=$DEX_LOCATION/oat/$ISA/$TEST_NAME.vdex"
@@ -668,12 +687,6 @@
 # Note: this is required as envsetup right now exports detect_leaks=0.
 RUN_TEST_ASAN_OPTIONS=""
 
-# JVMTI has a mismatch of malloc with delete. b/38322765
-if [ "x$RUN_TEST_ASAN_OPTIONS" != "x" ] ; then
-  RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}:"
-fi
-RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}alloc_dealloc_mismatch=0"
-
 # Multiple shutdown leaks. b/38341789
 if [ "x$RUN_TEST_ASAN_OPTIONS" != "x" ] ; then
   RUN_TEST_ASAN_OPTIONS="${RUN_TEST_ASAN_OPTIONS}:"
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 4b44df7..f515226 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -508,11 +508,10 @@
             "640-checker-short-simd",
             "641-checker-arraycopy",
             "643-checker-bogus-ic",
-            "644-checker-deopt",
             "645-checker-abs-simd",
             "706-checker-scheduler"],
         "description": ["Checker tests are not compatible with jvmti."],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress | trace-stress"
     },
     {
         "tests": [
@@ -520,7 +519,7 @@
             "964-default-iface-init-gen"
         ],
         "description": ["Tests that just take too long with jvmti-stress"],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress | trace-stress"
     },
     {
         "tests": [
@@ -540,7 +539,7 @@
             "dexter/slicer."
         ],
         "bug": "b/37272822",
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -551,7 +550,7 @@
             "981-dedup-original-dex"
         ],
         "description": ["Tests that require exact knowledge of the number of plugins and agents."],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress | trace-stress"
     },
     {
         "tests": [
@@ -565,7 +564,7 @@
         "description": [
             "Tests that use illegal dex files or otherwise break dexter assumptions"
         ],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -582,7 +581,7 @@
             "Tests that use custom class loaders or other features not supported ",
             "by our JVMTI implementation"
         ],
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -593,7 +592,7 @@
             "Tests that use annotations and debug data that is not kept around by dexter."
         ],
         "bug": "b/37239009",
-        "variant": "jvmti-stress"
+        "variant": "jvmti-stress | redefine-stress"
     },
     {
         "tests": [
@@ -651,8 +650,7 @@
             "969-iface-super",
             "981-dedup-original-dex",
             "984-obsolete-invoke",
-            "985-re-obsolete",
-            "987-stack-trace-dumping"
+            "985-re-obsolete"
         ],
         "description": "The tests above fail with --build-with-javac-dx.",
         "env_vars": {"ANDROID_COMPILE_WITH_JACK": "false"},
@@ -682,5 +680,42 @@
             "The java.lang.Integer.valueOf intrinsic is not supported in PIC mode."
         ],
         "variant": "optimizing & pictest | speed-profile & pictest"
+    },
+    {
+        "tests": "202-thread-oome",
+        "description": "ASAN aborts when large thread stacks are requested.",
+        "variant": "host",
+        "env_vars": {"SANITIZE_HOST": "address"}
+    },
+    {
+        "tests": "202-thread-oome",
+        "description": "ASAN aborts when large thread stacks are requested.",
+        "variant": "target",
+        "env_vars": {"SANITIZE_TARGET": "address"}
+    },
+    {
+        "tests": "071-dexfile-map-clean",
+        "description": [ "We use prebuilt zipalign on master-art-host to avoid pulling in a lot",
+                         "of the framework. But a non-sanitized zipalign binary does not work with",
+                         "a sanitized libc++."],
+        "env_vars": {"SANITIZE_HOST": "address"}
+    },
+    {
+        "tests": ["988-method-trace"],
+        "variant": "redefine-stress | jvmti-stress",
+        "description": "Test disabled due to redefine-stress disabling intrinsics which changes the trace output slightly."
+    },
+    {
+        "tests": "137-cfi",
+        "description": [ "ASan is reporting out-of-bounds reads in libunwind."],
+        "variant": "host",
+        "env_vars": {"SANITIZE_HOST": "address"},
+        "bug": "b/62350406"
+    },
+    {
+        "tests": ["137-cfi", "629-vdex-speed"],
+        "description": [ "Tests require speed compilation which is no longer the default for",
+                          "no-prebuild or no-image configs."],
+        "variant": "no-prebuild | no-image"
     }
 ]
diff --git a/test/run-test b/test/run-test
index 933a7fe..41a0dc2 100755
--- a/test/run-test
+++ b/test/run-test
@@ -137,7 +137,8 @@
 basic_verify="false"
 gc_verify="false"
 gc_stress="false"
-jvmti_stress="false"
+jvmti_trace_stress="false"
+jvmti_redefine_stress="false"
 strace="false"
 always_clean="no"
 never_clean="no"
@@ -234,8 +235,11 @@
         basic_verify="true"
         gc_stress="true"
         shift
-    elif [ "x$1" = "x--jvmti-stress" ]; then
-        jvmti_stress="true"
+    elif [ "x$1" = "x--jvmti-redefine-stress" ]; then
+        jvmti_redefine_stress="true"
+        shift
+    elif [ "x$1" = "x--jvmti-trace-stress" ]; then
+        jvmti_trace_stress="true"
         shift
     elif [ "x$1" = "x--suspend-timeout" ]; then
         shift
@@ -447,8 +451,11 @@
 if [ "$gc_stress" = "true" ]; then
   run_args="${run_args} --gc-stress --runtime-option -Xgc:gcstress --runtime-option -Xms2m --runtime-option -Xmx16m"
 fi
-if [ "$jvmti_stress" = "true" ]; then
-    run_args="${run_args} --no-app-image --jvmti-stress"
+if [ "$jvmti_redefine_stress" = "true" ]; then
+    run_args="${run_args} --no-app-image --jvmti-redefine-stress"
+fi
+if [ "$jvmti_trace_stress" = "true" ]; then
+    run_args="${run_args} --no-app-image --jvmti-trace-stress"
 fi
 if [ "$trace" = "true" ]; then
     run_args="${run_args} --runtime-option -Xmethod-trace --runtime-option -Xmethod-trace-file-size:2000000"
@@ -658,7 +665,9 @@
         echo "    --stream              Run method tracing in streaming mode (requires --trace)"
         echo "    --gcstress            Run with gc stress testing"
         echo "    --gcverify            Run with gc verification"
-        echo "    --jvmti-stress        Run with jvmti stress testing"
+        echo "    --jvmti-trace-stress  Run with jvmti method tracing stress testing"
+        echo "    --jvmti-redefine-stress"
+        echo "                          Run with jvmti method redefinition stress testing"
         echo "    --always-clean        Delete the test files even if the test fails."
         echo "    --never-clean         Keep the test files even if the test succeeds."
         echo "    --android-root [path] The path on target for the android root. (/system by default)."
@@ -728,10 +737,8 @@
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
   if [ "$runtime" = "art" -a "$image_suffix" = "" -a "$USE_JACK" = "true" ]; then
-    # In no-prebuild mode, the compiler is only invoked if both dex2oat and
-    # patchoat are available. Disable Checker otherwise (b/22552692).
-    if [ "$prebuild_mode" = "yes" ] \
-         || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
+    # In no-prebuild or no-image mode, the compiler only quickens so disable the checker.
+    if [ "$prebuild_mode" = "yes" -a "$have_image" = "yes" ]; then
       run_checker="yes"
 
       if [ "$target_mode" = "no" ]; then
diff --git a/test/testrunner/target_config.py b/test/testrunner/target_config.py
index 6e47c5e..97d4509 100644
--- a/test/testrunner/target_config.py
+++ b/test/testrunner/target_config.py
@@ -316,6 +316,23 @@
         }
     },
 
+   # ASAN (host) configurations.
+
+   'art-gtest-asan': {
+        'make' : 'test-art-host-gtest',
+        'env': {
+            'SANITIZE_HOST' : 'address'
+        }
+   },
+   'art-run-test-asan': {
+        'run-test' : ['--interpreter',
+                      '--optimizing',
+                      '--jit'],
+        'env': {
+            'SANITIZE_HOST' : 'address'
+        }
+   },
+
    # ART Golem build targets used by go/lem (continuous ART benchmarking),
    # (art-opt-cc is used by default since it mimics the default preopt config),
    #
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index c99159f..3445071 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -147,7 +147,7 @@
   VARIANT_TYPE_DICT['relocate'] = {'relocate-npatchoat', 'relocate', 'no-relocate'}
   VARIANT_TYPE_DICT['jni'] = {'jni', 'forcecopy', 'checkjni'}
   VARIANT_TYPE_DICT['address_sizes'] = {'64', '32'}
-  VARIANT_TYPE_DICT['jvmti'] = {'no-jvmti', 'jvmti-stress'}
+  VARIANT_TYPE_DICT['jvmti'] = {'no-jvmti', 'jvmti-stress', 'redefine-stress', 'trace-stress'}
   VARIANT_TYPE_DICT['compiler'] = {'interp-ac', 'interpreter', 'jit', 'optimizing',
                               'regalloc_gc', 'speed-profile'}
 
@@ -437,7 +437,11 @@
         options_test += ' --debuggable'
 
       if jvmti == 'jvmti-stress':
-        options_test += ' --jvmti-stress'
+        options_test += ' --jvmti-trace-stress --jvmti-redefine-stress'
+      elif jvmti == 'trace-stress':
+        options_test += ' --jvmti-trace-stress'
+      elif jvmti == 'redefine-stress':
+        options_test += ' --jvmti-redefine-stress'
 
       if address_size == '64':
         options_test += ' --64'
@@ -750,6 +754,9 @@
     print_text(COLOR_ERROR + 'FAILED: ' + COLOR_NORMAL + '\n')
     for test_info in failed_tests:
       print_text(('%s\n%s\n' % (test_info[0], test_info[1])))
+    print_text(COLOR_ERROR + '----------' + COLOR_NORMAL + '\n')
+    for failed_test in sorted([test_info[0] for test_info in failed_tests]):
+      print_text(('%s\n' % (failed_test)))
 
 
 def parse_test_name(test_name):
@@ -951,6 +958,10 @@
     IMAGE_TYPES.add('multipicimage')
   if options['jvmti_stress']:
     JVMTI_TYPES.add('jvmti-stress')
+  if options['redefine_stress']:
+    JVMTI_TYPES.add('redefine-stress')
+  if options['trace_stress']:
+    JVMTI_TYPES.add('trace-stress')
   if options['no_jvmti']:
     JVMTI_TYPES.add('no-jvmti')
   if options['verbose']:
diff --git a/test/ti-agent/common_helper.cc b/test/ti-agent/common_helper.cc
index bfd4d25..6eaa5c3 100644
--- a/test/ti-agent/common_helper.cc
+++ b/test/ti-agent/common_helper.cc
@@ -69,6 +69,214 @@
   env->ThrowNew(env->FindClass("java/lang/Exception"), message.c_str());
 }
 
+namespace common_trace {
+
+// Taken from art/runtime/modifiers.h
+static constexpr uint32_t kAccStatic =       0x0008;  // field, method, ic
+
+struct TraceData {
+  jclass test_klass;  // Class whose static enter/exit methods receive the trace callbacks.
+  jmethodID enter_method;  // Static method invoked from the MethodEntry callback.
+  jmethodID exit_method;  // Static method invoked from the MethodExit callback.
+  bool in_callback;  // True while a callback is calling into Java; suppresses nested events.
+};
+
+static jobject GetJavaMethod(jvmtiEnv* jvmti, JNIEnv* env, jmethodID m) {
+  // Builds the reflected method object for |m|; nullptr when JvmtiErrorToException reports an error.
+  jint modifiers = 0;
+  if (JvmtiErrorToException(env, jvmti, jvmti->GetMethodModifiers(m, &modifiers))) {
+    return nullptr;
+  }
+  jclass declaring_class = nullptr;
+  if (JvmtiErrorToException(env, jvmti, jvmti->GetMethodDeclaringClass(m, &declaring_class))) {
+    return nullptr;
+  }
+  // ToReflectedMethod has to be told whether the method is static.
+  jobject reflected = env->ToReflectedMethod(declaring_class, m, (modifiers & kAccStatic) != 0);
+  env->DeleteLocalRef(declaring_class);
+  return reflected;
+}
+
+static jobject GetJavaValue(jvmtiEnv* jvmtienv,
+                            JNIEnv* env,
+                            jmethodID m,
+                            jvalue value) {  // |value| is interpreted via m's return type.
+  char *fname, *fsig, *fgen;
+  if (JvmtiErrorToException(env, jvmtienv, jvmtienv->GetMethodName(m, &fname, &fsig, &fgen))) {
+    return nullptr;
+  }
+  std::string type(fsig);
+  type = type.substr(type.find(")") + 1);  // Keep only the return-type part of the signature.
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+  std::string name;  // Name of the box class used for a primitive return type.
+  switch (type[0]) {
+    case 'V':
+      return nullptr;  // void: there is no value to hand back.
+    case '[':
+    case 'L':
+      return value.l;  // Already a reference: returned without boxing.
+    case 'Z':
+      name = "java/lang/Boolean";
+      break;
+    case 'B':
+      name = "java/lang/Byte";
+      break;
+    case 'C':
+      name = "java/lang/Character";
+      break;
+    case 'S':
+      name = "java/lang/Short";
+      break;
+    case 'I':
+      name = "java/lang/Integer";
+      break;
+    case 'J':
+      name = "java/lang/Long";
+      break;
+    case 'F':
+      name = "java/lang/Float";
+      break;
+    case 'D':
+      name = "java/lang/Double";
+      break;
+    default:
+      LOG(FATAL) << "Unable to figure out type!";
+      return nullptr;
+  }
+  std::ostringstream oss;  // Builds the valueOf signature, e.g. "(I)Ljava/lang/Integer;".
+  oss << "(" << type[0] << ")L" << name << ";";
+  std::string args = oss.str();
+  jclass target = env->FindClass(name.c_str());
+  jmethodID valueOfMethod = env->GetStaticMethodID(target, "valueOf", args.c_str());
+
+  CHECK(valueOfMethod != nullptr) << args;
+  jobject res = env->CallStaticObjectMethodA(target, valueOfMethod, &value);  // Box it.
+  env->DeleteLocalRef(target);
+  return res;
+}
+
+static void methodExitCB(jvmtiEnv* jvmti,
+                         JNIEnv* jnienv,
+                         jthread thr ATTRIBUTE_UNUSED,
+                         jmethodID method,
+                         jboolean was_popped_by_exception,
+                         jvalue return_value) {  // Forwards a MethodExit event to exit_method.
+  TraceData* data = nullptr;
+  if (JvmtiErrorToException(jnienv, jvmti,
+                            jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)))) {
+    return;
+  }
+  if (method == data->exit_method || method == data->enter_method || data->in_callback) {
+    // Don't do callback for either of these to prevent an infinite loop.
+    return;
+  }
+  data->in_callback = true;  // Events raised while we call into Java below are ignored.
+  jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
+  jobject result =  // Left null when the frame was popped by an exception.
+      was_popped_by_exception ? nullptr : GetJavaValue(jvmti, jnienv, method, return_value);
+  if (jnienv->ExceptionCheck()) {  // Bail out if an exception is pending at this point.
+    data->in_callback = false;
+    return;
+  }
+  jnienv->CallStaticVoidMethod(data->test_klass,
+                               data->exit_method,
+                               method_arg,
+                               was_popped_by_exception,
+                               result);
+  jnienv->DeleteLocalRef(method_arg);
+  data->in_callback = false;
+}
+
+static void methodEntryCB(jvmtiEnv* jvmti,
+                          JNIEnv* jnienv,
+                          jthread thr ATTRIBUTE_UNUSED,
+                          jmethodID method) {
+  // MethodEntry callback: reports |method| to the Java-side enter_method of the test class.
+  TraceData* data = nullptr;
+  if (JvmtiErrorToException(jnienv, jvmti,
+                            jvmti->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)))) {
+    return;
+  }
+  if (method == data->exit_method || method == data->enter_method || data->in_callback) {
+    // Don't do callback for either of these to prevent an infinite loop.
+    return;
+  }
+  data->in_callback = true;
+  jobject method_arg = GetJavaMethod(jvmti, jnienv, method);
+  if (!jnienv->ExceptionCheck()) {
+    jnienv->CallStaticVoidMethod(data->test_klass, data->enter_method, method_arg);
+    jnienv->DeleteLocalRef(method_arg);
+  }
+  data->in_callback = false;  // Always cleared, or every later event would be dropped.
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Trace_enableMethodTracing(
+    JNIEnv* env,
+    jclass trace ATTRIBUTE_UNUSED,
+    jclass klass,
+    jobject enter,
+    jobject exit,
+    jthread thr) {  // Registers the trace callbacks and enables both events for |thr|.
+  TraceData* data = nullptr;
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->Allocate(sizeof(TraceData),
+                                                reinterpret_cast<unsigned char**>(&data)))) {
+    return;
+  }
+  memset(data, 0, sizeof(TraceData));
+  data->test_klass = reinterpret_cast<jclass>(env->NewGlobalRef(klass));
+  data->enter_method = env->FromReflectedMethod(enter);  // |enter| is a reflected method object.
+  data->exit_method = env->FromReflectedMethod(exit);  // Likewise for |exit|.
+  data->in_callback = false;
+
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->SetEnvironmentLocalStorage(data))) {
+    return;  // NOTE(review): |data| and its global ref are not released on this path.
+  }
+
+  jvmtiEventCallbacks cb;  // Only MethodEntry and MethodExit are set; the rest stay null.
+  memset(&cb, 0, sizeof(cb));
+  cb.MethodEntry = methodEntryCB;
+  cb.MethodExit = methodExitCB;
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->SetEventCallbacks(&cb, sizeof(cb)))) {
+    return;
+  }
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                                                JVMTI_EVENT_METHOD_ENTRY,
+                                                                thr))) {
+    return;
+  }
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                                                JVMTI_EVENT_METHOD_EXIT,
+                                                                thr))) {
+    return;
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_Trace_disableMethodTracing(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jthread thr) {
+  // Turn off both method events for |thr|, entry first and then exit. The first
+  // failure is handed to JvmtiErrorToException and ends the function early, so
+  // the exit event is left untouched when disabling the entry event fails.
+  static constexpr jvmtiEvent kTraceEvents[] = {
+      JVMTI_EVENT_METHOD_ENTRY, JVMTI_EVENT_METHOD_EXIT
+  };
+  for (jvmtiEvent event : kTraceEvents) {
+    jvmtiError err = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE, event, thr);
+    if (JvmtiErrorToException(env, jvmti_env, err)) {
+      return;
+    }
+  }
+}
+
+}  // namespace common_trace
+
 namespace common_redefine {
 
 static void throwRedefinitionError(jvmtiEnv* jvmti,
diff --git a/test/ti-stress/stress.cc b/test/ti-stress/stress.cc
index e8e3cc7..497db1c 100644
--- a/test/ti-stress/stress.cc
+++ b/test/ti-stress/stress.cc
@@ -20,6 +20,7 @@
 #include <fstream>
 #include <stdio.h>
 #include <sstream>
+#include <strstream>
 
 #include "jvmti.h"
 #include "exec_utils.h"
@@ -35,6 +36,8 @@
   std::string out_temp_dex;
   std::string in_temp_dex;
   bool vm_class_loader_initialized;
+  bool trace_stress;
+  bool redefine_stress;
 };
 
 static void WriteToFile(const std::string& fname, jint data_len, const unsigned char* data) {
@@ -95,7 +98,6 @@
   if (thread == nullptr) {
     info.name = const_cast<char*>("<NULLPTR>");
   } else if (jvmtienv->GetThreadInfo(thread, &info) != JVMTI_ERROR_NONE) {
-    LOG(WARNING) << "Unable to get thread info!";
     info.name = const_cast<char*>("<UNKNOWN THREAD>");
   }
   char *fname, *fsig, *fgen;
@@ -115,8 +117,8 @@
     env->DeleteLocalRef(klass);
     return;
   }
-  LOG(INFO) << "Loading native method \"" << cname << "->" << fname << fsig << "\". Thread is "
-            << info.name;
+  LOG(INFO) << "Loading native method \"" << cname << "->" << fname << fsig << "\". Thread is \""
+            << info.name << "\"";
   jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cname));
   jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cgen));
   jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
@@ -126,6 +128,151 @@
   return;
 }
 
+static std::string GetName(jvmtiEnv* jvmtienv, JNIEnv* jnienv, jobject obj) {
+  // Describes |obj| by its class signature; strings also get their contents appended.
+  jclass obj_class = jnienv->GetObjectClass(obj);
+  char* signature = nullptr;
+  char* generic = nullptr;
+  if (jvmtienv->GetClassSignature(obj_class, &signature, &generic) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+    jnienv->DeleteLocalRef(obj_class);
+    return "<UNKNOWN>";
+  }
+  std::string description(signature);
+  // The JVMTI buffers are no longer needed once the signature has been copied.
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(signature));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(generic));
+  jnienv->DeleteLocalRef(obj_class);
+  if (description == "Ljava/lang/String;") {
+    jstring str = reinterpret_cast<jstring>(obj);
+    const char* utf = jnienv->GetStringUTFChars(str, nullptr);
+    if (utf == nullptr) {
+      return description + " (unable to get value)";
+    }
+    description += std::string(" (value: \"") + utf + "\")";
+    jnienv->ReleaseStringUTFChars(str, utf);
+  }
+  return description;
+}
+
+static std::string GetValOf(jvmtiEnv* env, JNIEnv* jnienv, std::string type, jvalue val) {
+  std::ostringstream oss;  // Renders |val| as text according to the type descriptor |type|.
+  switch (type[0]) {
+    case '[':
+    case 'L':
+      return val.l != nullptr ? GetName(env, jnienv, val.l) : "null";
+    case 'Z':
+      return val.z == JNI_TRUE ? "true" : "false";
+    case 'B':
+      oss << static_cast<int>(val.b);  // jbyte is a char type; print its numeric value.
+      return oss.str();
+    case 'C':
+      oss << val.c;  // jchar is a 16-bit unsigned type, so this prints the code unit number.
+      return oss.str();
+    case 'S':
+      oss << val.s;
+      return oss.str();
+    case 'I':
+      oss << val.i;
+      return oss.str();
+    case 'J':
+      oss << val.j;
+      return oss.str();
+    case 'F':
+      oss << val.f;
+      return oss.str();
+    case 'D':
+      oss << val.d;
+      return oss.str();
+    case 'V':
+      return "<void>";
+    default:
+      return "<ERROR Found type " + type + ">";
+  }
+}
+
+void JNICALL MethodExitHook(jvmtiEnv* jvmtienv,
+                            JNIEnv* env,
+                            jthread thread,
+                            jmethodID m,
+                            jboolean was_popped_by_exception,
+                            jvalue val) {  // Logs the method being left and why it was left.
+  jvmtiThreadInfo info;
+  if (thread == nullptr) {
+    info.name = const_cast<char*>("<NULLPTR>");
+  } else if (jvmtienv->GetThreadInfo(thread, &info) != JVMTI_ERROR_NONE) {
+    // LOG(WARNING) << "Unable to get thread info!";
+    info.name = const_cast<char*>("<UNKNOWN THREAD>");
+  }  // NOTE(review): a name filled in by GetThreadInfo is never deallocated here - confirm.
+  char *fname, *fsig, *fgen;
+  char *cname, *cgen;
+  jclass klass = nullptr;
+  if (jvmtienv->GetMethodDeclaringClass(m, &klass) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method declaring class!";
+    return;
+  }
+  if (jvmtienv->GetMethodName(m, &fname, &fsig, &fgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method name!";
+    env->DeleteLocalRef(klass);
+    return;
+  }
+  if (jvmtienv->GetClassSignature(klass, &cname, &cgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+    env->DeleteLocalRef(klass);
+    return;  // NOTE(review): fname/fsig/fgen are not deallocated on this path.
+  }
+  std::string type(fsig);
+  type = type.substr(type.find(")") + 1);  // Keep only the return-type part of the signature.
+  std::string out_val(was_popped_by_exception ? "" : GetValOf(jvmtienv, env, type, val));
+  LOG(INFO) << "Leaving method \"" << cname << "->" << fname << fsig << "\". Thread is \""
+            << info.name << "\"." << std::endl
+            << "    Cause: " << (was_popped_by_exception ? "exception" : "return ")
+            << out_val << ".";
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cgen));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+  env->DeleteLocalRef(klass);
+}
+
+void JNICALL MethodEntryHook(jvmtiEnv* jvmtienv,
+                             JNIEnv* env,
+                             jthread thread,
+                             jmethodID m) {  // Logs the method being entered and the thread name.
+  jvmtiThreadInfo info;
+  if (thread == nullptr) {
+    info.name = const_cast<char*>("<NULLPTR>");
+  } else if (jvmtienv->GetThreadInfo(thread, &info) != JVMTI_ERROR_NONE) {
+    info.name = const_cast<char*>("<UNKNOWN THREAD>");
+  }  // NOTE(review): a name filled in by GetThreadInfo is never deallocated here - confirm.
+  char *fname, *fsig, *fgen;
+  char *cname, *cgen;
+  jclass klass = nullptr;
+  if (jvmtienv->GetMethodDeclaringClass(m, &klass) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method declaring class!";
+    return;
+  }
+  if (jvmtienv->GetMethodName(m, &fname, &fsig, &fgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get method name!";
+    env->DeleteLocalRef(klass);
+    return;
+  }
+  if (jvmtienv->GetClassSignature(klass, &cname, &cgen) != JVMTI_ERROR_NONE) {
+    LOG(ERROR) << "Unable to get class name!";
+    env->DeleteLocalRef(klass);
+    return;  // NOTE(review): fname/fsig/fgen are not deallocated on this path.
+  }
+  LOG(INFO) << "Entering method \"" << cname << "->" << fname << fsig << "\". Thread is \""
+            << info.name << "\"";
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(cgen));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fname));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fsig));
+  jvmtienv->Deallocate(reinterpret_cast<unsigned char*>(fgen));
+  env->DeleteLocalRef(klass);
+}
+
 // The hook we are using.
 void JNICALL ClassFileLoadHookSecretNoOp(jvmtiEnv* jvmti,
                                          JNIEnv* jni_env ATTRIBUTE_UNUSED,
@@ -163,27 +310,57 @@
   }
 }
 
-// Options are ${DEXTER_BINARY},${TEMP_FILE_1},${TEMP_FILE_2}
-static void ReadOptions(StressData* data, char* options) {
-  std::string ops(options);
-  data->dexter_cmd = ops.substr(0, ops.find(','));
-  ops = ops.substr(ops.find(',') + 1);
-  data->in_temp_dex = ops.substr(0, ops.find(','));
-  ops = ops.substr(ops.find(',') + 1);
-  data->out_temp_dex = ops;
+static std::string AdvanceOption(const std::string& ops) {
+  return ops.substr(ops.find(',') + 1);  // No comma: npos + 1 wraps to 0, so |ops| comes back whole.
 }
 
-// We need to make sure that VMClassLoader is initialized before we start redefining anything since
-// it can give (non-fatal) error messages if it's initialized after we've redefined BCP classes.
-// These error messages are expected and no problem but they will mess up our testing
-// infrastructure.
-static void JNICALL EnsureVMClassloaderInitializedCB(jvmtiEnv *jvmti_env,
-                                                     JNIEnv* jni_env,
-                                                     jthread thread ATTRIBUTE_UNUSED) {
+static bool HasNextOption(const std::string& ops) {
+  return ops.find(',') != std::string::npos;  // True while |ops| still contains a comma.
+}
+
+static std::string GetOption(const std::string& in) {
+  return in.substr(0, in.find(','));  // Text before the first comma, or all of |in| if none.
+}
+
+// Options are jvmti-stress,[redefine,${DEXTER_BINARY},${TEMP_FILE_1},${TEMP_FILE_2},][trace]
+// NOTE(review): a bare "jvmti-stress" seems to reach LOG(FATAL) below; confirm that is intended.
+static void ReadOptions(StressData* data, char* options) {
+  std::string ops(options);
+  CHECK_EQ(GetOption(ops), "jvmti-stress") << "Options should start with jvmti-stress";
+  do {  // Each pass drops the option just handled and examines the next one.
+    ops = AdvanceOption(ops);
+    std::string cur = GetOption(ops);
+    if (cur == "trace") {
+      data->trace_stress = true;
+    } else if (cur == "redefine") {
+      data->redefine_stress = true;
+      ops = AdvanceOption(ops);  // "redefine" is followed by three positional values.
+      data->dexter_cmd = GetOption(ops);
+      ops = AdvanceOption(ops);
+      data->in_temp_dex = GetOption(ops);
+      ops = AdvanceOption(ops);
+      data->out_temp_dex = GetOption(ops);
+    } else {
+      LOG(FATAL) << "Unknown option: " << GetOption(ops);
+    }
+  } while (HasNextOption(ops));
+}
+
+// Do final setup during the VMInit callback. By this time most things are all setup.
+static void JNICALL PerformFinalSetupVMInit(jvmtiEnv *jvmti_env,
+                                            JNIEnv* jni_env,
+                                            jthread thread ATTRIBUTE_UNUSED) {
   // Load the VMClassLoader class. We will get a ClassNotFound exception because we don't have
   // visibility but the class will be loaded behind the scenes.
   LOG(INFO) << "manual load & initialization of class java/lang/VMClassLoader!";
   jclass klass = jni_env->FindClass("java/lang/VMClassLoader");
+  StressData* data = nullptr;
+  CHECK_EQ(jvmti_env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)),
+           JVMTI_ERROR_NONE);
+  // We need to make sure that VMClassLoader is initialized before we start redefining anything
+  // since it can give (non-fatal) error messages if it's initialized after we've redefined BCP
+  // classes. These error messages are expected and no problem but they will mess up our testing
+  // infrastructure.
   if (klass == nullptr) {
     // Probably on RI. Clear the exception so we can continue but don't mark vmclassloader as
     // initialized.
@@ -193,11 +370,20 @@
     // GetMethodID is spec'd to cause the class to be initialized.
     jni_env->GetMethodID(klass, "hashCode", "()I");
     jni_env->DeleteLocalRef(klass);
-    StressData* data = nullptr;
-    CHECK_EQ(jvmti_env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&data)),
-             JVMTI_ERROR_NONE);
     data->vm_class_loader_initialized = true;
   }
+  if (data->trace_stress) {
+    if (jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                            JVMTI_EVENT_METHOD_ENTRY,
+                                            nullptr) != JVMTI_ERROR_NONE) {
+      LOG(ERROR) << "Unable to enable JVMTI_EVENT_METHOD_ENTRY event!";
+    }
+    if (jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                        JVMTI_EVENT_METHOD_EXIT,
+                                        nullptr) != JVMTI_ERROR_NONE) {
+      LOG(ERROR) << "Unable to enable JVMTI_EVENT_METHOD_EXIT event!";
+    }
+  }
 }
 
 extern "C" JNIEXPORT jint JNICALL Agent_OnLoad(JavaVM* vm,
@@ -233,7 +419,9 @@
   memset(&cb, 0, sizeof(cb));
   cb.ClassFileLoadHook = ClassFileLoadHookSecretNoOp;
   cb.NativeMethodBind = doJvmtiMethodBind;
-  cb.VMInit = EnsureVMClassloaderInitializedCB;
+  cb.VMInit = PerformFinalSetupVMInit;
+  cb.MethodEntry = MethodEntryHook;
+  cb.MethodExit = MethodExitHook;
   if (jvmti->SetEventCallbacks(&cb, sizeof(cb)) != JVMTI_ERROR_NONE) {
     LOG(ERROR) << "Unable to set class file load hook cb!";
     return 1;
@@ -250,11 +438,13 @@
     LOG(ERROR) << "Unable to enable JVMTI_EVENT_VM_INIT event!";
     return 1;
   }
-  if (jvmti->SetEventNotificationMode(JVMTI_ENABLE,
-                                      JVMTI_EVENT_CLASS_FILE_LOAD_HOOK,
-                                      nullptr) != JVMTI_ERROR_NONE) {
-    LOG(ERROR) << "Unable to enable CLASS_FILE_LOAD_HOOK event!";
-    return 1;
+  if (data->redefine_stress) {
+    if (jvmti->SetEventNotificationMode(JVMTI_ENABLE,
+                                        JVMTI_EVENT_CLASS_FILE_LOAD_HOOK,
+                                        nullptr) != JVMTI_ERROR_NONE) {
+      LOG(ERROR) << "Unable to enable CLASS_FILE_LOAD_HOOK event!";
+      return 1;
+    }
   }
   return 0;
 }
diff --git a/tools/add_package_property.sh b/tools/add_package_property.sh
new file mode 100644
index 0000000..e9294a9
--- /dev/null
+++ b/tools/add_package_property.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Sets the property of an Android package
+
+if [ "$#" -ne 2 ] ; then
+  echo "USAGE: sh add_package_property.sh [PACKAGE_NAME] [PROPERTY_SCRIPT_PATH]"
+  exit 1
+fi
+PACKAGE_NAME=$1  # Package whose wrap.<package> property gets set.
+PROPERTY_SCRIPT_PATH=$2  # Host path of the script that is pushed to the device.
+PROPERTY_SCRIPT_NAME=$(basename "$PROPERTY_SCRIPT_PATH")  # Quoted so odd paths stay one word.
+adb push "$PROPERTY_SCRIPT_PATH" "/data/data/$PACKAGE_NAME/"
+adb shell chmod o+x "/data/data/$PACKAGE_NAME/$PROPERTY_SCRIPT_NAME"
+adb shell restorecon "/data/data/$PACKAGE_NAME/$PROPERTY_SCRIPT_NAME"
+adb shell setprop "wrap.$PACKAGE_NAME" "/data/data/$PACKAGE_NAME/$PROPERTY_SCRIPT_NAME"
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 133426f..38556ab 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -75,6 +75,12 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
+ 1.3 Pending
+
+ 1.2 May 26, 2017
+   Show registered native sizes of objects.
+   Simplify presentation of sample path from gc root.
+
  1.1 Feb 21, 2017
    Show java.lang.ref.Reference referents as "unreachable" instead of null.
 
diff --git a/tools/ahat/src/DocString.java b/tools/ahat/src/DocString.java
index c6303c8..7970bf8 100644
--- a/tools/ahat/src/DocString.java
+++ b/tools/ahat/src/DocString.java
@@ -126,6 +126,23 @@
   }
 
   /**
+   * Standard formatted DocString for describing a size.
+   *
+   * Nothing is printed for a size of zero.
+   * Set isPlaceHolder to true to indicate that the size field corresponds
+   * to a place holder object that should be annotated specially.
+   */
+  public static DocString size(long size, boolean isPlaceHolder) {
+    DocString formatted = new DocString();
+    if (!isPlaceHolder && size != 0) {
+      formatted.appendFormat("%,14d", size);
+    } else if (isPlaceHolder) {
+      formatted.append(DocString.removed("del"));  // Place holders get this instead of a number.
+    }
+    return formatted;
+  }
+
+  /**
    * Standard formatted DocString for describing a change in size relative to
    * a baseline.
    * @param noCurrent - whether no current object exists.
diff --git a/tools/ahat/src/DominatedList.java b/tools/ahat/src/DominatedList.java
index f73e3ca..75133b2 100644
--- a/tools/ahat/src/DominatedList.java
+++ b/tools/ahat/src/DominatedList.java
@@ -55,7 +55,7 @@
 
     @Override
     public long getSize(AhatInstance element, AhatHeap heap) {
-      return element.getRetainedSize(heap);
+      return element.getRetainedSize(heap).getSize();
     }
 
     @Override
diff --git a/tools/ahat/src/HeapTable.java b/tools/ahat/src/HeapTable.java
index 9abbe4a..b04f2ae 100644
--- a/tools/ahat/src/HeapTable.java
+++ b/tools/ahat/src/HeapTable.java
@@ -45,16 +45,6 @@
     List<ValueConfig<T>> getValueConfigs();
   }
 
-  private static DocString sizeString(long size, boolean isPlaceHolder) {
-    DocString string = new DocString();
-    if (isPlaceHolder) {
-      string.append(DocString.removed("del"));
-    } else if (size != 0) {
-      string.appendFormat("%,14d", size);
-    }
-    return string;
-  }
-
   /**
    * Render the table to the given document.
    * @param query - The page query.
@@ -100,10 +90,10 @@
         long basesize = config.getSize(base, heap.getBaseline());
         total += size;
         basetotal += basesize;
-        vals.add(sizeString(size, elem.isPlaceHolder()));
+        vals.add(DocString.size(size, elem.isPlaceHolder()));
         vals.add(DocString.delta(elem.isPlaceHolder(), base.isPlaceHolder(), size, basesize));
       }
-      vals.add(sizeString(total, elem.isPlaceHolder()));
+      vals.add(DocString.size(total, elem.isPlaceHolder()));
       vals.add(DocString.delta(elem.isPlaceHolder(), base.isPlaceHolder(), total, basetotal));
 
       for (ValueConfig<T> value : values) {
@@ -140,10 +130,10 @@
         long basesize = basesummary.get(heap);
         total += size;
         basetotal += basesize;
-        vals.add(sizeString(size, false));
+        vals.add(DocString.size(size, false));
         vals.add(DocString.delta(false, false, size, basesize));
       }
-      vals.add(sizeString(total, false));
+      vals.add(DocString.size(total, false));
       vals.add(DocString.delta(false, false, total, basetotal));
 
       for (ValueConfig<T> value : values) {
@@ -159,7 +149,7 @@
   public static <T extends Diffable<T>> boolean hasNonZeroEntry(AhatHeap heap,
       TableConfig<T> config, List<T> elements) {
     AhatHeap baseheap = heap.getBaseline();
-    if (heap.getSize() > 0 || baseheap.getSize() > 0) {
+    if (!heap.getSize().isZero() || !baseheap.getSize().isZero()) {
       for (T element : elements) {
         if (config.getSize(element, heap) > 0 ||
             config.getSize(element.getBaseline(), baseheap) > 0) {
diff --git a/tools/ahat/src/ObjectHandler.java b/tools/ahat/src/ObjectHandler.java
index 2e0ae6e..d6f1faa 100644
--- a/tools/ahat/src/ObjectHandler.java
+++ b/tools/ahat/src/ObjectHandler.java
@@ -19,7 +19,6 @@
 import com.android.ahat.heapdump.AhatArrayInstance;
 import com.android.ahat.heapdump.AhatClassInstance;
 import com.android.ahat.heapdump.AhatClassObj;
-import com.android.ahat.heapdump.AhatHeap;
 import com.android.ahat.heapdump.AhatInstance;
 import com.android.ahat.heapdump.AhatSnapshot;
 import com.android.ahat.heapdump.Diff;
@@ -29,7 +28,6 @@
 import com.android.ahat.heapdump.Value;
 import java.io.IOException;
 import java.util.Collection;
-import java.util.Collections;
 import java.util.List;
 import java.util.Objects;
 
@@ -72,16 +70,6 @@
     doc.descriptions();
     doc.description(DocString.text("Class"), Summarizer.summarize(cls));
 
-    DocString sizeDescription = DocString.format("%,14d ", inst.getSize());
-    sizeDescription.appendDelta(false, base.isPlaceHolder(),
-        inst.getSize(), base.getSize());
-    doc.description(DocString.text("Size"), sizeDescription);
-
-    DocString rsizeDescription = DocString.format("%,14d ", inst.getTotalRetainedSize());
-    rsizeDescription.appendDelta(false, base.isPlaceHolder(),
-        inst.getTotalRetainedSize(), base.getTotalRetainedSize());
-    doc.description(DocString.text("Retained Size"), rsizeDescription);
-
     doc.description(DocString.text("Heap"), DocString.text(inst.getHeap().getName()));
 
     Collection<String> rootTypes = inst.getRootTypes();
@@ -98,6 +86,13 @@
 
     doc.end();
 
+    doc.section("Object Size");
+    SizeTable.table(doc, new Column(""), inst != base && !base.isPlaceHolder());
+    SizeTable.row(doc, DocString.text("Shallow"), inst.getSize(), base.getSize());
+    SizeTable.row(doc, DocString.text("Retained"),
+        inst.getTotalRetainedSize(), base.getTotalRetainedSize());
+    SizeTable.end(doc);
+
     printBitmap(doc, inst);
     if (inst.isClassInstance()) {
       printClassInstanceFields(doc, query, inst.asClassInstance());
@@ -249,47 +244,16 @@
   private void printGcRootPath(Doc doc, Query query, AhatInstance inst) {
     doc.section("Sample Path from GC Root");
     List<PathElement> path = inst.getPathFromGcRoot();
-
-    // Add a dummy PathElement as a marker for the root.
-    final PathElement root = new PathElement(null, null);
-    path.add(0, root);
-
-    HeapTable.TableConfig<PathElement> table = new HeapTable.TableConfig<PathElement>() {
-      public String getHeapsDescription() {
-        return "Bytes Retained by Heap (Dominators Only)";
-      }
-
-      public long getSize(PathElement element, AhatHeap heap) {
-        if (element == root) {
-          return heap.getSize();
-        }
-        if (element.isDominator) {
-          return element.instance.getRetainedSize(heap);
-        }
-        return 0;
-      }
-
-      public List<HeapTable.ValueConfig<PathElement>> getValueConfigs() {
-        HeapTable.ValueConfig<PathElement> value = new HeapTable.ValueConfig<PathElement>() {
-          public String getDescription() {
-            return "Path Element";
-          }
-
-          public DocString render(PathElement element) {
-            if (element == root) {
-              return DocString.link(DocString.uri("rooted"), DocString.text("ROOT"));
-            } else {
-              DocString label = DocString.text("→ ");
-              label.append(Summarizer.summarize(element.instance));
-              label.append(element.field);
-              return label;
-            }
-          }
-        };
-        return Collections.singletonList(value);
-      }
+    doc.table(new Column(""), new Column("Path Element"));
+    doc.row(DocString.text("(rooted)"),
+        DocString.link(DocString.uri("root"), DocString.text("ROOT")));
+    for (PathElement element : path) {
+      DocString label = DocString.text("→ ");
+      label.append(Summarizer.summarize(element.instance));
+      label.append(element.field);
+      doc.row(DocString.text(element.isDominator ? "(dominator)" : ""), label);
     };
-    HeapTable.render(doc, query, DOMINATOR_PATH_ID, table, mSnapshot, path);
+    doc.end();
   }
 
   public void printDominatedObjects(Doc doc, Query query, AhatInstance inst) {
diff --git a/tools/ahat/src/ObjectsHandler.java b/tools/ahat/src/ObjectsHandler.java
index 3062d23..86d48f1 100644
--- a/tools/ahat/src/ObjectsHandler.java
+++ b/tools/ahat/src/ObjectsHandler.java
@@ -54,23 +54,18 @@
 
     doc.title("Objects");
 
-    doc.table(
-        new Column("Size", Column.Align.RIGHT),
-        new Column("Δ", Column.Align.RIGHT, mSnapshot.isDiffed()),
+    SizeTable.table(doc, mSnapshot.isDiffed(),
         new Column("Heap"),
         new Column("Object"));
 
     SubsetSelector<AhatInstance> selector = new SubsetSelector(query, OBJECTS_ID, insts);
     for (AhatInstance inst : selector.selected()) {
       AhatInstance base = inst.getBaseline();
-      doc.row(
-          DocString.format("%,14d", inst.getSize()),
-          DocString.delta(inst.isPlaceHolder(), base.isPlaceHolder(),
-            inst.getSize(), base.getSize()),
+      SizeTable.row(doc, inst.getSize(), base.getSize(),
           DocString.text(inst.getHeap().getName()),
           Summarizer.summarize(inst));
     }
-    doc.end();
+    SizeTable.end(doc);
     selector.render(doc);
   }
 }
diff --git a/tools/ahat/src/OverviewHandler.java b/tools/ahat/src/OverviewHandler.java
index ea305c4..c9f8425 100644
--- a/tools/ahat/src/OverviewHandler.java
+++ b/tools/ahat/src/OverviewHandler.java
@@ -18,16 +18,12 @@
 
 import com.android.ahat.heapdump.AhatHeap;
 import com.android.ahat.heapdump.AhatSnapshot;
-import com.android.ahat.heapdump.Diffable;
+import com.android.ahat.heapdump.Size;
 import java.io.File;
 import java.io.IOException;
-import java.util.Collections;
-import java.util.List;
 
 class OverviewHandler implements AhatHandler {
 
-  private static final String OVERVIEW_ID = "overview";
-
   private AhatSnapshot mSnapshot;
   private File mHprof;
   private File mBaseHprof;
@@ -53,39 +49,27 @@
     }
     doc.end();
 
-    doc.section("Heap Sizes");
-    printHeapSizes(doc, query);
+    doc.section("Bytes Retained by Heap");
+    printHeapSizes(doc);
 
     doc.big(Menu.getMenu());
   }
 
-  private static class TableElem implements Diffable<TableElem> {
-    @Override public TableElem getBaseline() {
-      return this;
+  private void printHeapSizes(Doc doc) {
+    SizeTable.table(doc, new Column("Heap"), mSnapshot.isDiffed());
+    Size totalSize = Size.ZERO;
+    Size totalBase = Size.ZERO;
+    for (AhatHeap heap : mSnapshot.getHeaps()) {
+      Size size = heap.getSize();
+      Size base = heap.getBaseline().getSize();
+      if (!size.isZero() || !base.isZero()) {
+        SizeTable.row(doc, DocString.text(heap.getName()), size, base);
+        totalSize = totalSize.plus(size);
+        totalBase = totalBase.plus(base);
+      }
     }
-
-    @Override public boolean isPlaceHolder() {
-      return false;
-    }
-  }
-
-  private void printHeapSizes(Doc doc, Query query) {
-    List<TableElem> dummy = Collections.singletonList(new TableElem());
-
-    HeapTable.TableConfig<TableElem> table = new HeapTable.TableConfig<TableElem>() {
-      public String getHeapsDescription() {
-        return "Bytes Retained by Heap";
-      }
-
-      public long getSize(TableElem element, AhatHeap heap) {
-        return heap.getSize();
-      }
-
-      public List<HeapTable.ValueConfig<TableElem>> getValueConfigs() {
-        return Collections.emptyList();
-      }
-    };
-    HeapTable.render(doc, query, OVERVIEW_ID, table, mSnapshot, dummy);
+    SizeTable.row(doc, DocString.text("Total"), totalSize, totalBase);
+    SizeTable.end(doc);
   }
 }
 
diff --git a/tools/ahat/src/SiteHandler.java b/tools/ahat/src/SiteHandler.java
index febf171..7a831d3 100644
--- a/tools/ahat/src/SiteHandler.java
+++ b/tools/ahat/src/SiteHandler.java
@@ -60,7 +60,7 @@
         }
 
         public long getSize(Site element, AhatHeap heap) {
-          return element.getSize(heap);
+          return element.getSize(heap).getSize();
         }
 
         public List<HeapTable.ValueConfig<Site>> getValueConfigs() {
@@ -80,10 +80,7 @@
     }
 
     doc.section("Objects Allocated");
-
-    doc.table(
-        new Column("Reachable Bytes Allocated", Column.Align.RIGHT),
-        new Column("Δ", Column.Align.RIGHT, mSnapshot.isDiffed()),
+    SizeTable.table(doc, mSnapshot.isDiffed(),
         new Column("Instances", Column.Align.RIGHT),
         new Column("Δ", Column.Align.RIGHT, mSnapshot.isDiffed()),
         new Column("Heap"),
@@ -100,9 +97,7 @@
     for (Site.ObjectsInfo info : selector.selected()) {
       Site.ObjectsInfo baseinfo = info.getBaseline();
       String className = info.getClassName();
-      doc.row(
-          DocString.format("%,14d", info.numBytes),
-          DocString.delta(false, false, info.numBytes, baseinfo.numBytes),
+      SizeTable.row(doc, info.numBytes, baseinfo.numBytes,
           DocString.link(
             DocString.formattedUri("objects?id=%d&depth=%d&heap=%s&class=%s",
               site.getId(), site.getDepth(), info.heap.getName(), className),
@@ -111,7 +106,7 @@
           DocString.text(info.heap.getName()),
           Summarizer.summarize(info.classObj));
     }
-    doc.end();
+    SizeTable.end(doc);
     selector.render(doc);
   }
 }
diff --git a/tools/ahat/src/SitePrinter.java b/tools/ahat/src/SitePrinter.java
index 21ca2de..32037f4 100644
--- a/tools/ahat/src/SitePrinter.java
+++ b/tools/ahat/src/SitePrinter.java
@@ -38,7 +38,7 @@
       }
 
       public long getSize(Site element, AhatHeap heap) {
-        return element.getSize(heap);
+        return element.getSize(heap).getSize();
       }
 
       public List<HeapTable.ValueConfig<Site>> getValueConfigs() {
diff --git a/tools/ahat/src/SizeTable.java b/tools/ahat/src/SizeTable.java
new file mode 100644
index 0000000..46e3956
--- /dev/null
+++ b/tools/ahat/src/SizeTable.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.Size;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Class for rendering a table that includes all categories of Size.
+ * Two table formats are supported, one where a custom left column can be
+ * added before the size columns:
+ *    |left column|Java Size|Native Size|...|Total Size|custom columns...|
+ *
+ * The other without the custom left column:
+ *    |Java Size|Native Size|...|Total Size|custom columns...|
+ */
+class SizeTable {
+  /**
+   * Start a size table with a custom left column.
+   *
+   * |left column|Java Size|Native Size|...|Total Size|custom columns...|
+   *
+   * This should be followed by calls to the 'row' method to fill in the table
+   * contents and the 'end' method to end the table.
+   *
+   * Set showDiff to true if size diffs should be shown.
+   */
+  static void table(Doc doc, Column left, boolean showDiff, Column... columns) {
+    List<Column> cols = new ArrayList<Column>();
+    cols.add(left);
+    cols.add(new Column("Java Size", Column.Align.RIGHT));
+    cols.add(new Column("Δ", Column.Align.RIGHT, showDiff));
+    cols.add(new Column("Registered Native Size", Column.Align.RIGHT));
+    cols.add(new Column("Δ", Column.Align.RIGHT, showDiff));
+    cols.add(new Column("Total Size", Column.Align.RIGHT));
+    cols.add(new Column("Δ", Column.Align.RIGHT, showDiff));
+    cols.addAll(Arrays.asList(columns));
+    doc.table(cols.toArray(new Column[cols.size()]));
+  }
+
+  /**
+   * Add a row to the currently active size table with custom left column.
+   * The number of values must match the number of columns provided for the
+   * currently active table.
+   */
+  static void row(Doc doc, DocString left, Size size, Size base, DocString... values) {
+    List<DocString> vals = new ArrayList<DocString>();
+    vals.add(left);
+    vals.add(DocString.size(size.getJavaSize(), false));
+    vals.add(DocString.delta(false, false, size.getJavaSize(), base.getJavaSize()));
+    vals.add(DocString.size(size.getRegisteredNativeSize(), false));
+    vals.add(DocString.delta(false, false,
+          size.getRegisteredNativeSize(), base.getRegisteredNativeSize()));
+    vals.add(DocString.size(size.getSize(), false));
+    vals.add(DocString.delta(false, false, size.getSize(), base.getSize()));
+    vals.addAll(Arrays.asList(values));
+    doc.row(vals.toArray(new DocString[vals.size()]));
+  }
+
+  /**
+   * Start a size table without a custom left column.
+   *
+   * |Java Size|Native Size|...|Total Size|custom columns...|
+   * This should be followed by calls to the 'row' method to fill in the table
+   * contents and the 'end' method to end the table.
+   *
+   * Set showDiff to true if size diffs should be shown.
+   */
+  static void table(Doc doc, boolean showDiff, Column... columns) {
+    // Re-use the code for a size table with custom left column by having an
+    // invisible custom left column.
+    table(doc, new Column("", Column.Align.LEFT, false), showDiff, columns);
+  }
+
+  /**
+   * Add a row to the currently active size table without a custom left column.
+   * The number of values must match the number of columns provided for the
+   * currently active table.
+   */
+  static void row(Doc doc, Size size, Size base, DocString... values) {
+    row(doc, new DocString(), size, base, values);
+  }
+
+  /**
+   * End the currently active table.
+   */
+  static void end(Doc doc) {
+    doc.end();
+  }
+}
diff --git a/tools/ahat/src/heapdump/AhatClassInstance.java b/tools/ahat/src/heapdump/AhatClassInstance.java
index 273530a..c10d604 100644
--- a/tools/ahat/src/heapdump/AhatClassInstance.java
+++ b/tools/ahat/src/heapdump/AhatClassInstance.java
@@ -154,10 +154,7 @@
   }
 
   @Override public AhatInstance getAssociatedBitmapInstance() {
-    if (isInstanceOfClass("android.graphics.Bitmap")) {
-      return this;
-    }
-    return null;
+    return getBitmapInfo() == null ? null : this;
   }
 
   @Override public boolean isClassInstance() {
@@ -178,14 +175,27 @@
    * Returns null if the field value is null, not a byte[] or could not be read.
    */
   private byte[] getByteArrayField(String fieldName) {
-    Value value = getField(fieldName);
-    if (!value.isAhatInstance()) {
-      return null;
-    }
-    return value.asAhatInstance().asByteArray();
+    AhatInstance field = getRefField(fieldName);
+    return field == null ? null : field.asByteArray();
   }
 
-  public BufferedImage asBitmap() {
+  private static class BitmapInfo {
+    public final int width;
+    public final int height;
+    public final byte[] buffer;
+
+    public BitmapInfo(int width, int height, byte[] buffer) {
+      this.width = width;
+      this.height = height;
+      this.buffer = buffer;
+    }
+  }
+
+  /**
+   * Return bitmap info for this object, or null if no appropriate bitmap
+   * info is available.
+   */
+  private BitmapInfo getBitmapInfo() {
     if (!isInstanceOfClass("android.graphics.Bitmap")) {
       return null;
     }
@@ -205,20 +215,34 @@
       return null;
     }
 
+    if (buffer.length < 4 * height * width) {
+      return null;
+    }
+
+    return new BitmapInfo(width, height, buffer);
+
+  }
+
+  public BufferedImage asBitmap() {
+    BitmapInfo info = getBitmapInfo();
+    if (info == null) {
+      return null;
+    }
+
     // Convert the raw data to an image
     // Convert BGRA to ABGR
-    int[] abgr = new int[height * width];
+    int[] abgr = new int[info.height * info.width];
     for (int i = 0; i < abgr.length; i++) {
       abgr[i] = (
-          (((int) buffer[i * 4 + 3] & 0xFF) << 24)
-          + (((int) buffer[i * 4 + 0] & 0xFF) << 16)
-          + (((int) buffer[i * 4 + 1] & 0xFF) << 8)
-          + ((int) buffer[i * 4 + 2] & 0xFF));
+          (((int) info.buffer[i * 4 + 3] & 0xFF) << 24)
+          + (((int) info.buffer[i * 4 + 0] & 0xFF) << 16)
+          + (((int) info.buffer[i * 4 + 1] & 0xFF) << 8)
+          + ((int) info.buffer[i * 4 + 2] & 0xFF));
     }
 
     BufferedImage bitmap = new BufferedImage(
-        width, height, BufferedImage.TYPE_4BYTE_ABGR);
-    bitmap.setRGB(0, 0, width, height, abgr, 0, width);
+        info.width, info.height, BufferedImage.TYPE_4BYTE_ABGR);
+    bitmap.setRGB(0, 0, info.width, info.height, abgr, 0, info.width);
     return bitmap;
   }
 }
diff --git a/tools/ahat/src/heapdump/AhatHeap.java b/tools/ahat/src/heapdump/AhatHeap.java
index c39adc4..b8897a1 100644
--- a/tools/ahat/src/heapdump/AhatHeap.java
+++ b/tools/ahat/src/heapdump/AhatHeap.java
@@ -18,7 +18,7 @@
 
 public class AhatHeap implements Diffable<AhatHeap> {
   private String mName;
-  private long mSize = 0;
+  private Size mSize = Size.ZERO;
   private int mIndex;
   private AhatHeap mBaseline;
   private boolean mIsPlaceHolder = false;
@@ -47,8 +47,8 @@
     return new AhatHeap(name, baseline);
   }
 
-  void addToSize(long increment) {
-    mSize += increment;
+  void addToSize(Size size) {
+    mSize = mSize.plus(size);
   }
 
   /**
@@ -69,7 +69,7 @@
   /**
    * Returns the total number of bytes allocated on this heap.
    */
-  public long getSize() {
+  public Size getSize() {
     return mSize;
   }
 
diff --git a/tools/ahat/src/heapdump/AhatInstance.java b/tools/ahat/src/heapdump/AhatInstance.java
index e6b9c00..af369d9 100644
--- a/tools/ahat/src/heapdump/AhatInstance.java
+++ b/tools/ahat/src/heapdump/AhatInstance.java
@@ -20,17 +20,18 @@
 import com.android.tools.perflib.heap.Instance;
 import com.android.tools.perflib.heap.RootObj;
 import java.awt.image.BufferedImage;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
+import java.util.Deque;
 import java.util.List;
 
 public abstract class AhatInstance implements Diffable<AhatInstance> {
   private long mId;
-  private long mSize;
-  private long mTotalRetainedSize;
-  private long mRetainedSizes[];      // Retained size indexed by heap index
+  private Size mSize;
+  private Size[] mRetainedSizes;      // Retained size indexed by heap index
   private boolean mIsReachable;
   private AhatHeap mHeap;
   private AhatInstance mImmediateDominator;
@@ -63,15 +64,10 @@
    */
   void initialize(AhatSnapshot snapshot, Instance inst) {
     mId = inst.getId();
-    mSize = inst.getSize();
-    mTotalRetainedSize = inst.getTotalRetainedSize();
+    mSize = new Size(inst.getSize(), 0);
     mIsReachable = inst.isReachable();
 
     List<AhatHeap> heaps = snapshot.getHeaps();
-    mRetainedSizes = new long[heaps.size()];
-    for (AhatHeap heap : heaps) {
-      mRetainedSizes[heap.getIndex()] = inst.getRetainedSize(heap.getIndex());
-    }
 
     mHeap = snapshot.getHeap(inst.getHeap().getName());
 
@@ -130,7 +126,7 @@
   /**
    * Returns the shallow number of bytes this object takes up.
    */
-  public long getSize() {
+  public Size getSize() {
     return mSize;
   }
 
@@ -138,16 +134,32 @@
    * Returns the number of bytes belonging to the given heap that this instance
    * retains.
    */
-  public long getRetainedSize(AhatHeap heap) {
+  public Size getRetainedSize(AhatHeap heap) {
     int index = heap.getIndex();
-    return 0 <= index && index < mRetainedSizes.length ? mRetainedSizes[heap.getIndex()] : 0;
+    if (mRetainedSizes != null && 0 <= index && index < mRetainedSizes.length) {
+      return mRetainedSizes[heap.getIndex()];
+    }
+    return Size.ZERO;
   }
 
   /**
    * Returns the total number of bytes this instance retains.
    */
-  public long getTotalRetainedSize() {
-    return mTotalRetainedSize;
+  public Size getTotalRetainedSize() {
+    Size size = Size.ZERO;
+    if (mRetainedSizes != null) {
+      for (int i = 0; i < mRetainedSizes.length; i++) {
+        size = size.plus(mRetainedSizes[i]);
+      }
+    }
+    return size;
+  }
+
+  /**
+   * Increment the number of registered native bytes tied to this object.
+   */
+  void addRegisteredNativeSize(long size) {
+    mSize = mSize.plusRegisteredNativeSize(size);
   }
 
   /**
@@ -452,4 +464,41 @@
   AhatInstance newPlaceHolderInstance() {
     return new AhatPlaceHolderInstance(this);
   }
+
+  /**
+   * Recursively compute the retained size of the given instance and all
+   * other instances it dominates.
+   */
+  static void computeRetainedSize(AhatInstance inst, int numHeaps) {
+    // Note: We can't use a recursive implementation because it can lead to
+    // stack overflow. Use an iterative implementation instead.
+    //
+    // Objects not yet processed will have mRetainedSizes set to null.
+    // Once prepared, an object will have mRetainedSizes set to an array of 0
+    // sizes.
+    Deque<AhatInstance> deque = new ArrayDeque<AhatInstance>();
+    deque.push(inst);
+
+    while (!deque.isEmpty()) {
+      inst = deque.pop();
+      if (inst.mRetainedSizes == null) {
+        inst.mRetainedSizes = new Size[numHeaps];
+        for (int i = 0; i < numHeaps; i++) {
+          inst.mRetainedSizes[i] = Size.ZERO;
+        }
+        inst.mRetainedSizes[inst.mHeap.getIndex()] = 
+          inst.mRetainedSizes[inst.mHeap.getIndex()].plus(inst.mSize);
+        deque.push(inst);
+        for (AhatInstance dominated : inst.mDominated) {
+          deque.push(dominated);
+        }
+      } else {
+        for (AhatInstance dominated : inst.mDominated) {
+          for (int i = 0; i < numHeaps; i++) {
+            inst.mRetainedSizes[i] = inst.mRetainedSizes[i].plus(dominated.mRetainedSizes[i]);
+          }
+        }
+      }
+    }
+  }
 }
diff --git a/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java b/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java
index c6ad87f..2b3e056 100644
--- a/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java
+++ b/tools/ahat/src/heapdump/AhatPlaceHolderClassObj.java
@@ -29,16 +29,16 @@
     baseline.setBaseline(this);
   }
 
-  @Override public long getSize() {
-    return 0;
+  @Override public Size getSize() {
+    return Size.ZERO;
   }
 
-  @Override public long getRetainedSize(AhatHeap heap) {
-    return 0;
+  @Override public Size getRetainedSize(AhatHeap heap) {
+    return Size.ZERO;
   }
 
-  @Override public long getTotalRetainedSize() {
-    return 0;
+  @Override public Size getTotalRetainedSize() {
+    return Size.ZERO;
   }
 
   @Override public AhatHeap getHeap() {
diff --git a/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java b/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java
index 9412eae..4aac804 100644
--- a/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java
+++ b/tools/ahat/src/heapdump/AhatPlaceHolderInstance.java
@@ -29,16 +29,16 @@
     baseline.setBaseline(this);
   }
 
-  @Override public long getSize() {
-    return 0;
+  @Override public Size getSize() {
+    return Size.ZERO;
   }
 
-  @Override public long getRetainedSize(AhatHeap heap) {
-    return 0;
+  @Override public Size getRetainedSize(AhatHeap heap) {
+    return Size.ZERO;
   }
 
-  @Override public long getTotalRetainedSize() {
-    return 0;
+  @Override public Size getTotalRetainedSize() {
+    return Size.ZERO;
   }
 
   @Override public AhatHeap getHeap() {
diff --git a/tools/ahat/src/heapdump/AhatSnapshot.java b/tools/ahat/src/heapdump/AhatSnapshot.java
index 20b85da..35d6c8a 100644
--- a/tools/ahat/src/heapdump/AhatSnapshot.java
+++ b/tools/ahat/src/heapdump/AhatSnapshot.java
@@ -82,8 +82,7 @@
     Snapshot snapshot = Snapshot.createSnapshot(buffer, map);
     snapshot.computeDominators();
 
-    // Properly label the class of class objects in the perflib snapshot, and
-    // count the total number of instances.
+    // Properly label the class of class objects in the perflib snapshot.
     final ClassObj javaLangClass = snapshot.findClass("java.lang.Class");
     if (javaLangClass != null) {
       for (Heap heap : snapshot.getHeaps()) {
@@ -134,12 +133,19 @@
       }
     });
 
+    Map<Instance, Long> registeredNative = Perflib.getRegisteredNativeAllocations(snapshot);
+
     // Initialize ahat snapshot and instances based on the perflib snapshot
     // and instances.
     for (AhatInstance ahat : mInstances) {
       Instance inst = snapshot.findInstance(ahat.getId());
       ahat.initialize(this, inst);
 
+      Long registeredNativeSize = registeredNative.get(inst);
+      if (registeredNativeSize != null) {
+        ahat.addRegisteredNativeSize(registeredNativeSize);
+      }
+
       if (inst.getImmediateDominator() == Snapshot.SENTINEL_ROOT) {
         mRooted.add(ahat);
       }
@@ -166,6 +172,13 @@
       }
     }
     snapshot.dispose();
+
+    // Compute the retained sizes of objects. We do this explicitly now rather
+    // than relying on the retained sizes computed by perflib so that
+    // registered native sizes are included.
+    for (AhatInstance inst : mRooted) {
+      AhatInstance.computeRetainedSize(inst, mHeaps.size());
+    }
   }
 
   /**
diff --git a/tools/ahat/src/heapdump/Perflib.java b/tools/ahat/src/heapdump/Perflib.java
new file mode 100644
index 0000000..d0264a3
--- /dev/null
+++ b/tools/ahat/src/heapdump/Perflib.java
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+import com.android.tools.perflib.heap.ClassInstance;
+import com.android.tools.perflib.heap.ClassObj;
+import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.Snapshot;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Collection of utilities that may be suitable to have in perflib instead of
+ * ahat.
+ */
+public class Perflib {
+  /**
+   * Return a collection of instances in the given snapshot that are tied to
+   * registered native allocations and their corresponding registered native
+   * sizes.
+   */
+  public static Map<Instance, Long> getRegisteredNativeAllocations(Snapshot snapshot) {
+    Map<Instance, Long> allocs = new HashMap<Instance, Long>();
+    ClassObj cleanerClass = snapshot.findClass("sun.misc.Cleaner");
+    if (cleanerClass != null) {
+      for (Instance cleanerInst : cleanerClass.getInstancesList()) {
+        ClassInstance cleaner = (ClassInstance)cleanerInst;
+        Object referent = getField(cleaner, "referent");
+        if (referent instanceof Instance) {
+          Instance inst = (Instance)referent;
+          Object thunkValue = getField(cleaner, "thunk");
+          if (thunkValue instanceof ClassInstance) {
+            ClassInstance thunk = (ClassInstance)thunkValue;
+            ClassObj thunkClass = thunk.getClassObj();
+            String cleanerThunkClassName = "libcore.util.NativeAllocationRegistry$CleanerThunk";
+            if (thunkClass != null && cleanerThunkClassName.equals(thunkClass.getClassName())) {
+              for (ClassInstance.FieldValue thunkField : thunk.getValues()) {
+                if (thunkField.getValue() instanceof ClassInstance) {
+                  ClassInstance registry = (ClassInstance)thunkField.getValue();
+                  ClassObj registryClass = registry.getClassObj();
+                  String registryClassName = "libcore.util.NativeAllocationRegistry";
+                  if (registryClass != null
+                      && registryClassName.equals(registryClass.getClassName())) {
+                    Object sizeValue = getField(registry, "size");
+                    if (sizeValue instanceof Long) {
+                      long size = (Long)sizeValue;
+                      if (size > 0) {
+                        Long old = allocs.get(inst);
+                        allocs.put(inst, old == null ? size : old + size);
+                      }
+                    }
+                    break;
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    return allocs;
+  }
+
+  /**
+   * Helper function to read a single field from a perflib class instance.
+   * Returns null if field not found. Note there is no way to distinguish
+   * between field not found and a field value of null.
+   */
+  private static Object getField(ClassInstance cls, String name) {
+    for (ClassInstance.FieldValue field : cls.getValues()) {
+      if (name.equals(field.getField().getName())) {
+        return field.getValue();
+      }
+    }
+    return null;
+  }
+}
diff --git a/tools/ahat/src/heapdump/Site.java b/tools/ahat/src/heapdump/Site.java
index 738eaf0..fdd4eea 100644
--- a/tools/ahat/src/heapdump/Site.java
+++ b/tools/ahat/src/heapdump/Site.java
@@ -44,7 +44,7 @@
   // The total size of objects allocated in this site (including child sites),
   // organized by heap index. Heap indices outside the range of mSizesByHeap
   // implicitly have size 0.
-  private long[] mSizesByHeap;
+  private Size[] mSizesByHeap;
 
   // List of child sites.
   private List<Site> mChildren;
@@ -60,14 +60,18 @@
     public AhatHeap heap;
     public AhatClassObj classObj;   // May be null.
     public long numInstances;
-    public long numBytes;
+    public Size numBytes;
     private ObjectsInfo baseline;
 
-    public ObjectsInfo(AhatHeap heap, AhatClassObj classObj, long numInstances, long numBytes) {
+    /**
+     * Construct a new, empty objects info for the given heap and class
+     * combination.
+     */
+    public ObjectsInfo(AhatHeap heap, AhatClassObj classObj) {
       this.heap = heap;
       this.classObj = classObj;
-      this.numInstances = numInstances;
-      this.numBytes = numBytes;
+      this.numInstances = 0;
+      this.numBytes = Size.ZERO;
       this.baseline = this;
     }
 
@@ -107,7 +111,7 @@
     mLineNumber = line;
     mId = id;
     mDepth = depth;
-    mSizesByHeap = new long[1];
+    mSizesByHeap = new Size[0];
     mChildren = new ArrayList<Site>();
     mObjects = new ArrayList<AhatInstance>();
     mObjectsInfos = new ArrayList<ObjectsInfo>();
@@ -133,16 +137,20 @@
       if (inst.isReachable()) {
         AhatHeap heap = inst.getHeap();
         if (heap.getIndex() >= site.mSizesByHeap.length) {
-          long[] newSizes = new long[heap.getIndex() + 1];
+          Size[] newSizes = new Size[heap.getIndex() + 1];
           for (int i = 0; i < site.mSizesByHeap.length; i++) {
             newSizes[i] = site.mSizesByHeap[i];
           }
+          for (int i = site.mSizesByHeap.length; i < heap.getIndex() + 1; i++) {
+            newSizes[i] = Size.ZERO;
+          }
           site.mSizesByHeap = newSizes;
         }
-        site.mSizesByHeap[heap.getIndex()] += inst.getSize();
+        site.mSizesByHeap[heap.getIndex()]
+          = site.mSizesByHeap[heap.getIndex()].plus(inst.getSize());
 
         info.numInstances++;
-        info.numBytes += inst.getSize();
+        info.numBytes = info.numBytes.plus(inst.getSize());
       }
 
       if (depth > 0) {
@@ -172,9 +180,9 @@
   }
 
   // Get the size of a site for a specific heap.
-  public long getSize(AhatHeap heap) {
+  public Size getSize(AhatHeap heap) {
     int index = heap.getIndex();
-    return index >= 0 && index < mSizesByHeap.length ? mSizesByHeap[index] : 0;
+    return index >= 0 && index < mSizesByHeap.length ? mSizesByHeap[index] : Size.ZERO;
   }
 
   /**
@@ -198,7 +206,7 @@
 
     ObjectsInfo info = classToObjectsInfo.get(classObj);
     if (info == null) {
-      info = new ObjectsInfo(heap, classObj, 0, 0);
+      info = new ObjectsInfo(heap, classObj);
       mObjectsInfos.add(info);
       classToObjectsInfo.put(classObj, info);
     }
@@ -210,10 +218,10 @@
   }
 
   // Get the combined size of the site for all heaps.
-  public long getTotalSize() {
-    long total = 0;
+  public Size getTotalSize() {
+    Size total = Size.ZERO;
     for (int i = 0; i < mSizesByHeap.length; i++) {
-      total += mSizesByHeap[i];
+      total = total.plus(mSizesByHeap[i]);
     }
     return total;
   }
diff --git a/tools/ahat/src/heapdump/Size.java b/tools/ahat/src/heapdump/Size.java
new file mode 100644
index 0000000..7c8db90
--- /dev/null
+++ b/tools/ahat/src/heapdump/Size.java
@@ -0,0 +1,89 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat.heapdump;
+
+/**
+ * The Size class is used to represent how much space an instance takes up.
+ *
+ * An abstraction is introduced rather than using a long directly in order to
+ * more easily keep track of the different components of the size. For
+ * example, some instances may have associated native, code, or graphics
+ * sizes.
+ *
+ * Size objects are immutable.
+ */
+public class Size {
+  private final long mJavaSize;
+  private final long mRegisteredNativeSize;
+
+  public static Size ZERO = new Size(0, 0);
+
+  public Size(long javaSize, long registeredNativeSize) {
+    mJavaSize = javaSize;
+    mRegisteredNativeSize = registeredNativeSize;
+  }
+
+  public long getSize() {
+    return mJavaSize + mRegisteredNativeSize;
+  }
+
+  public long getJavaSize() {
+    return mJavaSize;
+  }
+
+  public long getRegisteredNativeSize() {
+    return mRegisteredNativeSize;
+  }
+
+  /**
+   * Returns true if all the fields of this size object are zero.
+   */
+  public boolean isZero() {
+    return mJavaSize == 0 && mRegisteredNativeSize == 0;
+  }
+
+  /**
+   * Return a new Size object that is the sum of this size and the other.
+   */
+  public Size plus(Size other) {
+    if (isZero()) {
+      return other;
+    } else if (other.isZero()) {
+      return this;
+    } else {
+      return new Size(mJavaSize + other.mJavaSize,
+          mRegisteredNativeSize + other.mRegisteredNativeSize);
+    }
+  }
+
+  /**
+   * Return a new Size object that has 'size' more registered native size than
+   * this Size object.
+   */
+  public Size plusRegisteredNativeSize(long size) {
+    return new Size(mJavaSize, mRegisteredNativeSize + size);
+  }
+
+  @Override public boolean equals(Object other) {
+    if (other instanceof Size) {
+      Size s = (Size)other;
+      return mJavaSize == s.mJavaSize && mRegisteredNativeSize == s.mRegisteredNativeSize;
+    }
+    return false;
+  }
+}
+
diff --git a/tools/ahat/src/heapdump/Sort.java b/tools/ahat/src/heapdump/Sort.java
index 93d147a..0745803 100644
--- a/tools/ahat/src/heapdump/Sort.java
+++ b/tools/ahat/src/heapdump/Sort.java
@@ -32,6 +32,17 @@
  */
 public class Sort {
   /**
+   * Compare sizes by their total size.
+   * This sorts sizes from smaller total size to larger total size.
+   */
+  public static final Comparator<Size> SIZE_BY_SIZE = new Comparator<Size>() {
+    @Override
+    public int compare(Size a, Size b) {
+      return Long.compare(a.getSize(), b.getSize());
+    }
+  };
+
+  /**
    * Compare instances by their total retained size.
    * Different instances with the same total retained size are considered
    * equal for the purposes of comparison.
@@ -41,7 +52,7 @@
     = new Comparator<AhatInstance>() {
     @Override
     public int compare(AhatInstance a, AhatInstance b) {
-      return Long.compare(b.getTotalRetainedSize(), a.getTotalRetainedSize());
+      return SIZE_BY_SIZE.compare(b.getTotalRetainedSize(), a.getTotalRetainedSize());
     }
   };
 
@@ -60,7 +71,7 @@
 
     @Override
     public int compare(AhatInstance a, AhatInstance b) {
-      return Long.compare(b.getRetainedSize(mHeap), a.getRetainedSize(mHeap));
+      return SIZE_BY_SIZE.compare(b.getRetainedSize(mHeap), a.getRetainedSize(mHeap));
     }
   }
 
@@ -119,7 +130,7 @@
 
     @Override
     public int compare(Site a, Site b) {
-      return Long.compare(b.getSize(mHeap), a.getSize(mHeap));
+      return SIZE_BY_SIZE.compare(b.getSize(mHeap), a.getSize(mHeap));
     }
   }
 
@@ -130,7 +141,7 @@
   public static final Comparator<Site> SITE_BY_TOTAL_SIZE = new Comparator<Site>() {
     @Override
     public int compare(Site a, Site b) {
-      return Long.compare(b.getTotalSize(), a.getTotalSize());
+      return SIZE_BY_SIZE.compare(b.getTotalSize(), a.getTotalSize());
     }
   };
 
@@ -158,7 +169,7 @@
     = new Comparator<Site.ObjectsInfo>() {
     @Override
     public int compare(Site.ObjectsInfo a, Site.ObjectsInfo b) {
-      return Long.compare(b.numBytes, a.numBytes);
+      return SIZE_BY_SIZE.compare(b.numBytes, a.numBytes);
     }
   };
 
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index 20245f3..c35ccf1 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 1.1
+Implementation-Version: 1.2
 Main-Class: com.android.ahat.Main
diff --git a/tools/ahat/test-dump/Main.java b/tools/ahat/test-dump/Main.java
index 7a05b1c..3d3de78 100644
--- a/tools/ahat/test-dump/Main.java
+++ b/tools/ahat/test-dump/Main.java
@@ -20,6 +20,7 @@
 import java.lang.ref.ReferenceQueue;
 import java.lang.ref.SoftReference;
 import java.lang.ref.WeakReference;
+import libcore.util.NativeAllocationRegistry;
 import org.apache.harmony.dalvik.ddmc.DdmVmInternal;
 
 /**
@@ -98,6 +99,11 @@
         bigArray[i] = (byte)((i*i) & 0xFF);
       }
 
+      // 0x12345, 50000, and 0xABCDABCD are arbitrary values.
+      NativeAllocationRegistry registry = new NativeAllocationRegistry(
+          Main.class.getClassLoader(), 0x12345, 50000);
+      registry.registerNativeAllocation(anObject, 0xABCDABCD);
+
       addedObject = baseline ? null : new AddedObject();
       removedObject = baseline ? new RemovedObject() : null;
       modifiedObject = new ModifiedObject();
diff --git a/tools/ahat/test/InstanceTest.java b/tools/ahat/test/InstanceTest.java
index 3a50150..71b081c 100644
--- a/tools/ahat/test/InstanceTest.java
+++ b/tools/ahat/test/InstanceTest.java
@@ -21,6 +21,7 @@
 import com.android.ahat.heapdump.AhatInstance;
 import com.android.ahat.heapdump.AhatSnapshot;
 import com.android.ahat.heapdump.PathElement;
+import com.android.ahat.heapdump.Size;
 import com.android.ahat.heapdump.Value;
 import com.android.tools.perflib.heap.hprof.HprofClassDump;
 import com.android.tools.perflib.heap.hprof.HprofConstant;
@@ -292,13 +293,13 @@
     // allocated on, and should be 0 for all other heaps.
     AhatInstance anObject = dump.getDumpedAhatInstance("anObject");
     AhatSnapshot snapshot = dump.getAhatSnapshot();
-    long size = anObject.getSize();
+    Size size = anObject.getSize();
     assertEquals(size, anObject.getTotalRetainedSize());
     assertEquals(size, anObject.getRetainedSize(anObject.getHeap()));
     for (AhatHeap heap : snapshot.getHeaps()) {
       if (!heap.equals(anObject.getHeap())) {
         assertEquals(String.format("For heap '%s'", heap.getName()),
-            0, anObject.getRetainedSize(heap));
+            Size.ZERO, anObject.getRetainedSize(heap));
       }
     }
   }
diff --git a/tools/ahat/test/NativeAllocationTest.java b/tools/ahat/test/NativeAllocationTest.java
new file mode 100644
index 0000000..7436be8
--- /dev/null
+++ b/tools/ahat/test/NativeAllocationTest.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.ahat;
+
+import com.android.ahat.heapdump.AhatInstance;
+import java.io.IOException;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Checks that registered native allocations are reflected in instance sizes.
+ */
+public class NativeAllocationTest {
+
+  /**
+   * The test-dump program registers a native allocation for 'anObject' using
+   * the value 50000; that value is expected as its registered native size.
+   */
+  @Test
+  public void nativeAllocation() throws IOException {
+    TestDump dump = TestDump.getTestDump();
+
+    AhatInstance anObject = dump.getDumpedAhatInstance("anObject");
+    assertEquals(50000, anObject.getSize().getRegisteredNativeSize());
+  }
+}
+
diff --git a/tools/ahat/test/Tests.java b/tools/ahat/test/Tests.java
index 2fd3286..c7e9b18 100644
--- a/tools/ahat/test/Tests.java
+++ b/tools/ahat/test/Tests.java
@@ -24,6 +24,7 @@
       args = new String[]{
         "com.android.ahat.DiffTest",
         "com.android.ahat.InstanceTest",
+        "com.android.ahat.NativeAllocationTest",
         "com.android.ahat.ObjectHandlerTest",
         "com.android.ahat.OverviewHandlerTest",
         "com.android.ahat.PerformanceTest",
diff --git a/tools/asan.sh b/tools/asan.sh
new file mode 100644
index 0000000..b749545
--- /dev/null
+++ b/tools/asan.sh
@@ -0,0 +1,21 @@
+#!/system/bin/sh
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# NOTE: This script is used by add_package_property.sh and not meant to be executed directly
+#
+# This script contains the property and the options required to log poisoned
+# memory accesses (found in logcat)
+ASAN_OPTIONS=halt_on_error=0:verbosity=0:print_legend=0:print_full_thread_history=0:print_stats=0:print_summary=0:suppress_equal_pcs=0:fast_unwind_on_fatal=1 asanwrapper "$@"
diff --git a/tools/cpp-define-generator/offset_runtime.def b/tools/cpp-define-generator/offset_runtime.def
index 17167a0..41e7e40 100644
--- a/tools/cpp-define-generator/offset_runtime.def
+++ b/tools/cpp-define-generator/offset_runtime.def
@@ -17,7 +17,8 @@
 // Offsets within ShadowFrame.
 
 #if defined(DEFINE_INCLUDE_DEPENDENCIES)
-#include "runtime.h"         // art::Runtime
+#include "base/callee_save_type.h"  // art::CalleeSaveType
+#include "runtime.h"                // art::Runtime
 #endif
 
 #include "common.def"        // DEFINE_OFFSET_EXPR
@@ -25,17 +26,20 @@
 // Note: these callee save methods loads require read barriers.
 
 #define DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(field_name, constant_name) \
-  DEFINE_OFFSET_EXPR(Runtime, field_name ## _METHOD, size_t, art::Runtime::GetCalleeSaveMethodOffset(art::Runtime:: constant_name))
+  DEFINE_OFFSET_EXPR(Runtime, \
+                     field_name ## _METHOD, \
+                     size_t, \
+                     art::Runtime::GetCalleeSaveMethodOffset(constant_name))
 
                     //     Macro substring       Constant name
 // Offset of field Runtime::callee_save_methods_[kSaveAllCalleeSaves]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL_CALLEE_SAVES, kSaveAllCalleeSaves)
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_ALL_CALLEE_SAVES, art::CalleeSaveType::kSaveAllCalleeSaves)
 // Offset of field Runtime::callee_save_methods_[kSaveRefsOnly]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_ONLY, kSaveRefsOnly)
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_ONLY, art::CalleeSaveType::kSaveRefsOnly)
 // Offset of field Runtime::callee_save_methods_[kSaveRefsAndArgs]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_AND_ARGS, kSaveRefsAndArgs)
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_REFS_AND_ARGS, art::CalleeSaveType::kSaveRefsAndArgs)
 // Offset of field Runtime::callee_save_methods_[kSaveEverything]
-DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, kSaveEverything)
+DEFINE_RUNTIME_CALLEE_SAVE_OFFSET(SAVE_EVERYTHING, art::CalleeSaveType::kSaveEverything)
 
 #undef DEFINE_RUNTIME_CALLEE_SAVE_OFFSET
 #include "common_undef.def"  // undef DEFINE_OFFSET_EXPR
diff --git a/tools/dexfuzz/src/dexfuzz/program/Mutation.java b/tools/dexfuzz/src/dexfuzz/program/Mutation.java
index 2eba718..436fcc4 100644
--- a/tools/dexfuzz/src/dexfuzz/program/Mutation.java
+++ b/tools/dexfuzz/src/dexfuzz/program/Mutation.java
@@ -39,8 +39,21 @@
     this.mutatableCode = mutatableCode;
     this.mutatableCodeIdx = mutatableCode.mutatableCodeIdx;
   }
-
+  /**
+   * Serializes the field(s) of the mutation to string format.
+   * The fields are separated by a space.
+   * @return the serialized string representation of the field(s) of the mutation.
+   */
   public abstract String getString();
 
+  /**
+   * Deserializes the strings back to the field(s) of the mutation,
+   * given a string array as its argument. The string array
+   * contains the individual elements which were previously constructed by
+   * getString() method. elements[0] stores the class name and elements[1]
+   * stores the mutatable code index, which are predefined in MutationSerializer.java.
+   * Users can deserialize the string representation in elements[2] and so forth.
+   * @param elements string array with serialized representations of the field(s) of the mutation.
+   */
   public abstract void parseString(String[] elements);
 }
\ No newline at end of file
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 07d7fb8..0c58585 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -209,5 +209,12 @@
   modes: [device],
   names: ["libcore.java.lang.ProcessBuilderTest#testRedirectInherit",
           "libcore.java.lang.ProcessBuilderTest#testRedirect_nullStreams"]
+},
+{
+  description: "Test is timing sensitive",
+  result: EXEC_FAILED,
+  bug: 62528691,
+  modes: [device],
+  names: ["libcore.java.util.TimeZoneTest#testSetDefaultRace"]
 }
 ]
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 6c2c072..546a6bf 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -38,6 +38,11 @@
 
 seconds_per_hour=3600
 
+# Kill logd first, so that when we set the adb buffer size later in this file,
+# it is brought up again.
+echo -e "${green}Killing logd, seen leaking on fugu/N${nc}"
+adb shell killall -9 /system/bin/logd
+
 # Update date on device if the difference with host is more than one hour.
 if [ $abs_time_difference_in_seconds -gt $seconds_per_hour ]; then
   echo -e "${green}Update date on device${nc}"
@@ -61,9 +66,6 @@
 echo -e "${green}Battery info${nc}"
 adb shell dumpsys battery
 
-echo -e "${green}Killing logd, seen leaking on fugu/N${nc}"
-adb shell killall -9 /system/bin/logd
-
 echo -e "${green}Setting adb buffer size to 32MB${nc}"
 adb logcat -G 32M
 adb logcat -g