Merge "Update NativeBridge interface in ART test"
diff --git a/Android.mk b/Android.mk
index 0e86188..f8c5378 100644
--- a/Android.mk
+++ b/Android.mk
@@ -388,6 +388,7 @@
 # libstdc++ is needed when building for ART_TARGET_LINUX.
 ART_TARGET_SHARED_LIBRARY_BENCHMARK := $(TARGET_OUT_SHARED_LIBRARIES)/libartbenchmark.so
 build-art-target-golem: dex2oat dalvikvm patchoat linker libstdc++ \
+                        $(TARGET_OUT_EXECUTABLES)/art \
                         $(TARGET_OUT)/etc/public.libraries.txt \
                         $(ART_TARGET_DEX_DEPENDENCIES) \
                         $(ART_TARGET_SHARED_LIBRARY_DEPENDENCIES) \
diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg
index cf1832b..0ed230c 100644
--- a/PREUPLOAD.cfg
+++ b/PREUPLOAD.cfg
@@ -1,2 +1,3 @@
 [Hook Scripts]
 check_generated_files_up_to_date = tools/cpp-define-generator/presubmit-check-files-up-to-date
+check_cpplint_on_changed_files = tools/cpplint_presubmit.py
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 1591e34..1ae79ac 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -54,11 +54,11 @@
 ART_TEST_QUIET ?= true
 
 # Do you want interpreter tests run?
-ART_TEST_INTERPRETER ?= $(ART_TEST_FULL)
-ART_TEST_INTERPRETER_ACCESS_CHECKS ?= $(ART_TEST_FULL)
+ART_TEST_INTERPRETER ?= true
+ART_TEST_INTERPRETER_ACCESS_CHECKS ?= true
 
 # Do you want JIT tests run?
-ART_TEST_JIT ?= $(ART_TEST_FULL)
+ART_TEST_JIT ?= true
 
 # Do you want optimizing compiler tests run?
 ART_TEST_OPTIMIZING ?= true
@@ -66,6 +66,9 @@
 # Do you want to test the optimizing compiler with graph coloring register allocation?
 ART_TEST_OPTIMIZING_GRAPH_COLOR ?= $(ART_TEST_FULL)
 
+# Do you want run-tests run with profiles?
+ART_TEST_SPEED_PROFILE ?= $(ART_TEST_FULL)
+
 # Do we want to test PIC-compiled tests ("apps")?
 ART_TEST_PIC_TEST ?= $(ART_TEST_FULL)
 
@@ -215,6 +218,7 @@
     LOCAL_MODULE_PATH := $(3)
     LOCAL_DEX_PREOPT_IMAGE_LOCATION := $(TARGET_CORE_IMG_OUT)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
+      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_JAVA_LIBRARY)
@@ -230,6 +234,7 @@
     LOCAL_JAVA_LIBRARIES := $(HOST_CORE_JARS)
     LOCAL_DEX_PREOPT_IMAGE := $(HOST_CORE_IMG_LOCATION)
     ifneq ($(wildcard $(LOCAL_PATH)/$(2)/main.list),)
+      LOCAL_DX_FLAGS := --multi-dex --main-dex-list=$(LOCAL_PATH)/$(2)/main.list --minimal-main-dex
       LOCAL_JACK_FLAGS := -D jack.dex.output.policy=minimal-multidex -D jack.preprocessor=true -D jack.preprocessor.file=$(LOCAL_PATH)/$(2)/main.jpp
     endif
     include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
diff --git a/build/Android.cpplint.mk b/build/Android.cpplint.mk
index d09f290..f924a85 100644
--- a/build/Android.cpplint.mk
+++ b/build/Android.cpplint.mk
@@ -21,7 +21,7 @@
 ART_CPPLINT_FLAGS := --quiet --root=$(ANDROID_BUILD_TOP)
 ART_CPPLINT_INGORED := \
     runtime/elf.h \
-    runtime/openjdkjvmti/jvmti.h
+    runtime/openjdkjvmti/include/jvmti.h
 
 # This:
 #  1) Gets a list of all .h & .cc files in the art directory.
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index b661e00..ed34a8d 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -25,6 +25,7 @@
 GTEST_DEX_DIRECTORIES := \
   AbstractMethod \
   AllFields \
+  DefaultMethods \
   DexToDexDecompiler \
   ErroneousA \
   ErroneousB \
@@ -75,8 +76,11 @@
 	$(call dexpreopt-remove-classes.dex,$@)
 
 ART_TEST_GTEST_VerifierDeps_SRC := $(abspath $(wildcard $(LOCAL_PATH)/VerifierDeps/*.smali))
+ART_TEST_GTEST_VerifierDepsMulti_SRC := $(abspath $(wildcard $(LOCAL_PATH)/VerifierDepsMulti/*.smali))
 ART_TEST_HOST_GTEST_VerifierDeps_DEX := $(dir $(ART_TEST_HOST_GTEST_Main_DEX))$(subst Main,VerifierDeps,$(basename $(notdir $(ART_TEST_HOST_GTEST_Main_DEX))))$(suffix $(ART_TEST_HOST_GTEST_Main_DEX))
 ART_TEST_TARGET_GTEST_VerifierDeps_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDeps,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
+ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_HOST_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_HOST_GTEST_Main_DEX))))$(suffix $(ART_TEST_HOST_GTEST_Main_DEX))
+ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX := $(dir $(ART_TEST_TARGET_GTEST_Main_DEX))$(subst Main,VerifierDepsMulti,$(basename $(notdir $(ART_TEST_TARGET_GTEST_Main_DEX))))$(suffix $(ART_TEST_TARGET_GTEST_Main_DEX))
 
 $(ART_TEST_HOST_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
 	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
@@ -84,6 +88,12 @@
 $(ART_TEST_TARGET_GTEST_VerifierDeps_DEX): $(ART_TEST_GTEST_VerifierDeps_SRC) $(HOST_OUT_EXECUTABLES)/smali
 	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
 
+$(ART_TEST_HOST_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
+	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+
+$(ART_TEST_TARGET_GTEST_VerifierDepsMulti_DEX): $(ART_TEST_GTEST_VerifierDepsMulti_SRC) $(HOST_OUT_EXECUTABLES)/smali
+	 $(HOST_OUT_EXECUTABLES)/smali --output=$@ $(filter %.smali,$^)
+
 # Dex file dependencies for each gtest.
 ART_GTEST_dex2oat_environment_tests_DEX_DEPS := Main MainStripped MultiDex MultiDexModifiedSecondary Nested
 
@@ -95,7 +105,7 @@
 ART_GTEST_dex_file_test_DEX_DEPS := GetMethodSignature Main Nested MultiDex
 ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics VerifierDeps
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
-ART_GTEST_image_test_DEX_DEPS := ImageLayoutA ImageLayoutB
+ART_GTEST_image_test_DEX_DEPS := ImageLayoutA ImageLayoutB DefaultMethods
 ART_GTEST_imtable_test_DEX_DEPS := IMTA IMTB
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
@@ -115,7 +125,7 @@
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
 ART_GTEST_unstarted_runtime_test_DEX_DEPS := Nested
-ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps MultiDex
+ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps VerifierDepsMulti MultiDex
 ART_GTEST_dex_to_dex_decompiler_test_DEX_DEPS := VerifierDeps DexToDexDecompiler
 
 # The elf writer test has dependencies on core.oat.
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index f53740e..c733feb 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -109,7 +109,7 @@
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(2)ART_HOST_ARCH) \
 	  $$(LOCAL_$(2)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES_OPTION) \
-	  --host --android-root=$$(HOST_OUT) --include-patch-information \
+	  --host --android-root=$$(HOST_OUT) \
 	  --generate-debug-info --generate-build-id --compile-pic \
 	  $$(PRIVATE_CORE_MULTI_PARAM) $$(PRIVATE_CORE_COMPILE_OPTIONS)
 
@@ -212,7 +212,7 @@
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(2)TARGET_ARCH) \
 	  --instruction-set-variant=$$($(2)DEX2OAT_TARGET_CPU_VARIANT) \
 	  --instruction-set-features=$$($(2)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
-	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information \
+	  --android-root=$$(PRODUCT_OUT)/system \
 	  --generate-debug-info --generate-build-id --compile-pic \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1)
 
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 550e8c4..5b331bc 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -476,7 +476,7 @@
 * -Xps-*
 */
 TEST_F(CmdlineParserTest, ProfileSaverOptions) {
-  ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7);
+  ProfileSaverOptions opt = ProfileSaverOptions(true, 1, 2, 3, 4, 5, 6, 7, "abc");
 
   EXPECT_SINGLE_PARSE_VALUE(opt,
                             "-Xjitsaveprofilinginfo "
@@ -486,7 +486,8 @@
                             "-Xps-min-methods-to-save:4 "
                             "-Xps-min-classes-to-save:5 "
                             "-Xps-min-notification-before-wake:6 "
-                            "-Xps-max-notification-before-wake:7",
+                            "-Xps-max-notification-before-wake:7 "
+                            "-Xps-profile-path:abc",
                             M::ProfileSaverOpts);
 }  // TEST_F
 
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index f1123eb..cd19fa4 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -401,19 +401,19 @@
 };
 
 template <>
-struct CmdlineType<std::vector<ti::Agent>> : CmdlineTypeParser<std::vector<ti::Agent>> {
+struct CmdlineType<std::list<ti::Agent>> : CmdlineTypeParser<std::list<ti::Agent>> {
   Result Parse(const std::string& args) {
-    assert(false && "Use AppendValues() for an Agent vector type");
-    return Result::Failure("Unconditional failure: Agent vector must be appended: " + args);
+    assert(false && "Use AppendValues() for an Agent list type");
+    return Result::Failure("Unconditional failure: Agent list must be appended: " + args);
   }
 
   Result ParseAndAppend(const std::string& args,
-                        std::vector<ti::Agent>& existing_value) {
+                        std::list<ti::Agent>& existing_value) {
     existing_value.emplace_back(args);
     return Result::SuccessNoValue();
   }
 
-  static const char* Name() { return "std::vector<ti::Agent>"; }
+  static const char* Name() { return "std::list<ti::Agent>"; }
 };
 
 template <>
@@ -752,9 +752,13 @@
       return ParseInto(existing,
              &ProfileSaverOptions::max_notification_before_wake_,
              type_parser.Parse(suffix));
-    } else {
-      return Result::Failure(std::string("Invalid suboption '") + option + "'");
     }
+    if (android::base::StartsWith(option, "profile-path:")) {
+      existing.profile_path_ = suffix;
+      return Result::SuccessNoValue();
+    }
+
+    return Result::Failure(std::string("Invalid suboption '") + option + "'");
   }
 
   static const char* Name() { return "ProfileSaverOptions"; }
@@ -774,6 +778,5 @@
 
   static const char* Name() { return "ExperimentalFlags"; }
 };
-
 }  // namespace art
 #endif  // ART_CMDLINE_CMDLINE_TYPES_H_
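
The new `-Xps-profile-path:` handling above follows the same pattern as the other suboptions in `CmdlineType<ProfileSaverOptions>`: test the suboption name as a prefix, then consume the remainder of the string as the value. A minimal standalone sketch of that prefix-dispatch idea, using a hypothetical `Options` struct instead of ART's real `ProfileSaverOptions` machinery (an illustration, not the actual parser):

```cpp
#include <iostream>
#include <string>

// Hypothetical stand-in for ProfileSaverOptions; only the field relevant here.
struct Options {
  std::string profile_path;
};

// Returns true if the suboption was recognized, mirroring the
// StartsWith()-then-suffix pattern used in CmdlineType<ProfileSaverOptions>.
bool ParseProfileSaverSuboption(const std::string& option, Options* existing) {
  const std::string prefix = "profile-path:";
  if (option.compare(0, prefix.size(), prefix) == 0) {
    existing->profile_path = option.substr(prefix.size());
    return true;
  }
  return false;  // Caller reports "Invalid suboption".
}

int main() {
  Options opts;
  if (ParseProfileSaverSuboption("profile-path:abc", &opts)) {
    std::cout << opts.profile_path << std::endl;  // Prints "abc".
  }
  return 0;
}
```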
diff --git a/compiler/Android.bp b/compiler/Android.bp
index f5589cd..312fc7b 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -52,6 +52,7 @@
         "optimizing/cha_guard_optimization.cc",
         "optimizing/code_generator.cc",
         "optimizing/code_generator_utils.cc",
+        "optimizing/code_sinking.cc",
         "optimizing/constant_folding.cc",
         "optimizing/dead_code_elimination.cc",
         "optimizing/escape.cc",
@@ -105,7 +106,9 @@
                 "linker/arm/relative_patcher_arm_base.cc",
                 "linker/arm/relative_patcher_thumb2.cc",
                 "optimizing/code_generator_arm.cc",
+                "optimizing/code_generator_vector_arm.cc",
                 "optimizing/code_generator_arm_vixl.cc",
+                "optimizing/code_generator_vector_arm_vixl.cc",
                 "optimizing/dex_cache_array_fixups_arm.cc",
                 "optimizing/instruction_simplifier_arm.cc",
                 "optimizing/instruction_simplifier_shared.cc",
@@ -125,6 +128,7 @@
                 "jni/quick/arm64/calling_convention_arm64.cc",
                 "linker/arm64/relative_patcher_arm64.cc",
                 "optimizing/code_generator_arm64.cc",
+                "optimizing/code_generator_vector_arm64.cc",
                 "optimizing/scheduler_arm64.cc",
                 "optimizing/instruction_simplifier_arm64.cc",
                 "optimizing/intrinsics_arm64.cc",
@@ -138,6 +142,7 @@
                 "jni/quick/mips/calling_convention_mips.cc",
                 "linker/mips/relative_patcher_mips.cc",
                 "optimizing/code_generator_mips.cc",
+                "optimizing/code_generator_vector_mips.cc",
                 "optimizing/dex_cache_array_fixups_mips.cc",
                 "optimizing/intrinsics_mips.cc",
                 "optimizing/pc_relative_fixups_mips.cc",
@@ -150,6 +155,7 @@
                 "jni/quick/mips64/calling_convention_mips64.cc",
                 "linker/mips64/relative_patcher_mips64.cc",
                 "optimizing/code_generator_mips64.cc",
+                "optimizing/code_generator_vector_mips64.cc",
                 "optimizing/intrinsics_mips64.cc",
                 "utils/mips64/assembler_mips64.cc",
                 "utils/mips64/managed_register_mips64.cc",
@@ -161,6 +167,7 @@
                 "linker/x86/relative_patcher_x86.cc",
                 "linker/x86/relative_patcher_x86_base.cc",
                 "optimizing/code_generator_x86.cc",
+                "optimizing/code_generator_vector_x86.cc",
                 "optimizing/intrinsics_x86.cc",
                 "optimizing/pc_relative_fixups_x86.cc",
                 "optimizing/x86_memory_gen.cc",
@@ -175,6 +182,7 @@
                 "linker/x86_64/relative_patcher_x86_64.cc",
                 "optimizing/intrinsics_x86_64.cc",
                 "optimizing/code_generator_x86_64.cc",
+                "optimizing/code_generator_vector_x86_64.cc",
                 "utils/x86_64/assembler_x86_64.cc",
                 "utils/x86_64/jni_macro_assembler_x86_64.cc",
                 "utils/x86_64/managed_register_x86_64.cc",
@@ -350,6 +358,7 @@
         "optimizing/pretty_printer_test.cc",
         "optimizing/reference_type_propagation_test.cc",
         "optimizing/side_effects_test.cc",
+        "optimizing/ssa_liveness_analysis_test.cc",
         "optimizing/ssa_test.cc",
         "optimizing/stack_map_test.cc",
         "optimizing/suspend_check_test.cc",
@@ -389,6 +398,7 @@
         mips64: {
             srcs: [
                 "linker/mips64/relative_patcher_mips64_test.cc",
+                "utils/mips64/managed_register_mips64_test.cc",
             ],
         },
         x86: {
@@ -416,6 +426,7 @@
 
     shared_libs: [
         "libartd-compiler",
+        "libartd-simulator",
         "libvixld-arm",
         "libvixld-arm64",
 
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index d89cdba..8b30292 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -52,14 +52,20 @@
         compiler_driver_->GetCompiledMethod(MethodReference(&dex_file,
                                                             method->GetDexMethodIndex()));
   }
-  if (compiled_method != nullptr) {
+  // If the code size is 0, the method was skipped due to profile-guided compilation.
+  if (compiled_method != nullptr && compiled_method->GetQuickCode().size() != 0u) {
     ArrayRef<const uint8_t> code = compiled_method->GetQuickCode();
-    uint32_t code_size = code.size();
-    CHECK_NE(0u, code_size);
+    const uint32_t code_size = code.size();
     ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
-    uint32_t vmap_table_offset = vmap_table.empty() ? 0u
+    const uint32_t vmap_table_offset = vmap_table.empty() ? 0u
         : sizeof(OatQuickMethodHeader) + vmap_table.size();
+    // The method info is directly before the vmap table.
+    ArrayRef<const uint8_t> method_info = compiled_method->GetMethodInfo();
+    const uint32_t method_info_offset = method_info.empty() ? 0u
+        : vmap_table_offset + method_info.size();
+
     OatQuickMethodHeader method_header(vmap_table_offset,
+                                       method_info_offset,
                                        compiled_method->GetFrameSizeInBytes(),
                                        compiled_method->GetCoreSpillMask(),
                                        compiled_method->GetFpSpillMask(),
@@ -68,11 +74,12 @@
     header_code_and_maps_chunks_.push_back(std::vector<uint8_t>());
     std::vector<uint8_t>* chunk = &header_code_and_maps_chunks_.back();
     const size_t max_padding = GetInstructionSetAlignment(compiled_method->GetInstructionSet());
-    const size_t size = vmap_table.size() + sizeof(method_header) + code_size;
+    const size_t size = method_info.size() + vmap_table.size() + sizeof(method_header) + code_size;
     chunk->reserve(size + max_padding);
     chunk->resize(sizeof(method_header));
     memcpy(&(*chunk)[0], &method_header, sizeof(method_header));
     chunk->insert(chunk->begin(), vmap_table.begin(), vmap_table.end());
+    chunk->insert(chunk->begin(), method_info.begin(), method_info.end());
     chunk->insert(chunk->end(), code.begin(), code.end());
     CHECK_EQ(chunk->size(), size);
     const void* unaligned_code_ptr = chunk->data() + (size - code_size);
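
The hunk above depends on the chunk layout it builds: method info, then the vmap table, then the `OatQuickMethodHeader`, then the code, with the header's offsets counted backward from the code entry point that immediately follows the header. A sketch of that offset arithmetic under a made-up header size (the real header also records frame size and spill masks):

```cpp
#include <cstddef>
#include <cstdint>

// Hypothetical stand-in size; not the real sizeof(OatQuickMethodHeader).
constexpr size_t kHeaderSize = 24;

// The chunk layout built above is:
//   [method info][vmap table][OatQuickMethodHeader][code]
// and the offsets stored in the header are distances counted backward
// from the code entry point, which immediately follows the header.
struct TableOffsets {
  uint32_t vmap_table_offset;   // 0 if there is no vmap table.
  uint32_t method_info_offset;  // 0 if there is no method info.
};

TableOffsets ComputeOffsets(size_t method_info_size, size_t vmap_table_size) {
  TableOffsets offsets;
  offsets.vmap_table_offset = vmap_table_size == 0u
      ? 0u
      : static_cast<uint32_t>(kHeaderSize + vmap_table_size);
  // The method info sits directly before the vmap table, so its backward
  // distance is the vmap table's distance plus the method info's own size.
  offsets.method_info_offset = method_info_size == 0u
      ? 0u
      : static_cast<uint32_t>(offsets.vmap_table_offset + method_info_size);
  return offsets;
}
```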
diff --git a/compiler/common_compiler_test.h b/compiler/common_compiler_test.h
index 98dcf20..0683577 100644
--- a/compiler/common_compiler_test.h
+++ b/compiler/common_compiler_test.h
@@ -117,25 +117,6 @@
   std::list<std::vector<uint8_t>> header_code_and_maps_chunks_;
 };
 
-// TODO: When read barrier works with all Optimizing back ends, get rid of this.
-#define TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS() \
-  if (kUseReadBarrier && GetCompilerKind() == Compiler::kOptimizing) {                    \
-    switch (GetInstructionSet()) {                                                        \
-      case kArm64:                                                                        \
-      case kThumb2:                                                                       \
-      case kX86:                                                                          \
-      case kX86_64:                                                                       \
-        /* Instruction set has read barrier support. */                                   \
-        break;                                                                            \
-                                                                                          \
-      default:                                                                            \
-        /* Instruction set does not have barrier support. */                              \
-        printf("WARNING: TEST DISABLED FOR READ BARRIER WITH OPTIMIZING "                 \
-               "FOR THIS INSTRUCTION SET\n");                                             \
-        return;                                                                           \
-    }                                                                                     \
-  }
-
 }  // namespace art
 
 #endif  // ART_COMPILER_COMMON_COMPILER_TEST_H_
diff --git a/compiler/compiled_method.cc b/compiler/compiled_method.cc
index f06d90c..0d9021f 100644
--- a/compiler/compiled_method.cc
+++ b/compiler/compiled_method.cc
@@ -105,15 +105,15 @@
                                const size_t frame_size_in_bytes,
                                const uint32_t core_spill_mask,
                                const uint32_t fp_spill_mask,
-                               const ArrayRef<const SrcMapElem>& src_mapping_table,
+                               const ArrayRef<const uint8_t>& method_info,
                                const ArrayRef<const uint8_t>& vmap_table,
                                const ArrayRef<const uint8_t>& cfi_info,
                                const ArrayRef<const LinkerPatch>& patches)
     : CompiledCode(driver, instruction_set, quick_code),
-      frame_size_in_bytes_(frame_size_in_bytes), core_spill_mask_(core_spill_mask),
+      frame_size_in_bytes_(frame_size_in_bytes),
+      core_spill_mask_(core_spill_mask),
       fp_spill_mask_(fp_spill_mask),
-      src_mapping_table_(
-          driver->GetCompiledMethodStorage()->DeduplicateSrcMappingTable(src_mapping_table)),
+      method_info_(driver->GetCompiledMethodStorage()->DeduplicateMethodInfo(method_info)),
       vmap_table_(driver->GetCompiledMethodStorage()->DeduplicateVMapTable(vmap_table)),
       cfi_info_(driver->GetCompiledMethodStorage()->DeduplicateCFIInfo(cfi_info)),
       patches_(driver->GetCompiledMethodStorage()->DeduplicateLinkerPatches(patches)) {
@@ -126,7 +126,7 @@
     const size_t frame_size_in_bytes,
     const uint32_t core_spill_mask,
     const uint32_t fp_spill_mask,
-    const ArrayRef<const SrcMapElem>& src_mapping_table,
+    const ArrayRef<const uint8_t>& method_info,
     const ArrayRef<const uint8_t>& vmap_table,
     const ArrayRef<const uint8_t>& cfi_info,
     const ArrayRef<const LinkerPatch>& patches) {
@@ -139,7 +139,7 @@
                   frame_size_in_bytes,
                   core_spill_mask,
                   fp_spill_mask,
-                  src_mapping_table,
+                  method_info,
                   vmap_table,
                   cfi_info, patches);
   return ret;
@@ -156,7 +156,7 @@
   storage->ReleaseLinkerPatches(patches_);
   storage->ReleaseCFIInfo(cfi_info_);
   storage->ReleaseVMapTable(vmap_table_);
-  storage->ReleaseSrcMappingTable(src_mapping_table_);
+  storage->ReleaseMethodInfo(method_info_);
 }
 
 }  // namespace art
diff --git a/compiler/compiled_method.h b/compiler/compiled_method.h
index e2a0942..aa529f8 100644
--- a/compiler/compiled_method.h
+++ b/compiler/compiled_method.h
@@ -109,57 +109,6 @@
   return lhs.from_ == rhs.from_ && lhs.to_ == rhs.to_;
 }
 
-template <class Allocator>
-class SrcMap FINAL : public std::vector<SrcMapElem, Allocator> {
- public:
-  using std::vector<SrcMapElem, Allocator>::begin;
-  using typename std::vector<SrcMapElem, Allocator>::const_iterator;
-  using std::vector<SrcMapElem, Allocator>::empty;
-  using std::vector<SrcMapElem, Allocator>::end;
-  using std::vector<SrcMapElem, Allocator>::resize;
-  using std::vector<SrcMapElem, Allocator>::shrink_to_fit;
-  using std::vector<SrcMapElem, Allocator>::size;
-
-  explicit SrcMap() {}
-  explicit SrcMap(const Allocator& alloc) : std::vector<SrcMapElem, Allocator>(alloc) {}
-
-  template <class InputIt>
-  SrcMap(InputIt first, InputIt last, const Allocator& alloc)
-      : std::vector<SrcMapElem, Allocator>(first, last, alloc) {}
-
-  void push_back(const SrcMapElem& elem) {
-    if (!empty()) {
-      // Check that the addresses are inserted in sorted order.
-      DCHECK_GE(elem.from_, this->back().from_);
-      // If two consequitive entries map to the same value, ignore the later.
-      // E.g. for map {{0, 1}, {4, 1}, {8, 2}}, all values in [0,8) map to 1.
-      if (elem.to_ == this->back().to_) {
-        return;
-      }
-    }
-    std::vector<SrcMapElem, Allocator>::push_back(elem);
-  }
-
-  // Returns true and the corresponding "to" value if the mapping is found.
-  // Oterwise returns false and 0.
-  std::pair<bool, int32_t> Find(uint32_t from) const {
-    // Finds first mapping such that lb.from_ >= from.
-    auto lb = std::lower_bound(begin(), end(), SrcMapElem {from, INT32_MIN});
-    if (lb != end() && lb->from_ == from) {
-      // Found exact match.
-      return std::make_pair(true, lb->to_);
-    } else if (lb != begin()) {
-      // The previous mapping is still in effect.
-      return std::make_pair(true, (--lb)->to_);
-    } else {
-      // Not found because 'from' is smaller than first entry in the map.
-      return std::make_pair(false, 0);
-    }
-  }
-};
-
-using DefaultSrcMap = SrcMap<std::allocator<SrcMapElem>>;
-
 class LinkerPatch {
  public:
   // Note: We explicitly specify the underlying type of the enum because GCC
@@ -170,7 +119,6 @@
   // choose to squeeze the Type into fewer than 8 bits, we'll have to declare
   // patch_type_ as an uintN_t and do explicit static_cast<>s.
   enum class Type : uint8_t {
-    kRecordPosition,   // Just record patch position for patchoat.
     kMethod,
     kCall,
     kCallRelative,     // NOTE: Actual patching is instruction_set-dependent.
@@ -183,10 +131,6 @@
     kDexCacheArray,    // NOTE: Actual patching is instruction_set-dependent.
   };
 
-  static LinkerPatch RecordPosition(size_t literal_offset) {
-    return LinkerPatch(literal_offset, Type::kRecordPosition, /* target_dex_file */ nullptr);
-  }
-
   static LinkerPatch MethodPatch(size_t literal_offset,
                                  const DexFile* target_dex_file,
                                  uint32_t target_method_idx) {
@@ -425,7 +369,7 @@
                  const size_t frame_size_in_bytes,
                  const uint32_t core_spill_mask,
                  const uint32_t fp_spill_mask,
-                 const ArrayRef<const SrcMapElem>& src_mapping_table,
+                 const ArrayRef<const uint8_t>& method_info,
                  const ArrayRef<const uint8_t>& vmap_table,
                  const ArrayRef<const uint8_t>& cfi_info,
                  const ArrayRef<const LinkerPatch>& patches);
@@ -439,7 +383,7 @@
       const size_t frame_size_in_bytes,
       const uint32_t core_spill_mask,
       const uint32_t fp_spill_mask,
-      const ArrayRef<const SrcMapElem>& src_mapping_table,
+      const ArrayRef<const uint8_t>& method_info,
       const ArrayRef<const uint8_t>& vmap_table,
       const ArrayRef<const uint8_t>& cfi_info,
       const ArrayRef<const LinkerPatch>& patches);
@@ -458,8 +402,8 @@
     return fp_spill_mask_;
   }
 
-  ArrayRef<const SrcMapElem> GetSrcMappingTable() const {
-    return GetArray(src_mapping_table_);
+  ArrayRef<const uint8_t> GetMethodInfo() const {
+    return GetArray(method_info_);
   }
 
   ArrayRef<const uint8_t> GetVmapTable() const {
@@ -481,9 +425,9 @@
   const uint32_t core_spill_mask_;
   // For quick code, a bit mask describing spilled FPR callee-save registers.
   const uint32_t fp_spill_mask_;
-  // For quick code, a set of pairs (PC, DEX) mapping from native PC offset to DEX offset.
-  const LengthPrefixedArray<SrcMapElem>* const src_mapping_table_;
-  // For quick code, a uleb128 encoded map from GPR/FPR register to dex register. Size prefixed.
+  // For quick code, method-specific information that is not very dedupe-friendly (method indices).
+  const LengthPrefixedArray<uint8_t>* const method_info_;
+  // For quick code, holds code infos which contain stack maps, inline information, etc.
   const LengthPrefixedArray<uint8_t>* const vmap_table_;
   // For quick code, a FDE entry for the debug_frame section.
   const LengthPrefixedArray<uint8_t>* const cfi_info_;
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 76aeaa5..538fe93 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -70,10 +70,6 @@
     return *unit_.GetDexFile();
   }
 
-  bool PerformOptimizations() const {
-    return dex_to_dex_compilation_level_ >= DexToDexCompilationLevel::kOptimize;
-  }
-
   // Compiles a RETURN-VOID into a RETURN-VOID-BARRIER within a constructor where
   // a barrier is required.
   void CompileReturnVoid(Instruction* inst, uint32_t dex_pc);
@@ -114,7 +110,7 @@
 };
 
 void DexCompiler::Compile() {
-  DCHECK_GE(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kRequired);
+  DCHECK_EQ(dex_to_dex_compilation_level_, DexToDexCompilationLevel::kOptimize);
   const DexFile::CodeItem* code_item = unit_.GetCodeItem();
   const uint16_t* insns = code_item->insns_;
   const uint32_t insns_size = code_item->insns_size_in_code_units_;
@@ -221,7 +217,7 @@
 }
 
 Instruction* DexCompiler::CompileCheckCast(Instruction* inst, uint32_t dex_pc) {
-  if (!kEnableCheckCastEllision || !PerformOptimizations()) {
+  if (!kEnableCheckCastEllision) {
     return inst;
   }
   if (!driver_.IsSafeCast(&unit_, dex_pc)) {
@@ -254,7 +250,7 @@
                                              uint32_t dex_pc,
                                              Instruction::Code new_opcode,
                                              bool is_put) {
-  if (!kEnableQuickening || !PerformOptimizations()) {
+  if (!kEnableQuickening) {
     return;
   }
   uint32_t field_idx = inst->VRegC_22c();
@@ -279,7 +275,7 @@
 
 void DexCompiler::CompileInvokeVirtual(Instruction* inst, uint32_t dex_pc,
                                        Instruction::Code new_opcode, bool is_range) {
-  if (!kEnableQuickening || !PerformOptimizations()) {
+  if (!kEnableQuickening) {
     return;
   }
   uint32_t method_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
@@ -370,7 +366,7 @@
         0,
         0,
         0,
-        ArrayRef<const SrcMapElem>(),                // src_mapping_table
+        ArrayRef<const uint8_t>(),                   // method_info
         ArrayRef<const uint8_t>(builder.GetData()),  // vmap_table
         ArrayRef<const uint8_t>(),                   // cfi data
         ArrayRef<const LinkerPatch>());
diff --git a/compiler/dex/dex_to_dex_compiler.h b/compiler/dex/dex_to_dex_compiler.h
index 00c596d..87ddb39 100644
--- a/compiler/dex/dex_to_dex_compiler.h
+++ b/compiler/dex/dex_to_dex_compiler.h
@@ -34,8 +34,7 @@
 
 enum class DexToDexCompilationLevel {
   kDontDexToDexCompile,   // Only meaning wrt image time interpretation.
-  kRequired,              // Dex-to-dex compilation required for correctness.
-  kOptimize               // Perform required transformation and peep-hole optimizations.
+  kOptimize               // Perform peep-hole optimizations.
 };
 std::ostream& operator<<(std::ostream& os, const DexToDexCompilationLevel& rhs);
 
diff --git a/compiler/dex/dex_to_dex_decompiler.cc b/compiler/dex/dex_to_dex_decompiler.cc
index bfd485d..85d5784 100644
--- a/compiler/dex/dex_to_dex_decompiler.cc
+++ b/compiler/dex/dex_to_dex_decompiler.cc
@@ -20,7 +20,7 @@
 #include "base/mutex.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
-#include "optimizing/bytecode_utils.h"
+#include "bytecode_utils.h"
 
 namespace art {
 namespace optimizer {
@@ -185,7 +185,7 @@
   }
 
   if (quickened_info_ptr_ != quickened_info_end_) {
-    LOG(ERROR) << "Failed to use all values in quickening info."
+    LOG(FATAL) << "Failed to use all values in quickening info."
                << " Actual: " << std::hex << quickened_info_ptr_
                << " Expected: " << quickened_info_end_;
     return false;
diff --git a/compiler/driver/compiled_method_storage.cc b/compiler/driver/compiled_method_storage.cc
index a0a8f81..e6a47ba 100644
--- a/compiler/driver/compiled_method_storage.cc
+++ b/compiler/driver/compiled_method_storage.cc
@@ -172,8 +172,8 @@
     : swap_space_(swap_fd == -1 ? nullptr : new SwapSpace(swap_fd, 10 * MB)),
       dedupe_enabled_(true),
       dedupe_code_("dedupe code", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
-      dedupe_src_mapping_table_("dedupe source mapping table",
-                                LengthPrefixedArrayAlloc<SrcMapElem>(swap_space_.get())),
+      dedupe_method_info_("dedupe method info",
+                          LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_vmap_table_("dedupe vmap table",
                          LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
       dedupe_cfi_info_("dedupe cfi info", LengthPrefixedArrayAlloc<uint8_t>(swap_space_.get())),
@@ -207,13 +207,13 @@
   ReleaseArrayIfNotDeduplicated(code);
 }
 
-const LengthPrefixedArray<SrcMapElem>* CompiledMethodStorage::DeduplicateSrcMappingTable(
-    const ArrayRef<const SrcMapElem>& src_map) {
-  return AllocateOrDeduplicateArray(src_map, &dedupe_src_mapping_table_);
+const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateMethodInfo(
+    const ArrayRef<const uint8_t>& src_map) {
+  return AllocateOrDeduplicateArray(src_map, &dedupe_method_info_);
 }
 
-void CompiledMethodStorage::ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map) {
-  ReleaseArrayIfNotDeduplicated(src_map);
+void CompiledMethodStorage::ReleaseMethodInfo(const LengthPrefixedArray<uint8_t>* method_info) {
+  ReleaseArrayIfNotDeduplicated(method_info);
 }
 
 const LengthPrefixedArray<uint8_t>* CompiledMethodStorage::DeduplicateVMapTable(
diff --git a/compiler/driver/compiled_method_storage.h b/compiler/driver/compiled_method_storage.h
index 124b5a6..27011e8 100644
--- a/compiler/driver/compiled_method_storage.h
+++ b/compiler/driver/compiled_method_storage.h
@@ -29,7 +29,6 @@
 namespace art {
 
 class LinkerPatch;
-class SrcMapElem;
 
 class CompiledMethodStorage {
  public:
@@ -52,9 +51,9 @@
   const LengthPrefixedArray<uint8_t>* DeduplicateCode(const ArrayRef<const uint8_t>& code);
   void ReleaseCode(const LengthPrefixedArray<uint8_t>* code);
 
-  const LengthPrefixedArray<SrcMapElem>* DeduplicateSrcMappingTable(
-      const ArrayRef<const SrcMapElem>& src_map);
-  void ReleaseSrcMappingTable(const LengthPrefixedArray<SrcMapElem>* src_map);
+  const LengthPrefixedArray<uint8_t>* DeduplicateMethodInfo(
+      const ArrayRef<const uint8_t>& method_info);
+  void ReleaseMethodInfo(const LengthPrefixedArray<uint8_t>* method_info);
 
   const LengthPrefixedArray<uint8_t>* DeduplicateVMapTable(const ArrayRef<const uint8_t>& table);
   void ReleaseVMapTable(const LengthPrefixedArray<uint8_t>* table);
@@ -96,7 +95,7 @@
   bool dedupe_enabled_;
 
   ArrayDedupeSet<uint8_t> dedupe_code_;
-  ArrayDedupeSet<SrcMapElem> dedupe_src_mapping_table_;
+  ArrayDedupeSet<uint8_t> dedupe_method_info_;
   ArrayDedupeSet<uint8_t> dedupe_vmap_table_;
   ArrayDedupeSet<uint8_t> dedupe_cfi_info_;
   ArrayDedupeSet<LinkerPatch> dedupe_linker_patches_;
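
The renamed `DeduplicateMethodInfo()` keeps the same allocate-or-deduplicate contract as the other dedupe sets in this class: byte-identical arrays are stored once and callers receive a pointer into shared storage, which is why `compiled_method_storage_test.cc` below can assert pointer equality. A simplified sketch of that contract (the real `ArrayDedupeSet` adds swap-space allocation and reference counting, omitted here):

```cpp
#include <cstdint>
#include <set>
#include <vector>

// Simplified dedupe set: byte-identical arrays share one allocation, so two
// CompiledMethods built from equal inputs compare equal by data() pointer.
class SimpleArrayDedupeSet {
 public:
  const std::vector<uint8_t>* Deduplicate(const std::vector<uint8_t>& array) {
    auto it = storage_.find(array);
    if (it == storage_.end()) {
      it = storage_.insert(array).first;  // First occurrence: store a copy.
    }
    return &*it;  // Stable address: std::set never moves its elements.
  }

 private:
  std::set<std::vector<uint8_t>> storage_;
};
```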
diff --git a/compiler/driver/compiled_method_storage_test.cc b/compiler/driver/compiled_method_storage_test.cc
index b72d0ac..6572d17 100644
--- a/compiler/driver/compiled_method_storage_test.cc
+++ b/compiler/driver/compiled_method_storage_test.cc
@@ -51,11 +51,11 @@
       ArrayRef<const uint8_t>(raw_code1),
       ArrayRef<const uint8_t>(raw_code2),
   };
-  const SrcMapElem raw_src_map1[] = { { 1u, 2u }, { 3u, 4u }, { 5u, 6u } };
-  const SrcMapElem raw_src_map2[] = { { 8u, 7u }, { 6u, 5u }, { 4u, 3u }, { 2u, 1u } };
-  ArrayRef<const SrcMapElem> src_map[] = {
-      ArrayRef<const SrcMapElem>(raw_src_map1),
-      ArrayRef<const SrcMapElem>(raw_src_map2),
+  const uint8_t raw_method_info_map1[] = { 1u, 2u, 3u, 4u, 5u, 6u };
+  const uint8_t raw_method_info_map2[] = { 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u };
+  ArrayRef<const uint8_t> method_info[] = {
+      ArrayRef<const uint8_t>(raw_method_info_map1),
+      ArrayRef<const uint8_t>(raw_method_info_map2),
   };
   const uint8_t raw_vmap_table1[] = { 2, 4, 6 };
   const uint8_t raw_vmap_table2[] = { 7, 5, 3, 1 };
@@ -85,7 +85,7 @@
   std::vector<CompiledMethod*> compiled_methods;
   compiled_methods.reserve(1u << 7);
   for (auto&& c : code) {
-    for (auto&& s : src_map) {
+    for (auto&& s : method_info) {
       for (auto&& v : vmap_table) {
         for (auto&& f : cfi_info) {
           for (auto&& p : patches) {
@@ -113,7 +113,7 @@
       bool same_patches = ((i ^ j) & patches_bit) == 0u;
       ASSERT_EQ(same_code, lhs->GetQuickCode().data() == rhs->GetQuickCode().data())
           << i << " " << j;
-      ASSERT_EQ(same_src_map, lhs->GetSrcMappingTable().data() == rhs->GetSrcMappingTable().data())
+      ASSERT_EQ(same_src_map, lhs->GetMethodInfo().data() == rhs->GetMethodInfo().data())
           << i << " " << j;
       ASSERT_EQ(same_vmap_table, lhs->GetVmapTable().data() == rhs->GetVmapTable().data())
           << i << " " << j;
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index 7e91453..e823f67 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -480,7 +480,9 @@
       DCHECK(!it.HasNext());
     }
   }
-  DCHECK_EQ(quickening_info_ptr, quickening_info_end) << "Failed to use all quickening info";
+  if (quickening_info_ptr != quickening_info_end) {
+    LOG(FATAL) << "Failed to use all quickening info";
+  }
 }
 
 void CompilerDriver::CompileAll(jobject class_loader,
@@ -530,17 +532,13 @@
   if (driver.GetCompilerOptions().GetDebuggable()) {
     // We are debuggable so definitions of classes might be changed. We don't want to do any
     // optimizations that could break that.
-    max_level = optimizer::DexToDexCompilationLevel::kRequired;
+    max_level = optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
   }
   if (klass->IsVerified()) {
     // Class is verified so we can enable DEX-to-DEX compilation for performance.
     return max_level;
-  } else if (klass->IsCompileTimeVerified()) {
-    // Class verification has soft-failed. Anyway, ensure at least correctness.
-    DCHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime);
-    return optimizer::DexToDexCompilationLevel::kRequired;
   } else {
-    // Class verification has failed: do not run DEX-to-DEX compilation.
+    // Class verification has failed: do not run DEX-to-DEX optimizations.
     return optimizer::DexToDexCompilationLevel::kDontDexToDexCompile;
   }
 }
@@ -610,7 +608,7 @@
           dex_file,
           (verified_method != nullptr)
               ? dex_to_dex_compilation_level
-              : optimizer::DexToDexCompilationLevel::kRequired);
+              : optimizer::DexToDexCompilationLevel::kDontDexToDexCompile);
     }
   } else if ((access_flags & kAccNative) != 0) {
     // Are we extracting only and have support for generic JNI down calls?
@@ -964,7 +962,7 @@
       if (cls == nullptr) {
         soa.Self()->ClearException();
       } else if (&cls->GetDexFile() == dex_file) {
-        DCHECK(cls->IsErroneous() || cls->IsVerified() || cls->IsCompileTimeVerified())
+        DCHECK(cls->IsErroneous() || cls->IsVerified() || cls->ShouldVerifyAtRuntime())
             << cls->PrettyClass()
             << " " << cls->GetStatus();
       }
@@ -2160,6 +2158,14 @@
         LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                    << " because: " << error_msg;
         manager_->GetCompiler()->SetHadHardVerifierFailure();
+      } else {
+        // Force a soft failure for the VerifierDeps. This is a sanity measure, as
+        // the vdex file already records that the class hasn't been resolved. It avoids
+        // trying to do future verification optimizations when processing the vdex file.
+        DCHECK(failure_kind == verifier::MethodVerifier::kSoftFailure ||
+               failure_kind == verifier::MethodVerifier::kNoFailure)
+            << failure_kind;
+        failure_kind = verifier::MethodVerifier::kSoftFailure;
       }
     } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) {
       CHECK(klass->IsResolved()) << klass->PrettyClass();
@@ -2172,7 +2178,7 @@
         manager_->GetCompiler()->SetHadHardVerifierFailure();
       }
 
-      CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous())
+      CHECK(klass->ShouldVerifyAtRuntime() || klass->IsVerified() || klass->IsErroneous())
           << klass->PrettyDescriptor() << ": state=" << klass->GetStatus();
 
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
@@ -2186,6 +2192,13 @@
           DCHECK(klass->IsVerified()) << "Boot classpath class " << klass->PrettyClass()
               << " failed to fully verify: state= " << klass->GetStatus();
         }
+        if (klass->IsVerified()) {
+          DCHECK_EQ(failure_kind, verifier::MethodVerifier::kNoFailure);
+        } else if (klass->ShouldVerifyAtRuntime()) {
+          DCHECK_EQ(failure_kind, verifier::MethodVerifier::kSoftFailure);
+        } else {
+          DCHECK_EQ(failure_kind, verifier::MethodVerifier::kHardFailure);
+        }
       }
     } else {
       // Make the skip a soft failure, essentially being considered as verify at runtime.
@@ -2283,7 +2296,7 @@
  public:
   explicit InitializeClassVisitor(const ParallelCompilationManager* manager) : manager_(manager) {}
 
-  virtual void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
+  void Visit(size_t class_def_index) REQUIRES(!Locks::mutator_lock_) OVERRIDE {
     ATRACE_CALL();
     jobject jclass_loader = manager_->GetClassLoader();
     const DexFile& dex_file = *manager_->GetDexFile();
@@ -2343,23 +2356,32 @@
               // mode which prevents the GC from visiting objects modified during the transaction.
               // Ensure GC is not run so don't access freed objects when aborting transaction.
 
-              ScopedAssertNoThreadSuspension ants("Transaction end");
-              runtime->ExitTransactionMode();
+              {
+                ScopedAssertNoThreadSuspension ants("Transaction end");
+                runtime->ExitTransactionMode();
+
+                if (!success) {
+                  CHECK(soa.Self()->IsExceptionPending());
+                  mirror::Throwable* exception = soa.Self()->GetException();
+                  VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
+                      << exception->Dump();
+                  std::ostream* file_log = manager_->GetCompiler()->
+                      GetCompilerOptions().GetInitFailureOutput();
+                  if (file_log != nullptr) {
+                    *file_log << descriptor << "\n";
+                    *file_log << exception->Dump() << "\n";
+                  }
+                  soa.Self()->ClearException();
+                  transaction.Rollback();
+                  CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+                }
+              }
 
               if (!success) {
-                CHECK(soa.Self()->IsExceptionPending());
-                mirror::Throwable* exception = soa.Self()->GetException();
-                VLOG(compiler) << "Initialization of " << descriptor << " aborted because of "
-                    << exception->Dump();
-                std::ostream* file_log = manager_->GetCompiler()->
-                    GetCompilerOptions().GetInitFailureOutput();
-                if (file_log != nullptr) {
-                  *file_log << descriptor << "\n";
-                  *file_log << exception->Dump() << "\n";
-                }
-                soa.Self()->ClearException();
-                transaction.Rollback();
-                CHECK_EQ(old_status, klass->GetStatus()) << "Previous class status not restored";
+                // On failure, still intern the strings of static fields and the strings seen in
+                // <clinit>, as these will be created in the zygote. This is separated from the
+                // transaction code just above because we will allocate strings and therefore must
+                // be allowed to suspend.
+                InternStrings(klass, class_loader);
               }
             }
           }
@@ -2375,6 +2397,57 @@
   }
 
  private:
+  void InternStrings(Handle<mirror::Class> klass, Handle<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK(manager_->GetCompiler()->GetCompilerOptions().IsBootImage());
+    DCHECK(klass->IsVerified());
+    DCHECK(!klass->IsInitialized());
+
+    StackHandleScope<1> hs(Thread::Current());
+    Handle<mirror::DexCache> h_dex_cache = hs.NewHandle(klass->GetDexCache());
+    const DexFile* dex_file = manager_->GetDexFile();
+    const DexFile::ClassDef* class_def = klass->GetClassDef();
+    ClassLinker* class_linker = manager_->GetClassLinker();
+
+    // Check encoded final field values for strings and intern.
+    annotations::RuntimeEncodedStaticFieldValueIterator value_it(*dex_file,
+                                                                 &h_dex_cache,
+                                                                 &class_loader,
+                                                                 manager_->GetClassLinker(),
+                                                                 *class_def);
+    for ( ; value_it.HasNext(); value_it.Next()) {
+      if (value_it.GetValueType() == annotations::RuntimeEncodedStaticFieldValueIterator::kString) {
+        // Resolve the string. This will intern the string.
+        art::ObjPtr<mirror::String> resolved = class_linker->ResolveString(
+            *dex_file, dex::StringIndex(value_it.GetJavaValue().i), h_dex_cache);
+        CHECK(resolved != nullptr);
+      }
+    }
+
+    // Intern strings seen in <clinit>.
+    ArtMethod* clinit = klass->FindClassInitializer(class_linker->GetImagePointerSize());
+    if (clinit != nullptr) {
+      const DexFile::CodeItem* code_item = clinit->GetCodeItem();
+      DCHECK(code_item != nullptr);
+      const Instruction* inst = Instruction::At(code_item->insns_);
+
+      const uint32_t insns_size = code_item->insns_size_in_code_units_;
+      for (uint32_t dex_pc = 0; dex_pc < insns_size;) {
+        if (inst->Opcode() == Instruction::CONST_STRING) {
+          ObjPtr<mirror::String> s = class_linker->ResolveString(
+              *dex_file, dex::StringIndex(inst->VRegB_21c()), h_dex_cache);
+          CHECK(s != nullptr);
+        } else if (inst->Opcode() == Instruction::CONST_STRING_JUMBO) {
+          ObjPtr<mirror::String> s = class_linker->ResolveString(
+              *dex_file, dex::StringIndex(inst->VRegB_31c()), h_dex_cache);
+          CHECK(s != nullptr);
+        }
+        dex_pc += inst->SizeInCodeUnits();
+        inst = inst->Next();
+      }
+    }
+  }
+
   const ParallelCompilationManager* const manager_;
 };
 
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index 1e5c43d..874e357 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -65,8 +65,6 @@
 class ParallelCompilationManager;
 class ScopedObjectAccess;
 template <class Allocator> class SrcMap;
-class SrcMapElem;
-using SwapSrcMap = SrcMap<SwapAllocator<SrcMapElem>>;
 template<class T> class Handle;
 class TimingLogger;
 class VdexFile;
@@ -355,6 +353,10 @@
     return current_dex_to_dex_methods_;
   }
 
+  const ProfileCompilationInfo* GetProfileCompilationInfo() const {
+    return profile_compilation_info_;
+  }
+
  private:
   // Can `referrer_class` access the resolved `member`?
   // Dispatch call to mirror::Class::CanAccessResolvedField or
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 97954f3..fa1b3a3 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -133,9 +133,10 @@
         << " " << dex.GetMethodDeclaringClassDescriptor(dex.GetMethodId(i)) << " "
         << dex.GetMethodName(dex.GetMethodId(i));
   }
-  EXPECT_EQ(dex.NumFieldIds(), dex_cache->NumResolvedFields());
+  EXPECT_TRUE(dex_cache->StaticArtFieldSize() == dex_cache->NumResolvedFields()
+      || dex.NumFieldIds() == dex_cache->NumResolvedFields());
   for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-    ArtField* field = cl->GetResolvedField(i, dex_cache);
+    ArtField* field = dex_cache->GetResolvedField(i, cl->GetImagePointerSize());
     EXPECT_TRUE(field != nullptr) << "field_idx=" << i
                                << " " << dex.GetFieldDeclaringClassDescriptor(dex.GetFieldId(i))
                                << " " << dex.GetFieldName(dex.GetFieldId(i));
@@ -147,7 +148,6 @@
 }
 
 TEST_F(CompilerDriverTest, AbstractMethodErrorStub) {
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   jobject class_loader;
   {
     ScopedObjectAccess soa(Thread::Current());
@@ -190,7 +190,6 @@
 };
 
 TEST_F(CompilerDriverMethodsTest, Selection) {
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
   {
@@ -240,9 +239,8 @@
 
     ProfileCompilationInfo info;
     for (const std::unique_ptr<const DexFile>& dex_file : dex_files) {
-      std::string key = ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation());
-      profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 1);
-      profile_info_.AddMethodIndex(key, dex_file->GetLocationChecksum(), 2);
+      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 1);
+      profile_info_.AddMethodIndex(dex_file->GetLocation(), dex_file->GetLocationChecksum(), 2);
     }
     return &profile_info_;
   }
@@ -299,7 +297,6 @@
 };
 
 TEST_F(CompilerDriverProfileTest, ProfileGuidedCompilation) {
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   Thread* self = Thread::Current();
   jobject class_loader;
   {
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index c222f90..a0c0a2a 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -27,12 +27,10 @@
       small_method_threshold_(kDefaultSmallMethodThreshold),
       tiny_method_threshold_(kDefaultTinyMethodThreshold),
       num_dex_methods_threshold_(kDefaultNumDexMethodsThreshold),
-      inline_depth_limit_(kUnsetInlineDepthLimit),
       inline_max_code_units_(kUnsetInlineMaxCodeUnits),
       no_inline_from_(nullptr),
       boot_image_(false),
       app_image_(false),
-      include_patch_information_(kDefaultIncludePatchInformation),
       top_k_profile_threshold_(kDefaultTopKProfileThreshold),
       debuggable_(false),
       generate_debug_info_(kDefaultGenerateDebugInfo),
@@ -63,10 +61,8 @@
                                  size_t small_method_threshold,
                                  size_t tiny_method_threshold,
                                  size_t num_dex_methods_threshold,
-                                 size_t inline_depth_limit,
                                  size_t inline_max_code_units,
                                  const std::vector<const DexFile*>* no_inline_from,
-                                 bool include_patch_information,
                                  double top_k_profile_threshold,
                                  bool debuggable,
                                  bool generate_debug_info,
@@ -88,12 +84,10 @@
       small_method_threshold_(small_method_threshold),
       tiny_method_threshold_(tiny_method_threshold),
       num_dex_methods_threshold_(num_dex_methods_threshold),
-      inline_depth_limit_(inline_depth_limit),
       inline_max_code_units_(inline_max_code_units),
       no_inline_from_(no_inline_from),
       boot_image_(false),
       app_image_(false),
-      include_patch_information_(include_patch_information),
       top_k_profile_threshold_(top_k_profile_threshold),
       debuggable_(debuggable),
       generate_debug_info_(generate_debug_info),
@@ -133,10 +127,6 @@
   ParseUintOption(option, "--num-dex-methods", &num_dex_methods_threshold_, Usage);
 }
 
-void CompilerOptions::ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage) {
-  ParseUintOption(option, "--inline-depth-limit", &inline_depth_limit_, Usage);
-}
-
 void CompilerOptions::ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage) {
   ParseUintOption(option, "--inline-max-code-units", &inline_max_code_units_, Usage);
 }
@@ -186,8 +176,6 @@
     ParseTinyMethodMax(option, Usage);
   } else if (option.starts_with("--num-dex-methods=")) {
     ParseNumDexMethods(option, Usage);
-  } else if (option.starts_with("--inline-depth-limit=")) {
-    ParseInlineDepthLimit(option, Usage);
   } else if (option.starts_with("--inline-max-code-units=")) {
     ParseInlineMaxCodeUnits(option, Usage);
   } else if (option == "--generate-debug-info" || option == "-g") {
@@ -206,10 +194,6 @@
     debuggable_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
     ParseDouble(option.data(), '=', 0.0, 100.0, &top_k_profile_threshold_, Usage);
-  } else if (option == "--include-patch-information") {
-    include_patch_information_ = true;
-  } else if (option == "--no-include-patch-information") {
-    include_patch_information_ = false;
   } else if (option == "--abort-on-hard-verifier-error") {
     abort_on_hard_verifier_failure_ = true;
   } else if (option.starts_with("--dump-init-failures=")) {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 6894cd5..2376fbf 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -46,16 +46,9 @@
   static constexpr double kDefaultTopKProfileThreshold = 90.0;
   static const bool kDefaultGenerateDebugInfo = false;
   static const bool kDefaultGenerateMiniDebugInfo = false;
-  static const bool kDefaultIncludePatchInformation = false;
-  static const size_t kDefaultInlineDepthLimit = 3;
   static const size_t kDefaultInlineMaxCodeUnits = 32;
-  static constexpr size_t kUnsetInlineDepthLimit = -1;
   static constexpr size_t kUnsetInlineMaxCodeUnits = -1;
 
-  // Default inlining settings when the space filter is used.
-  static constexpr size_t kSpaceFilterInlineDepthLimit = 3;
-  static constexpr size_t kSpaceFilterInlineMaxCodeUnits = 10;
-
   CompilerOptions();
   ~CompilerOptions();
 
@@ -65,10 +58,8 @@
                   size_t small_method_threshold,
                   size_t tiny_method_threshold,
                   size_t num_dex_methods_threshold,
-                  size_t inline_depth_limit,
                   size_t inline_max_code_units,
                   const std::vector<const DexFile*>* no_inline_from,
-                  bool include_patch_information,
                   double top_k_profile_threshold,
                   bool debuggable,
                   bool generate_debug_info,
@@ -157,13 +148,6 @@
     return num_dex_methods_threshold_;
   }
 
-  size_t GetInlineDepthLimit() const {
-    return inline_depth_limit_;
-  }
-  void SetInlineDepthLimit(size_t limit) {
-    inline_depth_limit_ = limit;
-  }
-
   size_t GetInlineMaxCodeUnits() const {
     return inline_max_code_units_;
   }
@@ -213,10 +197,6 @@
     return implicit_suspend_checks_;
   }
 
-  bool GetIncludePatchInformation() const {
-    return include_patch_information_;
-  }
-
   bool IsBootImage() const {
     return boot_image_;
   }
@@ -281,7 +261,6 @@
   void ParseDumpInitFailures(const StringPiece& option, UsageFn Usage);
   void ParseDumpCfgPasses(const StringPiece& option, UsageFn Usage);
   void ParseInlineMaxCodeUnits(const StringPiece& option, UsageFn Usage);
-  void ParseInlineDepthLimit(const StringPiece& option, UsageFn Usage);
   void ParseNumDexMethods(const StringPiece& option, UsageFn Usage);
   void ParseTinyMethodMax(const StringPiece& option, UsageFn Usage);
   void ParseSmallMethodMax(const StringPiece& option, UsageFn Usage);
@@ -295,7 +274,6 @@
   size_t small_method_threshold_;
   size_t tiny_method_threshold_;
   size_t num_dex_methods_threshold_;
-  size_t inline_depth_limit_;
   size_t inline_max_code_units_;
 
   // Dex files from which we should not inline code.
@@ -305,7 +283,6 @@
 
   bool boot_image_;
   bool app_image_;
-  bool include_patch_information_;
   // When using a profile file only the top K% of the profiled samples will be compiled.
   double top_k_profile_threshold_;
   bool debuggable_;
diff --git a/compiler/elf_writer.h b/compiler/elf_writer.h
index d55f745..7baae52 100644
--- a/compiler/elf_writer.h
+++ b/compiler/elf_writer.h
@@ -63,7 +63,6 @@
   virtual void EndText(OutputStream* text) = 0;
   virtual void WriteDynamicSection() = 0;
   virtual void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) = 0;
-  virtual void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) = 0;
   virtual bool End() = 0;
 
   // Get the ELF writer's stream. This stream can be used for writing data directly
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 0d6575c..28c35e9 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -105,7 +105,6 @@
   void EndText(OutputStream* text) OVERRIDE;
   void WriteDynamicSection() OVERRIDE;
   void WriteDebugInfo(const ArrayRef<const debug::MethodDebugInfo>& method_infos) OVERRIDE;
-  void WritePatchLocations(const ArrayRef<const uintptr_t>& patch_locations) OVERRIDE;
   bool End() OVERRIDE;
 
   virtual OutputStream* GetStream() OVERRIDE;
@@ -268,17 +267,6 @@
 }
 
 template <typename ElfTypes>
-void ElfWriterQuick<ElfTypes>::WritePatchLocations(
-    const ArrayRef<const uintptr_t>& patch_locations) {
-  // Add relocation section for .text.
-  if (compiler_options_->GetIncludePatchInformation()) {
-    // Note that ElfWriter::Fixup will be called regardless and therefore
-    // we need to include oat_patches for debug sections unconditionally.
-    builder_->WritePatches(".text.oat_patches", patch_locations);
-  }
-}
-
-template <typename ElfTypes>
 bool ElfWriterQuick<ElfTypes>::End() {
   builder_->End();
   if (compiler_options_->GetGenerateBuildId()) {
diff --git a/compiler/exception_test.cc b/compiler/exception_test.cc
index eac46e5..c975944 100644
--- a/compiler/exception_test.cc
+++ b/compiler/exception_test.cc
@@ -74,8 +74,8 @@
 
     fake_header_code_and_maps_.resize(stack_maps_offset + fake_code_.size());
     MemoryRegion stack_maps_region(&fake_header_code_and_maps_[0], stack_maps_size);
-    stack_maps.FillIn(stack_maps_region);
-    OatQuickMethodHeader method_header(stack_maps_offset, 4 * sizeof(void*), 0u, 0u, code_size);
+    stack_maps.FillInCodeInfo(stack_maps_region);
+    OatQuickMethodHeader method_header(stack_maps_offset, 0u, 4 * sizeof(void*), 0u, 0u, code_size);
     memcpy(&fake_header_code_and_maps_[stack_maps_size], &method_header, sizeof(method_header));
     std::copy(fake_code_.begin(),
               fake_code_.end(),
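
The extra 0u threaded into the OatQuickMethodHeader constructor corresponds to
one new 32-bit field; oat_test.cc further below bumps the expected size from
20 to 24 bytes accordingly. A standalone layout sketch (field names are
assumptions derived from the constructor arguments):

#include <cstdint>

struct OldHeader {
  uint32_t vmap_table_offset_;
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fp_spill_mask_;
  uint32_t code_size_;
};

struct NewHeader {
  uint32_t vmap_table_offset_;
  uint32_t method_info_offset_;  // The new field behind the extra 0u argument.
  uint32_t frame_size_;
  uint32_t core_spill_mask_;
  uint32_t fp_spill_mask_;
  uint32_t code_size_;
};

static_assert(sizeof(OldHeader) == 20u, "matches the old EXPECT_EQ(20U, ...)");
static_assert(sizeof(NewHeader) == 24u, "matches the new EXPECT_EQ(24U, ...)");
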
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index b0225a3..897d819 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -76,7 +76,7 @@
   void Compile(ImageHeader::StorageMode storage_mode,
                CompilationHelper& out_helper,
                const std::string& extra_dex = "",
-               const std::string& image_class = "");
+               const std::initializer_list<std::string>& image_classes = {});
 
   void SetUpRuntimeOptions(RuntimeOptions* options) OVERRIDE {
     CommonCompilerTest::SetUpRuntimeOptions(options);
@@ -90,6 +90,18 @@
     return new std::unordered_set<std::string>(image_classes_);
   }
 
+  ArtMethod* FindCopiedMethod(ArtMethod* origin, mirror::Class* klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    PointerSize pointer_size = class_linker_->GetImagePointerSize();
+    for (ArtMethod& m : klass->GetCopiedMethods(pointer_size)) {
+      if (strcmp(origin->GetName(), m.GetName()) == 0 &&
+          origin->GetSignature() == m.GetSignature()) {
+        return &m;
+      }
+    }
+    return nullptr;
+  }
+
  private:
   std::unordered_set<std::string> image_classes_;
 };
@@ -318,7 +330,6 @@
 
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-        elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
 
         bool success = elf_writer->End();
         ASSERT_TRUE(success);
@@ -346,26 +357,27 @@
 void ImageTest::Compile(ImageHeader::StorageMode storage_mode,
                         CompilationHelper& helper,
                         const std::string& extra_dex,
-                        const std::string& image_class) {
-  if (!image_class.empty()) {
+                        const std::initializer_list<std::string>& image_classes) {
+  for (const std::string& image_class : image_classes) {
     image_classes_.insert(image_class);
   }
   CreateCompilerDriver(Compiler::kOptimizing, kRuntimeISA, kIsTargetBuild ? 2U : 16U);
   // Set inline filter values.
-  compiler_options_->SetInlineDepthLimit(CompilerOptions::kDefaultInlineDepthLimit);
   compiler_options_->SetInlineMaxCodeUnits(CompilerOptions::kDefaultInlineMaxCodeUnits);
   image_classes_.clear();
   if (!extra_dex.empty()) {
     helper.extra_dex_files = OpenTestDexFiles(extra_dex.c_str());
   }
   helper.Compile(compiler_driver_.get(), storage_mode);
-  if (!image_class.empty()) {
+  if (image_classes.begin() != image_classes.end()) {
     // Make sure the class got initialized.
     ScopedObjectAccess soa(Thread::Current());
     ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
-    EXPECT_TRUE(klass != nullptr);
-    EXPECT_TRUE(klass->IsInitialized());
+    for (const std::string& image_class : image_classes) {
+      mirror::Class* klass = class_linker->FindSystemClass(Thread::Current(), image_class.c_str());
+      EXPECT_TRUE(klass != nullptr);
+      EXPECT_TRUE(klass->IsInitialized());
+    }
   }
 }
 
@@ -493,7 +505,7 @@
   // Compile multi-image with ImageLayoutA being the last image.
   {
     CompilationHelper helper;
-    Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", "LMyClass;");
+    Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutA", {"LMyClass;"});
     image_sizes = helper.GetImageObjectSectionSizes();
   }
   TearDown();
@@ -502,7 +514,7 @@
   // Compile multi-image with ImageLayoutB being the last image.
   {
     CompilationHelper helper;
-    Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", "LMyClass;");
+    Compile(ImageHeader::kStorageModeUncompressed, helper, "ImageLayoutB", {"LMyClass;"});
     image_sizes_extra = helper.GetImageObjectSectionSizes();
   }
   // Make sure that the new stuff in the clinit in ImageLayoutB is in the last image and not in the
@@ -554,4 +566,63 @@
     ASSERT_FALSE(image_header.IsValid());
 }
 
+// Test that the pointer to quick code is the same in
+// a default method of an interface and in a copied method
+// of a class which implements the interface. This should be true
+// only if the copied method and the origin method are located in the
+// same oat file.
+TEST_F(ImageTest, TestDefaultMethods) {
+  CompilationHelper helper;
+  Compile(ImageHeader::kStorageModeUncompressed,
+      helper,
+      "DefaultMethods",
+      {"LIface;", "LImpl;", "LIterableBase;"});
+
+  PointerSize pointer_size = class_linker_->GetImagePointerSize();
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+
+  // Test that the pointer to quick code is the same in the origin method
+  // and in the copied method from the same oat file.
+  mirror::Class* iface_klass = class_linker_->LookupClass(
+      self, "LIface;", ObjPtr<mirror::ClassLoader>());
+  ASSERT_NE(nullptr, iface_klass);
+  ArtMethod* origin = iface_klass->FindDeclaredVirtualMethod(
+      "defaultMethod", "()V", pointer_size);
+  ASSERT_NE(nullptr, origin);
+  const void* code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+  // The origin method should have a pointer to quick code.
+  ASSERT_NE(nullptr, code);
+  ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code));
+  mirror::Class* impl_klass = class_linker_->LookupClass(
+      self, "LImpl;", ObjPtr<mirror::ClassLoader>());
+  ASSERT_NE(nullptr, impl_klass);
+  ArtMethod* copied = FindCopiedMethod(origin, impl_klass);
+  ASSERT_NE(nullptr, copied);
+  // The copied method should have a pointer to the same quick code as the origin method.
+  ASSERT_EQ(code, copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size));
+
+  // Test that the origin method has a pointer to quick code
+  // but the copied method has a pointer to the interpreter
+  // because these methods are in different oat files.
+  mirror::Class* iterable_klass = class_linker_->LookupClass(
+      self, "Ljava/lang/Iterable;", ObjPtr<mirror::ClassLoader>());
+  ASSERT_NE(nullptr, iterable_klass);
+  origin = iterable_klass->FindDeclaredVirtualMethod(
+      "forEach", "(Ljava/util/function/Consumer;)V", pointer_size);
+  ASSERT_NE(nullptr, origin);
+  code = origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+  // The origin method should have a pointer to quick code.
+  ASSERT_NE(nullptr, code);
+  ASSERT_FALSE(class_linker_->IsQuickToInterpreterBridge(code));
+  mirror::Class* iterablebase_klass = class_linker_->LookupClass(
+      self, "LIterableBase;", ObjPtr<mirror::ClassLoader>());
+  ASSERT_NE(nullptr, iterablebase_klass);
+  copied = FindCopiedMethod(origin, iterablebase_klass);
+  ASSERT_NE(nullptr, copied);
+  code = copied->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
+  // The copied method should have a pointer to the interpreter.
+  ASSERT_TRUE(class_linker_->IsQuickToInterpreterBridge(code));
+}
+
 }  // namespace art
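
The Compile() change above is a plain single-value-to-list migration: one
optional class descriptor becomes a brace-initializable list, so call sites go
from "LMyClass;" to {"LMyClass;"}. The same pattern in isolation (assumed
helper, not part of the test):

#include <initializer_list>
#include <string>
#include <unordered_set>

std::unordered_set<std::string> CollectImageClasses(
    const std::initializer_list<std::string>& image_classes = {}) {
  std::unordered_set<std::string> result;
  for (const std::string& image_class : image_classes) {
    result.insert(image_class);
  }
  return result;
}
// Usage: CollectImageClasses() or CollectImageClasses({"LIface;", "LImpl;"}).
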
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 117d113..d129249 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -238,10 +238,11 @@
       case ImageHeader::kStorageModeLZ4: {
         const size_t compressed_max_size = LZ4_compressBound(image_data_size);
         compressed_data.reset(new char[compressed_max_size]);
-        data_size = LZ4_compress(
+        data_size = LZ4_compress_default(
             reinterpret_cast<char*>(image_info.image_->Begin()) + sizeof(ImageHeader),
             &compressed_data[0],
-            image_data_size);
+            image_data_size,
+            compressed_max_size);
 
         break;
       }
@@ -713,7 +714,8 @@
   class_linker->VisitClassesWithoutClassesLock(&visitor);
 }
 
-static bool IsBootClassLoaderClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+static bool IsBootClassLoaderClass(ObjPtr<mirror::Class> klass)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   return klass->GetClassLoader() == nullptr;
 }
 
@@ -721,33 +723,33 @@
   return IsBootClassLoaderClass(klass) && !IsInBootImage(klass);
 }
 
-bool ImageWriter::PruneAppImageClass(mirror::Class* klass) {
+bool ImageWriter::PruneAppImageClass(ObjPtr<mirror::Class> klass) {
   bool early_exit = false;
   std::unordered_set<mirror::Class*> visited;
   return PruneAppImageClassInternal(klass, &early_exit, &visited);
 }
 
 bool ImageWriter::PruneAppImageClassInternal(
-    mirror::Class* klass,
+    ObjPtr<mirror::Class> klass,
     bool* early_exit,
     std::unordered_set<mirror::Class*>* visited) {
   DCHECK(early_exit != nullptr);
   DCHECK(visited != nullptr);
   DCHECK(compile_app_image_);
-  if (klass == nullptr || IsInBootImage(klass)) {
+  if (klass == nullptr || IsInBootImage(klass.Ptr())) {
     return false;
   }
-  auto found = prune_class_memo_.find(klass);
+  auto found = prune_class_memo_.find(klass.Ptr());
   if (found != prune_class_memo_.end()) {
     // Already computed, return the found value.
     return found->second;
   }
   // Circular dependencies, return false but do not store the result in the memoization table.
-  if (visited->find(klass) != visited->end()) {
+  if (visited->find(klass.Ptr()) != visited->end()) {
     *early_exit = true;
     return false;
   }
-  visited->emplace(klass);
+  visited->emplace(klass.Ptr());
   bool result = IsBootClassLoaderClass(klass);
   std::string temp;
   // Prune if not an image class, this handles any broken sets of image classes such as having a
@@ -811,20 +813,20 @@
         dex_file_oat_index_map_.find(dex_cache->GetDexFile()) == dex_file_oat_index_map_.end();
   }
   // Erase the element we stored earlier since we are exiting the function.
-  auto it = visited->find(klass);
+  auto it = visited->find(klass.Ptr());
   DCHECK(it != visited->end());
   visited->erase(it);
   // Only store result if it is true or none of the calls early exited due to circular
   // dependencies. If visited is empty then we are the root caller, in this case the cycle was in
   // a child call and we can remember the result.
   if (result == true || !my_early_exit || visited->empty()) {
-    prune_class_memo_[klass] = result;
+    prune_class_memo_[klass.Ptr()] = result;
   }
   *early_exit |= my_early_exit;
   return result;
 }
 
-bool ImageWriter::KeepClass(Class* klass) {
+bool ImageWriter::KeepClass(ObjPtr<mirror::Class> klass) {
   if (klass == nullptr) {
     return false;
   }
@@ -895,15 +897,27 @@
         Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(class_loader);
     class_table->Visit(classes_visitor);
     removed_class_count_ += classes_visitor.Prune();
+
+    // Record app image class loader. The fake boot class loader should not get registered
+    // and we should end up with only one class loader for an app and none for boot image.
+    if (class_loader != nullptr && class_table != nullptr) {
+      DCHECK(class_loader_ == nullptr);
+      class_loader_ = class_loader;
+    }
   }
 
   size_t GetRemovedClassCount() const {
     return removed_class_count_;
   }
 
+  ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return class_loader_;
+  }
+
  private:
   ImageWriter* const image_writer_;
   size_t removed_class_count_;
+  ObjPtr<mirror::ClassLoader> class_loader_;
 };
 
 void ImageWriter::VisitClassLoaders(ClassLoaderVisitor* visitor) {
@@ -912,71 +926,149 @@
   Runtime::Current()->GetClassLinker()->VisitClassLoaders(visitor);
 }
 
+void ImageWriter::PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+                                          ObjPtr<mirror::ClassLoader> class_loader) {
+  // To ensure deterministic contents of the hash-based arrays, each slot shall contain
+  // the candidate with the lowest index. As we're processing entries in increasing index
+  // order, this means trying to look up the entry for the current index if the slot is
+  // empty or if it contains a higher index.
+
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  ArtMethod* resolution_method = runtime->GetResolutionMethod();
+  const DexFile& dex_file = *dex_cache->GetDexFile();
+  // Prune methods.
+  ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
+  for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
+    ArtMethod* method =
+        mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
+    DCHECK(method != nullptr) << "Expected resolution method instead of null method";
+    mirror::Class* declaring_class = method->GetDeclaringClass();
+    // Copied methods may be held live by a class which was not an image class but have a
+    // declaring class which is an image class. Set the entry to the resolution method to be
+    // safe and prevent dangling pointers.
+    if (method->IsCopied() || !KeepClass(declaring_class)) {
+      mirror::DexCache::SetElementPtrSize(resolved_methods,
+                                          i,
+                                          resolution_method,
+                                          target_ptr_size_);
+    } else if (kIsDebugBuild) {
+      // Check that the class is still in the classes table.
+      ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
+      CHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
+          << Class::PrettyClass(declaring_class) << " not in class linker table";
+    }
+  }
+  // Prune fields and make the contents of the field array deterministic.
+  mirror::FieldDexCacheType* resolved_fields = dex_cache->GetResolvedFields();
+  dex::TypeIndex last_class_idx;  // Initialized to invalid index.
+  ObjPtr<mirror::Class> last_class = nullptr;
+  for (size_t i = 0, end = dex_file.NumFieldIds(); i < end; ++i) {
+    uint32_t slot_idx = dex_cache->FieldSlotIndex(i);
+    auto pair = mirror::DexCache::GetNativePairPtrSize(resolved_fields, slot_idx, target_ptr_size_);
+    uint32_t stored_index = pair.index;
+    ArtField* field = pair.object;
+    if (field != nullptr && i > stored_index) {
+      continue;  // Already checked.
+    }
+    // Check if the referenced class is in the image. Note that we want to check the referenced
+    // class rather than the declaring class to preserve the semantics, i.e. using a FieldId
+    // results in resolving the referenced class and that can for example throw OOME.
+    const DexFile::FieldId& field_id = dex_file.GetFieldId(i);
+    if (field_id.class_idx_ != last_class_idx) {
+      last_class_idx = field_id.class_idx_;
+      last_class = class_linker->LookupResolvedType(
+          dex_file, last_class_idx, dex_cache, class_loader);
+      if (last_class != nullptr && !KeepClass(last_class)) {
+        last_class = nullptr;
+      }
+    }
+    if (field == nullptr || i < stored_index) {
+      if (last_class != nullptr) {
+        const char* name = dex_file.StringDataByIdx(field_id.name_idx_);
+        const char* type = dex_file.StringByTypeIdx(field_id.type_idx_);
+        field = mirror::Class::FindField(Thread::Current(), last_class, name, type);
+        if (field != nullptr) {
+          // If the referenced class is in the image, the defining class must also be there.
+          DCHECK(KeepClass(field->GetDeclaringClass()));
+          dex_cache->SetResolvedField(i, field, target_ptr_size_);
+        }
+      }
+    } else {
+      DCHECK_EQ(i, stored_index);
+      if (last_class == nullptr) {
+        dex_cache->ClearResolvedField(stored_index, target_ptr_size_);
+      }
+    }
+  }
+  // Prune types and make the contents of the type array deterministic.
+  // This is done after fields and methods as their lookup can touch the types array.
+  for (size_t i = 0, end = dex_cache->GetDexFile()->NumTypeIds(); i < end; ++i) {
+    dex::TypeIndex type_idx(i);
+    uint32_t slot_idx = dex_cache->TypeSlotIndex(type_idx);
+    mirror::TypeDexCachePair pair =
+        dex_cache->GetResolvedTypes()[slot_idx].load(std::memory_order_relaxed);
+    uint32_t stored_index = pair.index;
+    ObjPtr<mirror::Class> klass = pair.object.Read();
+    if (klass == nullptr || i < stored_index) {
+      klass = class_linker->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader);
+      if (klass != nullptr) {
+        DCHECK_EQ(dex_cache->GetResolvedType(type_idx), klass);
+        stored_index = i;  // For correct clearing below if not keeping the `klass`.
+      }
+    } else if (i == stored_index && !KeepClass(klass)) {
+      dex_cache->ClearResolvedType(dex::TypeIndex(stored_index));
+    }
+  }
+  // Strings do not need pruning, but the contents of the string array must be deterministic.
+  for (size_t i = 0, end = dex_cache->GetDexFile()->NumStringIds(); i < end; ++i) {
+    dex::StringIndex string_idx(i);
+    uint32_t slot_idx = dex_cache->StringSlotIndex(string_idx);
+    mirror::StringDexCachePair pair =
+        dex_cache->GetStrings()[slot_idx].load(std::memory_order_relaxed);
+    uint32_t stored_index = pair.index;
+    ObjPtr<mirror::String> string = pair.object.Read();
+    if (string == nullptr || i < stored_index) {
+      string = class_linker->LookupString(dex_file, string_idx, dex_cache);
+      DCHECK(string == nullptr || dex_cache->GetResolvedString(string_idx) == string);
+    }
+  }
+}
+
 void ImageWriter::PruneNonImageClasses() {
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
   Thread* self = Thread::Current();
+  ScopedAssertNoThreadSuspension sa(__FUNCTION__);
 
   // Clear class table strong roots so that dex caches can get pruned. We require pruning the class
   // path dex caches.
   class_linker->ClearClassTableStrongRoots();
 
   // Remove the undesired classes from the class roots.
+  ObjPtr<mirror::ClassLoader> class_loader;
   {
     PruneClassLoaderClassesVisitor class_loader_visitor(this);
     VisitClassLoaders(&class_loader_visitor);
     VLOG(compiler) << "Pruned " << class_loader_visitor.GetRemovedClassCount() << " classes";
+    class_loader = class_loader_visitor.GetClassLoader();
+    DCHECK_EQ(class_loader != nullptr, compile_app_image_);
   }
 
   // Clear references to removed classes from the DexCaches.
-  ArtMethod* resolution_method = runtime->GetResolutionMethod();
-
-  ScopedAssertNoThreadSuspension sa(__FUNCTION__);
-  ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);  // For ClassInClassTable
-  ReaderMutexLock mu2(self, *Locks::dex_lock_);
-  for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-    if (self->IsJWeakCleared(data.weak_root)) {
-      continue;
-    }
-    ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
-    for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
-      mirror::TypeDexCachePair pair =
-          dex_cache->GetResolvedTypes()[i].load(std::memory_order_relaxed);
-      mirror::Class* klass = pair.object.Read();
-      if (klass != nullptr && !KeepClass(klass)) {
-        dex_cache->ClearResolvedType(dex::TypeIndex(pair.index));
+  std::vector<ObjPtr<mirror::DexCache>> dex_caches;
+  {
+    ReaderMutexLock mu2(self, *Locks::dex_lock_);
+    dex_caches.reserve(class_linker->GetDexCachesData().size());
+    for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
+      if (self->IsJWeakCleared(data.weak_root)) {
+        continue;
       }
+      dex_caches.push_back(self->DecodeJObject(data.weak_root)->AsDexCache());
     }
-    ArtMethod** resolved_methods = dex_cache->GetResolvedMethods();
-    for (size_t i = 0, num = dex_cache->NumResolvedMethods(); i != num; ++i) {
-      ArtMethod* method =
-          mirror::DexCache::GetElementPtrSize(resolved_methods, i, target_ptr_size_);
-      DCHECK(method != nullptr) << "Expected resolution method instead of null method";
-      mirror::Class* declaring_class = method->GetDeclaringClass();
-      // Copied methods may be held live by a class which was not an image class but have a
-      // declaring class which is an image class. Set it to the resolution method to be safe and
-      // prevent dangling pointers.
-      if (method->IsCopied() || !KeepClass(declaring_class)) {
-        mirror::DexCache::SetElementPtrSize(resolved_methods,
-                                            i,
-                                            resolution_method,
-                                            target_ptr_size_);
-      } else {
-        // Check that the class is still in the classes table.
-        DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
-            << Class::PrettyClass(declaring_class) << " not in class linker table";
-      }
-    }
-    ArtField** resolved_fields = dex_cache->GetResolvedFields();
-    for (size_t i = 0; i < dex_cache->NumResolvedFields(); i++) {
-      ArtField* field = mirror::DexCache::GetElementPtrSize(resolved_fields, i, target_ptr_size_);
-      if (field != nullptr && !KeepClass(field->GetDeclaringClass().Ptr())) {
-        dex_cache->SetResolvedField(i, nullptr, target_ptr_size_);
-      }
-    }
-    // Clean the dex field. It might have been populated during the initialization phase, but
-    // contains data only valid during a real run.
-    dex_cache->SetFieldObject<false>(mirror::DexCache::DexOffset(), nullptr);
+  }
+  for (ObjPtr<mirror::DexCache> dex_cache : dex_caches) {
+    PruneAndPreloadDexCache(dex_cache, class_loader);
   }
 
   // Drop the array class cache in the ClassLinker, as these are roots holding those classes live.
@@ -1246,21 +1338,20 @@
       // live.
       if (as_klass->ShouldHaveImt()) {
         ImTable* imt = as_klass->GetImt(target_ptr_size_);
-        for (size_t i = 0; i < ImTable::kSize; ++i) {
-          ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
-          DCHECK(imt_method != nullptr);
-          if (imt_method->IsRuntimeMethod() &&
-              !IsInBootImage(imt_method) &&
-              !NativeRelocationAssigned(imt_method)) {
-            AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+        if (TryAssignImTableOffset(imt, oat_index)) {
+          // Since IMTs can be shared, only do this the first time to avoid double counting
+          // IMT method fixups.
+          for (size_t i = 0; i < ImTable::kSize; ++i) {
+            ArtMethod* imt_method = imt->Get(i, target_ptr_size_);
+            DCHECK(imt_method != nullptr);
+            if (imt_method->IsRuntimeMethod() &&
+                !IsInBootImage(imt_method) &&
+                !NativeRelocationAssigned(imt_method)) {
+              AssignMethodOffset(imt_method, kNativeObjectRelocationTypeRuntimeMethod, oat_index);
+            }
           }
         }
       }
-
-      if (as_klass->ShouldHaveImt()) {
-        ImTable* imt = as_klass->GetImt(target_ptr_size_);
-        TryAssignImTableOffset(imt, oat_index);
-      }
     } else if (obj->IsClassLoader()) {
       // Register the class loader if it has a class table.
       // The fake boot class loader should not get registered and we should end up with only one
@@ -1294,10 +1385,10 @@
   return native_object_relocations_.find(ptr) != native_object_relocations_.end();
 }
 
-void ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
+bool ImageWriter::TryAssignImTableOffset(ImTable* imt, size_t oat_index) {
   // No offset, or already assigned.
   if (imt == nullptr || IsInBootImage(imt) || NativeRelocationAssigned(imt)) {
-    return;
+    return false;
   }
   // If the method is a conflict method we also want to assign the conflict table offset.
   ImageInfo& image_info = GetImageInfo(oat_index);
@@ -1309,6 +1400,7 @@
           image_info.bin_slot_sizes_[kBinImTable],
           kNativeObjectRelocationTypeIMTable});
   image_info.bin_slot_sizes_[kBinImTable] += size;
+  return true;
 }
 
 void ImageWriter::TryAssignConflictTableOffset(ImtConflictTable* table, size_t oat_index) {
@@ -1407,8 +1499,7 @@
   ALWAYS_INLINE void operator() (ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
                                  ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ref->SetReferent</*kTransactionActive*/false>(
-        VisitReference(ref->GetReferent<kWithoutReadBarrier>()));
+    operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false);
   }
 
  private:
@@ -1566,7 +1657,7 @@
   // Calculate size of the dex cache arrays slot and prepare offsets.
   PrepareDexCacheArraySlots();
 
-  // Calculate the sizes of the intern tables and class tables.
+  // Calculate the sizes of the intern tables, class tables, and fixup tables.
   for (ImageInfo& image_info : image_infos_) {
     // Calculate how big the intern table will be after being serialized.
     InternTable* const intern_table = image_info.intern_table_.get();
@@ -1574,12 +1665,11 @@
     if (intern_table->StrongSize() != 0u) {
       image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
     }
+
     // Calculate the size of the class table.
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    CHECK_EQ(class_loaders_.size(), compile_app_image_ ? 1u : 0u);
-    mirror::ClassLoader* class_loader = compile_app_image_ ? *class_loaders_.begin() : nullptr;
-    DCHECK_EQ(image_info.class_table_->NumZygoteClasses(class_loader), 0u);
-    if (image_info.class_table_->NumNonZygoteClasses(class_loader) != 0u) {
+    DCHECK_EQ(image_info.class_table_->NumReferencedZygoteClasses(), 0u);
+    if (image_info.class_table_->NumReferencedNonZygoteClasses() != 0u) {
       image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
     }
   }
@@ -1595,7 +1685,7 @@
           break;
         }
         case kBinDexCacheArray:
-          bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment());
+          bin_offset = RoundUp(bin_offset, DexCacheArraysLayout::Alignment(target_ptr_size_));
           break;
         case kBinImTable:
         case kBinIMTConflictTable: {
@@ -1628,8 +1718,6 @@
   // Transform each object's bin slot into an offset which will be used to do the final copy.
   heap->VisitObjects(UnbinObjectsIntoOffsetCallback, this);
 
-  // DCHECK_EQ(image_end_, GetBinSizeSum(kBinMirrorCount) + image_objects_offset_begin_);
-
   size_t i = 0;
   for (ImageInfo& image_info : image_infos_) {
     image_info.image_roots_address_ = PointerToLowMemUInt32(GetImageAddress(image_roots[i].Get()));
@@ -1643,8 +1731,6 @@
     ImageInfo& image_info = GetImageInfo(relocation.oat_index);
     relocation.offset += image_info.bin_slot_offsets_[bin_type];
   }
-
-  // Note that image_info.image_end_ is left at end of used mirror object section.
 }
 
 size_t ImageWriter::ImageInfo::CreateImageSections(ImageSection* out_sections) const {
@@ -1686,7 +1772,6 @@
   ImageSection* dex_cache_arrays_section = &out_sections[ImageHeader::kSectionDexCacheArrays];
   *dex_cache_arrays_section = ImageSection(bin_slot_offsets_[kBinDexCacheArray],
                                            bin_slot_sizes_[kBinDexCacheArray]);
-
   // Round up to the alignment the string table expects. See HashSet::WriteToMemory.
   size_t cur_pos = RoundUp(dex_cache_arrays_section->End(), sizeof(uint64_t));
   // Calculate the size of the interned strings.
@@ -1778,18 +1863,18 @@
   explicit FixupRootVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {
   }
 
-  void VisitRoots(mirror::Object*** roots, size_t count, const RootInfo& info ATTRIBUTE_UNUSED)
+  void VisitRoots(mirror::Object*** roots ATTRIBUTE_UNUSED,
+                  size_t count ATTRIBUTE_UNUSED,
+                  const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    for (size_t i = 0; i < count; ++i) {
-      *roots[i] = image_writer_->GetImageAddress(*roots[i]);
-    }
+    LOG(FATAL) << "Unsupported";
   }
 
   void VisitRoots(mirror::CompressedReference<mirror::Object>** roots, size_t count,
                   const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     for (size_t i = 0; i < count; ++i) {
-      roots[i]->Assign(image_writer_->GetImageAddress(roots[i]->AsMirrorPtr()));
+      image_writer_->CopyReference(roots[i], roots[i]->AsMirrorPtr());
     }
   }
 
@@ -1800,7 +1885,9 @@
 void ImageWriter::CopyAndFixupImTable(ImTable* orig, ImTable* copy) {
   for (size_t i = 0; i < ImTable::kSize; ++i) {
     ArtMethod* method = orig->Get(i, target_ptr_size_);
-    copy->Set(i, NativeLocationInImage(method), target_ptr_size_);
+    void** address = reinterpret_cast<void**>(copy->AddressOfElement(i, target_ptr_size_));
+    CopyAndFixupPointer(address, method);
+    DCHECK_EQ(copy->Get(i, target_ptr_size_), NativeLocationInImage(method));
   }
 }
 
@@ -1809,10 +1896,13 @@
   for (size_t i = 0; i < count; ++i) {
     ArtMethod* interface_method = orig->GetInterfaceMethod(i, target_ptr_size_);
     ArtMethod* implementation_method = orig->GetImplementationMethod(i, target_ptr_size_);
-    copy->SetInterfaceMethod(i, target_ptr_size_, NativeLocationInImage(interface_method));
-    copy->SetImplementationMethod(i,
-                                  target_ptr_size_,
-                                  NativeLocationInImage(implementation_method));
+    CopyAndFixupPointer(copy->AddressOfInterfaceMethod(i, target_ptr_size_), interface_method);
+    CopyAndFixupPointer(copy->AddressOfImplementationMethod(i, target_ptr_size_),
+                        implementation_method);
+    DCHECK_EQ(copy->GetInterfaceMethod(i, target_ptr_size_),
+              NativeLocationInImage(interface_method));
+    DCHECK_EQ(copy->GetImplementationMethod(i, target_ptr_size_),
+              NativeLocationInImage(implementation_method));
   }
 }
 
@@ -1831,8 +1921,9 @@
     switch (relocation.type) {
       case kNativeObjectRelocationTypeArtField: {
         memcpy(dest, pair.first, sizeof(ArtField));
-        reinterpret_cast<ArtField*>(dest)->SetDeclaringClass(
-            GetImageAddress(reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr()));
+        CopyReference(
+            reinterpret_cast<ArtField*>(dest)->GetDeclaringClassAddressWithoutBarrier(),
+            reinterpret_cast<ArtField*>(pair.first)->GetDeclaringClass().Ptr());
         break;
       }
       case kNativeObjectRelocationTypeRuntimeMethod:
@@ -1924,9 +2015,8 @@
     // above comment for intern tables.
     ClassTable temp_class_table;
     temp_class_table.ReadFromMemory(class_table_memory_ptr);
-    ObjPtr<mirror::ClassLoader> class_loader = GetClassLoader();
-    CHECK_EQ(temp_class_table.NumZygoteClasses(class_loader),
-             table->NumNonZygoteClasses(class_loader) + table->NumZygoteClasses(class_loader));
+    CHECK_EQ(temp_class_table.NumReferencedZygoteClasses(),
+             table->NumReferencedNonZygoteClasses() + table->NumReferencedZygoteClasses());
     UnbufferedRootVisitor visitor(&root_visitor, RootInfo(kRootUnknown));
     temp_class_table.VisitRoots(visitor);
   }
@@ -1950,8 +2040,10 @@
   reinterpret_cast<ImageWriter*>(arg)->CopyAndFixupObject(obj);
 }
 
-void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr,
-                                    mirror::Class* klass, Bin array_type) {
+void ImageWriter::FixupPointerArray(mirror::Object* dst,
+                                    mirror::PointerArray* arr,
+                                    mirror::Class* klass,
+                                    Bin array_type) {
   CHECK(klass->IsArrayClass());
   CHECK(arr->IsIntArray() || arr->IsLongArray()) << klass->PrettyClass() << " " << arr;
   // Fixup int and long pointers for the ArtMethod or ArtField arrays.
@@ -1960,7 +2052,7 @@
   auto* dest_array = down_cast<mirror::PointerArray*>(dst);
   for (size_t i = 0, count = num_elements; i < count; ++i) {
     void* elem = arr->GetElementPtrSize<void*>(i, target_ptr_size_);
-    if (elem != nullptr && !IsInBootImage(elem)) {
+    if (kIsDebugBuild && elem != nullptr && !IsInBootImage(elem)) {
       auto it = native_object_relocations_.find(elem);
       if (UNLIKELY(it == native_object_relocations_.end())) {
         if (it->second.IsArtMethodRelocation()) {
@@ -1976,12 +2068,9 @@
               << Class::PrettyClass(field->GetDeclaringClass());
         }
         UNREACHABLE();
-      } else {
-        ImageInfo& image_info = GetImageInfo(it->second.oat_index);
-        elem = image_info.image_begin_ + it->second.offset;
       }
     }
-    dest_array->SetElementPtrSize<false, true>(i, elem, target_ptr_size_);
+    CopyAndFixupPointer(dest_array->ElementAddress(i, target_ptr_size_), elem);
   }
 }
 
@@ -2029,22 +2118,19 @@
 
 
   void operator()(ObjPtr<Object> obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
-      REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
+      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
     ObjPtr<Object> ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
-    // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
-    // image.
-    copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        offset,
-        image_writer_->GetImageAddress(ref.Ptr()));
+    // Copy the reference and record the fixup if necessary.
+    image_writer_->CopyReference(
+        copy_->GetFieldObjectReferenceAddr<kVerifyNone>(offset),
+        ref.Ptr());
   }
 
   // java.lang.ref.Reference visitor.
   void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
                   ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
-    copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
-        mirror::Reference::ReferentOffset(),
-        image_writer_->GetImageAddress(ref->GetReferent()));
+    operator()(ref, mirror::Reference::ReferentOffset(), /* is_static */ false);
   }
 
  protected:
@@ -2122,7 +2208,10 @@
   explicit NativeLocationVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
   template <typename T>
-  T* operator()(T* ptr) const REQUIRES_SHARED(Locks::mutator_lock_) {
+  T* operator()(T* ptr, void** dest_addr = nullptr) const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (dest_addr != nullptr) {
+      image_writer_->CopyAndFixupPointer(dest_addr, ptr);
+    }
     return image_writer_->NativeLocationInImage(ptr);
   }
 
@@ -2185,10 +2274,10 @@
   }
 }
 
-
-class ImageAddressVisitor {
+class ImageWriter::ImageAddressVisitorForDexCacheArray {
  public:
-  explicit ImageAddressVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
+  explicit ImageAddressVisitorForDexCacheArray(ImageWriter* image_writer)
+      : image_writer_(image_writer) {}
 
   template <typename T>
   T* operator()(T* ptr) const REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -2199,9 +2288,9 @@
   ImageWriter* const image_writer_;
 };
 
-
 void ImageWriter::FixupDexCache(mirror::DexCache* orig_dex_cache,
                                 mirror::DexCache* copy_dex_cache) {
+  ImageAddressVisitorForDexCacheArray fixup_visitor(this);
   // Though the DexCache array fields are usually treated as native pointers, we set the full
   // 64-bit values here, clearing the top 32 bits for 32-bit targets. The zero-extension is
   // done by casting to the unsigned type uintptr_t before casting to int64_t, i.e.
@@ -2211,8 +2300,7 @@
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::StringsOffset(),
                                                NativeLocationInImage(orig_strings),
                                                PointerSize::k64);
-    orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache),
-                                 ImageAddressVisitor(this));
+    orig_dex_cache->FixupStrings(NativeCopyLocation(orig_strings, orig_dex_cache), fixup_visitor);
   }
   mirror::TypeDexCacheType* orig_types = orig_dex_cache->GetResolvedTypes();
   if (orig_types != nullptr) {
@@ -2220,7 +2308,7 @@
                                                NativeLocationInImage(orig_types),
                                                PointerSize::k64);
     orig_dex_cache->FixupResolvedTypes(NativeCopyLocation(orig_types, orig_dex_cache),
-                                       ImageAddressVisitor(this));
+                                       fixup_visitor);
   }
   ArtMethod** orig_methods = orig_dex_cache->GetResolvedMethods();
   if (orig_methods != nullptr) {
@@ -2235,16 +2323,18 @@
       mirror::DexCache::SetElementPtrSize(copy_methods, i, copy, target_ptr_size_);
     }
   }
-  ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
+  mirror::FieldDexCacheType* orig_fields = orig_dex_cache->GetResolvedFields();
   if (orig_fields != nullptr) {
     copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedFieldsOffset(),
                                                NativeLocationInImage(orig_fields),
                                                PointerSize::k64);
-    ArtField** copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
+    mirror::FieldDexCacheType* copy_fields = NativeCopyLocation(orig_fields, orig_dex_cache);
     for (size_t i = 0, num = orig_dex_cache->NumResolvedFields(); i != num; ++i) {
-      ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, i, target_ptr_size_);
-      ArtField* copy = NativeLocationInImage(orig);
-      mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
+      mirror::FieldDexCachePair orig =
+          mirror::DexCache::GetNativePairPtrSize(orig_fields, i, target_ptr_size_);
+      mirror::FieldDexCachePair copy = orig;
+      copy.object = NativeLocationInImage(orig.object);
+      mirror::DexCache::SetNativePairPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
   mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes();
@@ -2253,7 +2343,7 @@
                                                NativeLocationInImage(orig_method_types),
                                                PointerSize::k64);
     orig_dex_cache->FixupResolvedMethodTypes(NativeCopyLocation(orig_method_types, orig_dex_cache),
-                                             ImageAddressVisitor(this));
+                                             fixup_visitor);
   }
   GcRoot<mirror::CallSite>* orig_call_sites = orig_dex_cache->GetResolvedCallSites();
   if (orig_call_sites != nullptr) {
@@ -2261,7 +2351,7 @@
                                                NativeLocationInImage(orig_call_sites),
                                                PointerSize::k64);
     orig_dex_cache->FixupResolvedCallSites(NativeCopyLocation(orig_call_sites, orig_dex_cache),
-                                           ImageAddressVisitor(this));
+                                           fixup_visitor);
   }
 
   // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
@@ -2369,7 +2459,8 @@
 
   memcpy(copy, orig, ArtMethod::Size(target_ptr_size_));
 
-  copy->SetDeclaringClass(GetImageAddress(orig->GetDeclaringClassUnchecked()));
+  CopyReference(copy->GetDeclaringClassAddressWithoutBarrier(), orig->GetDeclaringClassUnchecked());
+
   ArtMethod** orig_resolved_methods = orig->GetDexCacheResolvedMethods(target_ptr_size_);
   copy->SetDexCacheResolvedMethods(NativeLocationInImage(orig_resolved_methods), target_ptr_size_);
 
@@ -2481,7 +2572,7 @@
     return GetDefaultOatIndex();
   }
   auto it = oat_index_map_.find(obj);
-  DCHECK(it != oat_index_map_.end());
+  DCHECK(it != oat_index_map_.end()) << obj;
   return it->second;
 }
 
@@ -2582,4 +2673,31 @@
     : intern_table_(new InternTable),
       class_table_(new ClassTable) {}
 
+void ImageWriter::CopyReference(mirror::HeapReference<mirror::Object>* dest,
+                                ObjPtr<mirror::Object> src) {
+  dest->Assign(GetImageAddress(src.Ptr()));
+}
+
+void ImageWriter::CopyReference(mirror::CompressedReference<mirror::Object>* dest,
+                                ObjPtr<mirror::Object> src) {
+  dest->Assign(GetImageAddress(src.Ptr()));
+}
+
+void ImageWriter::CopyAndFixupPointer(void** target, void* value) {
+  void* new_value = value;
+  if (value != nullptr && !IsInBootImage(value)) {
+    auto it = native_object_relocations_.find(value);
+    CHECK(it != native_object_relocations_.end()) << value;
+    const NativeObjectRelocation& relocation = it->second;
+    ImageInfo& image_info = GetImageInfo(relocation.oat_index);
+    new_value = reinterpret_cast<void*>(image_info.image_begin_ + relocation.offset);
+  }
+  if (target_ptr_size_ == PointerSize::k32) {
+    *reinterpret_cast<uint32_t*>(target) = PointerToLowMemUInt32(new_value);
+  } else {
+    *reinterpret_cast<uint64_t*>(target) = reinterpret_cast<uintptr_t>(new_value);
+  }
+}
+
 }  // namespace art
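
The lead comment of PruneAndPreloadDexCache above requires each hash-based
slot to end up holding the candidate with the lowest index, which makes the
preloaded arrays deterministic. A runnable sketch of that rule with toy types
(not ART's):

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

struct Slot { uint32_t index; const char* object; bool filled; };

// Entries are processed in increasing index order, so an already-filled slot
// holds a lower index and must be kept.
template <std::size_t kSize>
void Preload(std::array<Slot, kSize>& slots, uint32_t index, const char* object) {
  Slot& slot = slots[index % kSize];
  if (!slot.filled) {
    slot = Slot{index, object, true};
  }
}

int main() {
  std::array<Slot, 4> slots{};
  Preload(slots, 1u, "A");
  Preload(slots, 5u, "B");  // 5 % 4 == 1: collides with index 1, so "A" stays.
  std::printf("%s\n", slots[1].object);  // Prints "A".
}
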
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index bdc7146..39113c8 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -38,8 +38,9 @@
 #include "image.h"
 #include "lock_word.h"
 #include "mem_map.h"
-#include "oat_file.h"
 #include "mirror/dex_cache.h"
+#include "obj_ptr.h"
+#include "oat_file.h"
 #include "os.h"
 #include "safe_map.h"
 #include "utils.h"
@@ -317,6 +318,12 @@
     // Number of image class table bytes.
     size_t class_table_bytes_ = 0;
 
+    // Number of object fixup bytes.
+    size_t object_fixup_bytes_ = 0;
+
+    // Number of pointer fixup bytes.
+    size_t pointer_fixup_bytes_ = 0;
+
     // Intern table associated with this image for serialization.
     std::unique_ptr<InternTable> intern_table_;
 
@@ -376,7 +383,7 @@
   }
 
   // Returns true if the class was in the original requested image classes list.
-  bool KeepClass(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool KeepClass(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Debug aid that list of requested image classes.
   void DumpImageClasses();
@@ -391,6 +398,12 @@
   // Remove unwanted classes from various roots.
   void PruneNonImageClasses() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Remove unwanted classes from the DexCache roots and preload deterministic DexCache contents.
+  void PruneAndPreloadDexCache(ObjPtr<mirror::DexCache> dex_cache,
+                               ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_);
+
   // Verify unwanted classes removed.
   void CheckNonImageClassesRemoved() REQUIRES_SHARED(Locks::mutator_lock_);
   static void CheckNonImageClassesRemovedCallback(mirror::Object* obj, void* arg)
@@ -458,7 +471,8 @@
                           size_t oat_index)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void TryAssignImTableOffset(ImTable* imt, size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_);
+  // Return true if imt was newly inserted.
+  bool TryAssignImTableOffset(ImTable* imt, size_t oat_index) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Assign the offset for an IMT conflict table. Does nothing if the table already has a native
   // relocation.
@@ -473,11 +487,11 @@
   // we also cannot have any classes which refer to these boot class loader non image classes.
   // PruneAppImageClass also prunes if klass depends on a non-image class according to the compiler
   // driver.
-  bool PruneAppImageClass(mirror::Class* klass)
+  bool PruneAppImageClass(ObjPtr<mirror::Class> klass)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // early_exit is true if we had a cyclic dependency anywhere down the chain.
-  bool PruneAppImageClassInternal(mirror::Class* klass,
+  bool PruneAppImageClassInternal(ObjPtr<mirror::Class> klass,
                                   bool* early_exit,
                                   std::unordered_set<mirror::Class*>* visited)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -528,6 +542,14 @@
   // Return true if there already exists a native allocation for an object.
   bool NativeRelocationAssigned(void* ptr) const;
 
+  void CopyReference(mirror::HeapReference<mirror::Object>* dest, ObjPtr<mirror::Object> src)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void CopyReference(mirror::CompressedReference<mirror::Object>* dest, ObjPtr<mirror::Object> src)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void CopyAndFixupPointer(void** target, void* value);
+
   const CompilerDriver& compiler_driver_;
 
   // Beginning target image address for the first image.
@@ -602,9 +624,11 @@
   class FixupRootVisitor;
   class FixupVisitor;
   class GetRootsVisitor;
+  class ImageAddressVisitorForDexCacheArray;
   class NativeLocationVisitor;
   class PruneClassesVisitor;
   class PruneClassLoaderClassesVisitor;
+  class RegisterBootClassPathClassesVisitor;
   class VisitReferencesVisitor;
 
   DISALLOW_COPY_AND_ASSIGN(ImageWriter);
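
CopyAndFixupPointer(), declared above, writes the relocated value at the
target's pointer width rather than the host's. A self-contained sketch of that
tail:

#include <cstdint>

enum class PointerSize { k32, k64 };

void StorePointer(void** target, void* value, PointerSize size) {
  if (size == PointerSize::k32) {
    // A 32-bit image needs the value to fit in 32 bits; the real code checks
    // this via PointerToLowMemUInt32.
    *reinterpret_cast<uint32_t*>(target) =
        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(value));
  } else {
    *reinterpret_cast<uint64_t*>(target) = reinterpret_cast<uintptr_t>(value);
  }
}
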
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 9bd25d8..63c23cb 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -24,6 +24,10 @@
 // Note: adding a new intrinsic requires an art image version change,
 // as the modifiers flag for some ArtMethods will need to be changed.
 
+// Note: j.l.Integer.valueOf says kNoThrow even though it could throw an OOME.
+// kNoThrow should be renamed to kNoVisibleThrow, as it is OK to GVN Integer.valueOf
+// (kNoSideEffects), and it is also OK to remove it if it's unused.
+
 #define INTRINSICS_LIST(V) \
   V(DoubleDoubleToRawLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToRawLongBits", "(D)J") \
   V(DoubleDoubleToLongBits, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Double;", "doubleToLongBits", "(D)J") \
@@ -149,7 +153,8 @@
   V(UnsafeLoadFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "loadFence", "()V") \
   V(UnsafeStoreFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "storeFence", "()V") \
   V(UnsafeFullFence, kVirtual, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Lsun/misc/Unsafe;", "fullFence", "()V") \
-  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;")
+  V(ReferenceGetReferent, kDirect, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/ref/Reference;", "getReferent", "()Ljava/lang/Object;") \
+  V(IntegerValueOf, kStatic, kNeedsEnvironmentOrCache, kNoSideEffects, kNoThrow, "Ljava/lang/Integer;", "valueOf", "(I)Ljava/lang/Integer;")
 
 #endif  // ART_COMPILER_INTRINSICS_LIST_H_
 #undef ART_COMPILER_INTRINSICS_LIST_H_   // #define is only for lint.
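
The V(...) rows above form an X-macro list that consumers expand, which is why
adding IntegerValueOf is a one-line change here. A minimal sketch of the
expansion (the trimmed list and enum name are stand-ins, not ART's):

#include <cstdio>

#define EXAMPLE_INTRINSICS_LIST(V) \
  V(ReferenceGetReferent)          \
  V(IntegerValueOf)

enum class Intrinsics {
  kNone,
#define DECLARE_INTRINSIC(Name) k##Name,
  EXAMPLE_INTRINSICS_LIST(DECLARE_INTRINSIC)
#undef DECLARE_INTRINSIC
};

int main() {
  // Each V(...) row became one enumerator.
  std::printf("%d\n", static_cast<int>(Intrinsics::kIntegerValueOf));  // 2
}
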
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index cbd831a..ad951bc 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -97,10 +97,8 @@
       CompilerOptions::kDefaultSmallMethodThreshold,
       CompilerOptions::kDefaultTinyMethodThreshold,
       CompilerOptions::kDefaultNumDexMethodsThreshold,
-      CompilerOptions::kDefaultInlineDepthLimit,
       CompilerOptions::kDefaultInlineMaxCodeUnits,
       /* no_inline_from */ nullptr,
-      /* include_patch_information */ false,
       CompilerOptions::kDefaultTopKProfileThreshold,
       Runtime::Current()->IsJavaDebuggable(),
       CompilerOptions::kDefaultGenerateDebugInfo,
@@ -178,10 +176,6 @@
     jit_logger_.reset(new JitLogger());
     jit_logger_->OpenLog();
   }
-
-  size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
-  DCHECK_LT(thread_count * inline_depth_limit, std::numeric_limits<uint16_t>::max())
-      << "ProfilingInfo's inline counter can potentially overflow";
 }
 
 JitCompiler::~JitCompiler() {
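
The deleted DCHECK bounded thread_count * inline_depth_limit so that
ProfilingInfo's 16-bit inline counter could not overflow; with the depth limit
removed there is no product left to bound. The old invariant in isolation:

#include <cstddef>
#include <cstdint>
#include <limits>

bool CounterCannotOverflow(std::size_t thread_count, std::size_t inline_depth_limit) {
  return thread_count * inline_depth_limit <
         std::numeric_limits<uint16_t>::max();
}
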
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 3bd290d..68ec7bd 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -660,8 +660,8 @@
                                                  frame_size,
                                                  main_jni_conv->CoreSpillMask(),
                                                  main_jni_conv->FpSpillMask(),
-                                                 ArrayRef<const SrcMapElem>(),
-                                                 ArrayRef<const uint8_t>(),  // vmap_table.
+                                                 /* method_info */ ArrayRef<const uint8_t>(),
+                                                 /* vmap_table */ ArrayRef<const uint8_t>(),
                                                  ArrayRef<const uint8_t>(*jni_asm->cfi().data()),
                                                  ArrayRef<const LinkerPatch>());
 }
diff --git a/compiler/linker/relative_patcher_test.h b/compiler/linker/relative_patcher_test.h
index 233daf4..908cb41 100644
--- a/compiler/linker/relative_patcher_test.h
+++ b/compiler/linker/relative_patcher_test.h
@@ -87,7 +87,7 @@
         /* frame_size_in_bytes */ 0u,
         /* core_spill_mask */ 0u,
         /* fp_spill_mask */ 0u,
-        /* src_mapping_table */ ArrayRef<const SrcMapElem>(),
+        /* method_info */ ArrayRef<const uint8_t>(),
         /* vmap_table */ ArrayRef<const uint8_t>(),
         /* cfi_info */ ArrayRef<const uint8_t>(),
         patches));
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index 66111f6..ead4124 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -250,7 +250,6 @@
 
     elf_writer->WriteDynamicSection();
     elf_writer->WriteDebugInfo(oat_writer.GetMethodDebugInfo());
-    elf_writer->WritePatchLocations(oat_writer.GetAbsolutePatchLocations());
 
     if (!elf_writer->End()) {
       return false;
@@ -265,6 +264,7 @@
 
   void TestDexFileInput(bool verify, bool low_4gb, bool use_profile);
   void TestZipFileInput(bool verify);
+  void TestZipFileInputWithEmptyDex();
 
   std::unique_ptr<const InstructionSetFeatures> insn_features_;
   std::unique_ptr<QuickCompilerCallbacks> callbacks_;
@@ -485,7 +485,7 @@
   // it is time to update OatHeader::kOatVersion
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
-  EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
+  EXPECT_EQ(24U, sizeof(OatQuickMethodHeader));
   EXPECT_EQ(161 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
@@ -821,6 +821,28 @@
   TestZipFileInput(true);
 }
 
+void OatTest::TestZipFileInputWithEmptyDex() {
+  ScratchFile zip_file;
+  ZipBuilder zip_builder(zip_file.GetFile());
+  bool success = zip_builder.AddFile("classes.dex", nullptr, 0);
+  ASSERT_TRUE(success);
+  success = zip_builder.Finish();
+  ASSERT_TRUE(success) << strerror(errno);
+
+  SafeMap<std::string, std::string> key_value_store;
+  key_value_store.Put(OatHeader::kImageLocationKey, "test.art");
+  std::vector<const char*> input_filenames { zip_file.GetFilename().c_str() };  // NOLINT [readability/braces] [4]
+  ScratchFile oat_file, vdex_file(oat_file, ".vdex");
+  std::unique_ptr<ProfileCompilationInfo> profile_compilation_info(new ProfileCompilationInfo());
+  success = WriteElf(vdex_file.GetFile(), oat_file.GetFile(), input_filenames,
+                     key_value_store, /*verify*/false, profile_compilation_info.get());
+  ASSERT_FALSE(success);
+}
+
+TEST_F(OatTest, ZipFileInputWithEmptyDex) {
+  TestZipFileInputWithEmptyDex();
+}
+
 TEST_F(OatTest, UpdateChecksum) {
   InstructionSet insn_set = kX86;
   std::string error_msg;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 0ea1125..105db1d 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -104,6 +104,13 @@
 // Defines the location of the raw dex file to write.
 class OatWriter::DexFileSource {
  public:
+  enum Type {
+    kNone,
+    kZipEntry,
+    kRawFile,
+    kRawData,
+  };
+
   explicit DexFileSource(ZipEntry* zip_entry)
       : type_(kZipEntry), source_(zip_entry) {
     DCHECK(source_ != nullptr);
@@ -119,6 +126,7 @@
     DCHECK(source_ != nullptr);
   }
 
+  Type GetType() const { return type_; }
   bool IsZipEntry() const { return type_ == kZipEntry; }
   bool IsRawFile() const { return type_ == kRawFile; }
   bool IsRawData() const { return type_ == kRawData; }
@@ -147,13 +155,6 @@
   }
 
  private:
-  enum Type {
-    kNone,
-    kZipEntry,
-    kRawFile,
-    kRawData,
-  };
-
   Type type_;
   const void* source_;
 };
@@ -325,6 +326,7 @@
     size_relative_call_thunks_(0),
     size_misc_thunks_(0),
     size_vmap_table_(0),
+    size_method_info_(0),
     size_oat_dex_file_location_size_(0),
     size_oat_dex_file_location_data_(0),
     size_oat_dex_file_location_checksum_(0),
@@ -808,6 +810,7 @@
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
       uint32_t vmap_table_offset = method_header->GetVmapTableOffset();
+      uint32_t method_info_offset = method_header->GetMethodInfoOffset();
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
@@ -818,13 +821,18 @@
           vmap_table_offset += code_offset;
           DCHECK_LT(vmap_table_offset, code_offset);
         }
+        if (method_info_offset != 0u) {
+          method_info_offset += code_offset;
+          DCHECK_LT(method_info_offset, code_offset);
+        }
       } else {
+        CHECK(compiled_method->GetMethodInfo().empty());
         if (kIsVdexEnabled) {
           // We write the offset in the .vdex file.
           DCHECK_EQ(vmap_table_offset, 0u);
           vmap_table_offset = current_quickening_info_offset_;
-          ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
-          current_quickening_info_offset_ += map.size() * sizeof(map.front());
+          ArrayRef<const uint8_t> vmap_table = compiled_method->GetVmapTable();
+          current_quickening_info_offset_ += vmap_table.size() * sizeof(vmap_table.front());
         } else {
           // We write the offset of the quickening info relative to the code.
           vmap_table_offset += code_offset;
@@ -835,6 +843,7 @@
       uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
       uint32_t fp_spill_mask = compiled_method->GetFpSpillMask();
       *method_header = OatQuickMethodHeader(vmap_table_offset,
+                                            method_info_offset,
                                             frame_size_in_bytes,
                                             core_spill_mask,
                                             fp_spill_mask,
@@ -908,6 +917,9 @@
       if (UNLIKELY(lhs->GetVmapTable().data() != rhs->GetVmapTable().data())) {
         return lhs->GetVmapTable().data() < rhs->GetVmapTable().data();
       }
+      if (UNLIKELY(lhs->GetMethodInfo().data() != rhs->GetMethodInfo().data())) {
+        return lhs->GetMethodInfo().data() < rhs->GetMethodInfo().data();
+      }
       if (UNLIKELY(lhs->GetPatches().data() != rhs->GetPatches().data())) {
         return lhs->GetPatches().data() < rhs->GetPatches().data();
       }
@@ -982,20 +994,103 @@
   SafeMap<const uint8_t*, uint32_t> dedupe_map_;
 };
 
+class OatWriter::InitMethodInfoVisitor : public OatDexMethodVisitor {
+ public:
+  InitMethodInfoVisitor(OatWriter* writer, size_t offset) : OatDexMethodVisitor(writer, offset) {}
+
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
+    CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
+
+    if (compiled_method != nullptr) {
+      DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
+      DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].GetMethodInfoOffset(), 0u);
+      ArrayRef<const uint8_t> map = compiled_method->GetMethodInfo();
+      const uint32_t map_size = map.size() * sizeof(map[0]);
+      if (map_size != 0u) {
+        size_t offset = dedupe_map_.GetOrCreate(
+            map.data(),
+            [this, map_size]() {
+              uint32_t new_offset = offset_;
+              offset_ += map_size;
+              return new_offset;
+            });
+        // The code offset is not initialized yet, so set the method info offset
+        // to 0u - offset; it is adjusted by the code offset later.
+        DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
+        oat_class->method_headers_[method_offsets_index_].SetMethodInfoOffset(0u - offset);
+      }
+      ++method_offsets_index_;
+    }
+
+    return true;
+  }
+
+ private:
+  // Deduplication is already done on a pointer basis by the compiler driver,
+  // so we can simply compare the pointers to find out if things are duplicated.
+  SafeMap<const uint8_t*, uint32_t> dedupe_map_;
+};
+
 class OatWriter::InitImageMethodVisitor : public OatDexMethodVisitor {
  public:
-  InitImageMethodVisitor(OatWriter* writer, size_t offset)
+  InitImageMethodVisitor(OatWriter* writer,
+                         size_t offset,
+                         const std::vector<const DexFile*>* dex_files)
     : OatDexMethodVisitor(writer, offset),
-      pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())) {
+      pointer_size_(GetInstructionSetPointerSize(writer_->compiler_driver_->GetInstructionSet())),
+      dex_files_(dex_files),
+      class_linker_(Runtime::Current()->GetClassLinker()) {}
+
+  // Handle copied methods here. Copy the pointer to quick code from
+  // an origin method to a copied method only if they are
+  // in the same oat file. If the origin and the copied methods are
+  // in different oat files, leave the copied method untouched;
+  // references to other oat files are not supported yet.
+  bool StartClass(const DexFile* dex_file, size_t class_def_index)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    OatDexMethodVisitor::StartClass(dex_file, class_def_index);
+    // Skip classes that are not in the image.
+    if (!IsImageClass()) {
+      return true;
+    }
+    ScopedObjectAccessUnchecked soa(Thread::Current());
+    StackHandleScope<1> hs(soa.Self());
+    Handle<mirror::DexCache> dex_cache = hs.NewHandle(
+        class_linker_->FindDexCache(Thread::Current(), *dex_file));
+    const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_index);
+    mirror::Class* klass = dex_cache->GetResolvedType(class_def.class_idx_);
+    if (klass != nullptr) {
+      for (ArtMethod& method : klass->GetCopiedMethods(pointer_size_)) {
+        // Find the origin method: its declaring class and dex_method_idx
+        // are the same as those recorded in the copied method.
+        mirror::Class* declaring_class = method.GetDeclaringClass();
+        ArtMethod* origin = declaring_class->FindDeclaredVirtualMethod(
+            declaring_class->GetDexCache(),
+            method.GetDexMethodIndex(),
+            pointer_size_);
+        CHECK(origin != nullptr);
+        if (IsInOatFile(&declaring_class->GetDexFile())) {
+          const void* code_ptr =
+              origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+          if (code_ptr == nullptr) {
+            methods_to_process_.push_back(std::make_pair(&method, origin));
+          } else {
+            method.SetEntryPointFromQuickCompiledCodePtrSize(
+                code_ptr, pointer_size_);
+          }
+        }
+      }
+    }
+    return true;
   }
 
   bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    const DexFile::TypeId& type_id =
-        dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
-    const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
     // Skip methods that are not in the image.
-    if (!writer_->GetCompilerDriver()->IsImageClass(class_descriptor)) {
+    if (!IsImageClass()) {
       return true;
     }
 
@@ -1009,17 +1104,16 @@
       ++method_offsets_index_;
     }
 
-    ClassLinker* linker = Runtime::Current()->GetClassLinker();
     // Unchecked as we hold mutator_lock_ on entry.
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(linker->FindDexCache(
+    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker_->FindDexCache(
         Thread::Current(), *dex_file_)));
     ArtMethod* method;
     if (writer_->HasBootImage()) {
       const InvokeType invoke_type = it.GetMethodInvokeType(
           dex_file_->GetClassDef(class_def_index_));
-      method = linker->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
+      method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
           *dex_file_,
           it.GetMemberIndex(),
           dex_cache,
@@ -1039,7 +1133,8 @@
       // Should already have been resolved by the compiler, just peek into the dex cache.
       // It may not be resolved if the class failed to verify, in this case, don't set the
       // entrypoint. This is not fatal since the dex cache will contain a resolution method.
-      method = dex_cache->GetResolvedMethod(it.GetMemberIndex(), linker->GetImagePointerSize());
+      method = dex_cache->GetResolvedMethod(it.GetMemberIndex(),
+                                            class_linker_->GetImagePointerSize());
     }
     if (method != nullptr &&
         compiled_method != nullptr &&
@@ -1051,8 +1146,38 @@
     return true;
   }
 
+  // Check whether the current class is an image class.
+  bool IsImageClass() {
+    const DexFile::TypeId& type_id =
+        dex_file_->GetTypeId(dex_file_->GetClassDef(class_def_index_).class_idx_);
+    const char* class_descriptor = dex_file_->GetTypeDescriptor(type_id);
+    return writer_->GetCompilerDriver()->IsImageClass(class_descriptor);
+  }
+
+  // Check whether the specified dex file is in the oat file being compiled.
+  bool IsInOatFile(const DexFile* dex_file) {
+    return ContainsElement(*dex_files_, dex_file);
+  }
+
+  // Assign pointers to quick code for the copied methods that were
+  // not handled in StartClass().
+  void Postprocess() {
+    for (std::pair<ArtMethod*, ArtMethod*>& p : methods_to_process_) {
+      ArtMethod* method = p.first;
+      ArtMethod* origin = p.second;
+      const void* code_ptr =
+          origin->GetEntryPointFromQuickCompiledCodePtrSize(pointer_size_);
+      if (code_ptr != nullptr) {
+        method->SetEntryPointFromQuickCompiledCodePtrSize(code_ptr, pointer_size_);
+      }
+    }
+  }
+
  protected:
   const PointerSize pointer_size_;
+  const std::vector<const DexFile*>* dex_files_;
+  ClassLinker* const class_linker_;
+  std::vector<std::pair<ArtMethod*, ArtMethod*>> methods_to_process_;
 };
 
 class OatWriter::WriteCodeMethodVisitor : public OatDexMethodVisitor {
@@ -1224,7 +1349,7 @@
                 break;
               }
               default: {
-                DCHECK_EQ(patch.GetType(), LinkerPatch::Type::kRecordPosition);
+                DCHECK(false) << "Unexpected linker patch type: " << patch.GetType();
                 break;
               }
             }
@@ -1315,12 +1440,10 @@
 
   mirror::String* GetTargetString(const LinkerPatch& patch) REQUIRES_SHARED(Locks::mutator_lock_) {
     ScopedObjectAccessUnchecked soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
     ClassLinker* linker = Runtime::Current()->GetClassLinker();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(GetDexCache(patch.TargetStringDexFile())));
     mirror::String* string = linker->LookupString(*patch.TargetStringDexFile(),
                                                   patch.TargetStringIndex(),
-                                                  dex_cache);
+                                                  GetDexCache(patch.TargetStringDexFile()));
     DCHECK(string != nullptr);
     DCHECK(writer_->HasBootImage() ||
            Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(string));
@@ -1433,7 +1556,7 @@
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
-    if (compiled_method != nullptr) {  // ie. not an abstract method
+    if (compiled_method != nullptr) {  // i.e. not an abstract method
       size_t file_offset = file_offset_;
       OutputStream* out = out_;
 
@@ -1482,6 +1605,63 @@
   }
 };
 
+class OatWriter::WriteMethodInfoVisitor : public OatDexMethodVisitor {
+ public:
+  WriteMethodInfoVisitor(OatWriter* writer,
+                         OutputStream* out,
+                         const size_t file_offset,
+                         size_t relative_offset)
+    : OatDexMethodVisitor(writer, relative_offset),
+      out_(out),
+      file_offset_(file_offset) {}
+
+  bool VisitMethod(size_t class_def_method_index, const ClassDataItemIterator& it) {
+    OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
+    const CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
+
+    if (compiled_method != nullptr) {  // i.e. not an abstract method
+      size_t file_offset = file_offset_;
+      OutputStream* out = out_;
+      uint32_t map_offset = oat_class->method_headers_[method_offsets_index_].GetMethodInfoOffset();
+      uint32_t code_offset = oat_class->method_offsets_[method_offsets_index_].code_offset_;
+      ++method_offsets_index_;
+      DCHECK((compiled_method->GetMethodInfo().size() == 0u && map_offset == 0u) ||
+             (compiled_method->GetMethodInfo().size() != 0u && map_offset != 0u))
+          << compiled_method->GetMethodInfo().size() << " " << map_offset << " "
+          << dex_file_->PrettyMethod(it.GetMemberIndex());
+      if (map_offset != 0u) {
+        // Transform map_offset to actual oat data offset.
+        map_offset = (code_offset - compiled_method->CodeDelta()) - map_offset;
+        DCHECK_NE(map_offset, 0u);
+        DCHECK_LE(map_offset, offset_) << dex_file_->PrettyMethod(it.GetMemberIndex());
+
+        ArrayRef<const uint8_t> map = compiled_method->GetMethodInfo();
+        size_t map_size = map.size() * sizeof(map[0]);
+        if (map_offset == offset_) {
+          // Write the deduplicated method info.
+          if (UNLIKELY(!out->WriteFully(map.data(), map_size))) {
+            ReportWriteFailure(it);
+            return false;
+          }
+          offset_ += map_size;
+        }
+      }
+      DCHECK_OFFSET_();
+    }
+
+    return true;
+  }
+
+ private:
+  OutputStream* const out_;
+  size_t const file_offset_;
+
+  void ReportWriteFailure(const ClassDataItemIterator& it) {
+    PLOG(ERROR) << "Failed to write method info for "
+        << dex_file_->PrettyMethod(it.GetMemberIndex()) << " to " << out_->GetLocation();
+  }
+};
+
 // Visit all methods from all classes in all dex files with the specified visitor.
 bool OatWriter::VisitDexMethods(DexMethodVisitor* visitor) {
   for (const DexFile* dex_file : *dex_files_) {
@@ -1575,11 +1755,18 @@
   if (!compiler_driver_->GetCompilerOptions().IsAnyMethodCompilationEnabled()) {
     return offset;
   }
-  InitMapMethodVisitor visitor(this, offset);
-  bool success = VisitDexMethods(&visitor);
-  DCHECK(success);
-  offset = visitor.GetOffset();
-
+  {
+    InitMapMethodVisitor visitor(this, offset);
+    bool success = VisitDexMethods(&visitor);
+    DCHECK(success);
+    offset = visitor.GetOffset();
+  }
+  {
+    InitMethodInfoVisitor visitor(this, offset);
+    bool success = VisitDexMethods(&visitor);
+    DCHECK(success);
+    offset = visitor.GetOffset();
+  }
   return offset;
 }
 
@@ -1630,8 +1817,9 @@
   offset = code_visitor.GetOffset();
 
   if (HasImage()) {
-    InitImageMethodVisitor image_visitor(this, offset);
+    InitImageMethodVisitor image_visitor(this, offset, dex_files_);
     success = VisitDexMethods(&image_visitor);
+    image_visitor.Postprocess();
     DCHECK(success);
     offset = image_visitor.GetOffset();
   }
@@ -1919,6 +2107,7 @@
     DO_STAT(size_relative_call_thunks_);
     DO_STAT(size_misc_thunks_);
     DO_STAT(size_vmap_table_);
+    DO_STAT(size_method_info_);
     DO_STAT(size_oat_dex_file_location_size_);
     DO_STAT(size_oat_dex_file_location_data_);
     DO_STAT(size_oat_dex_file_location_checksum_);
@@ -2034,13 +2223,24 @@
 }
 
 size_t OatWriter::WriteMaps(OutputStream* out, const size_t file_offset, size_t relative_offset) {
-  size_t vmap_tables_offset = relative_offset;
-  WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
-  if (UNLIKELY(!VisitDexMethods(&visitor))) {
-    return 0;
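+  // Write the vmap tables first, then the method infos, tracking each
+  // section's size separately.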
+  {
+    size_t vmap_tables_offset = relative_offset;
+    WriteMapMethodVisitor visitor(this, out, file_offset, relative_offset);
+    if (UNLIKELY(!VisitDexMethods(&visitor))) {
+      return 0;
+    }
+    relative_offset = visitor.GetOffset();
+    size_vmap_table_ = relative_offset - vmap_tables_offset;
   }
-  relative_offset = visitor.GetOffset();
-  size_vmap_table_ = relative_offset - vmap_tables_offset;
+  {
+    size_t method_infos_offset = relative_offset;
+    WriteMethodInfoVisitor visitor(this, out, file_offset, relative_offset);
+    if (UNLIKELY(!VisitDexMethods(&visitor))) {
+      return 0;
+    }
+    relative_offset = visitor.GetOffset();
+    size_method_info_ = relative_offset - method_infos_offset;
+  }
 
   return relative_offset;
 }
@@ -2259,6 +2459,10 @@
     ZipEntry* zip_entry = oat_dex_file->source_.GetZipEntry();
     std::unique_ptr<MemMap> mem_map(
         zip_entry->ExtractToMemMap(location.c_str(), "classes.dex", &error_msg));
+    if (mem_map == nullptr) {
+      LOG(ERROR) << "Failed to extract dex file to mem map for layout: " << error_msg;
+      return false;
+    }
     dex_file = DexFile::Open(location,
                              zip_entry->GetCrc32(),
                              std::move(mem_map),
@@ -2266,7 +2470,8 @@
                              /* verify_checksum */ true,
                              &error_msg);
   } else {
-    DCHECK(oat_dex_file->source_.IsRawFile());
+    CHECK(oat_dex_file->source_.IsRawFile())
+        << static_cast<size_t>(oat_dex_file->source_.GetType());
     File* raw_file = oat_dex_file->source_.GetRawFile();
     dex_file = DexFile::OpenDex(raw_file->Fd(), location, /* verify_checksum */ true, &error_msg);
   }
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index db84166..e778f75 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -225,10 +225,6 @@
     return oat_data_offset_;
   }
 
-  ArrayRef<const uintptr_t> GetAbsolutePatchLocations() const {
-    return ArrayRef<const uintptr_t>(absolute_patch_locations_);
-  }
-
   ~OatWriter();
 
   void AddMethodDebugInfos(const std::vector<debug::MethodDebugInfo>& infos) {
@@ -258,9 +254,11 @@
   class InitOatClassesMethodVisitor;
   class InitCodeMethodVisitor;
   class InitMapMethodVisitor;
+  class InitMethodInfoVisitor;
   class InitImageMethodVisitor;
   class WriteCodeMethodVisitor;
   class WriteMapMethodVisitor;
+  class WriteMethodInfoVisitor;
   class WriteQuickeningInfoMethodVisitor;
 
   // Visit all the methods in all the compiled dex files in their definition order
@@ -429,6 +427,7 @@
   uint32_t size_relative_call_thunks_;
   uint32_t size_misc_thunks_;
   uint32_t size_vmap_table_;
+  uint32_t size_method_info_;
   uint32_t size_oat_dex_file_location_size_;
   uint32_t size_oat_dex_file_location_data_;
   uint32_t size_oat_dex_file_location_checksum_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 2ee4db9..476906a 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -528,7 +528,8 @@
         has_dom_based_dynamic_bce_(false),
         initial_block_size_(graph->GetBlocks().size()),
         side_effects_(side_effects),
-        induction_range_(induction_analysis) {}
+        induction_range_(induction_analysis),
+        next_(nullptr) {}
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
@@ -1618,8 +1619,8 @@
   void InsertDeoptInLoop(HLoopInformation* loop, HBasicBlock* block, HInstruction* condition) {
     HInstruction* suspend = loop->GetSuspendCheck();
     block->InsertInstructionBefore(condition, block->GetLastInstruction());
-    HDeoptimize* deoptimize =
-        new (GetGraph()->GetArena()) HDeoptimize(condition, suspend->GetDexPc());
+    HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+        GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, suspend->GetDexPc());
     block->InsertInstructionBefore(deoptimize, block->GetLastInstruction());
     if (suspend->HasEnvironment()) {
       deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
@@ -1631,8 +1632,8 @@
   void InsertDeoptInBlock(HBoundsCheck* bounds_check, HInstruction* condition) {
     HBasicBlock* block = bounds_check->GetBlock();
     block->InsertInstructionBefore(condition, bounds_check);
-    HDeoptimize* deoptimize =
-        new (GetGraph()->GetArena()) HDeoptimize(condition, bounds_check->GetDexPc());
+    HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+        GetGraph()->GetArena(), condition, HDeoptimize::Kind::kBCE, bounds_check->GetDexPc());
     block->InsertInstructionBefore(deoptimize, bounds_check);
     deoptimize->CopyEnvironmentFrom(bounds_check->GetEnvironment());
   }
diff --git a/compiler/optimizing/bounds_check_elimination_test.cc b/compiler/optimizing/bounds_check_elimination_test.cc
index 5d58207..cb6e14b 100644
--- a/compiler/optimizing/bounds_check_elimination_test.cc
+++ b/compiler/optimizing/bounds_check_elimination_test.cc
@@ -43,7 +43,7 @@
   void RunBCE() {
     graph_->BuildDominatorTree();
 
-    InstructionSimplifier(graph_).Run();
+    InstructionSimplifier(graph_, /* codegen */ nullptr).Run();
 
     SideEffectsAnalysis side_effects(graph_);
     side_effects.Run();
diff --git a/compiler/optimizing/cha_guard_optimization.cc b/compiler/optimizing/cha_guard_optimization.cc
index fe42301..048073e 100644
--- a/compiler/optimizing/cha_guard_optimization.cc
+++ b/compiler/optimizing/cha_guard_optimization.cc
@@ -36,7 +36,8 @@
       : HGraphVisitor(graph),
         block_has_cha_guard_(GetGraph()->GetBlocks().size(),
                              0,
-                             graph->GetArena()->Adapter(kArenaAllocCHA)) {
+                             graph->GetArena()->Adapter(kArenaAllocCHA)),
+        instruction_iterator_(nullptr) {
     number_of_guards_to_visit_ = GetGraph()->GetNumberOfCHAGuards();
     DCHECK_NE(number_of_guards_to_visit_, 0u);
     // Will recount number of guards during guard optimization.
@@ -201,8 +202,8 @@
     HInstruction* suspend = loop_info->GetSuspendCheck();
     // Need a new deoptimize instruction that copies the environment
     // of the suspend instruction for the loop.
-    HDeoptimize* deoptimize =
-        new (GetGraph()->GetArena()) HDeoptimize(compare, suspend->GetDexPc());
+    HDeoptimize* deoptimize = new (GetGraph()->GetArena()) HDeoptimize(
+        GetGraph()->GetArena(), compare, HDeoptimize::Kind::kInline, suspend->GetDexPc());
     pre_header->InsertInstructionBefore(deoptimize, pre_header->GetLastInstruction());
     deoptimize->CopyEnvironmentFromWithLoopPhiAdjustment(
         suspend->GetEnvironment(), loop_info->GetHeader());
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 424b850..b7c8075 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -654,8 +654,12 @@
   }
 }
 
-size_t CodeGenerator::ComputeStackMapsSize() {
-  return stack_map_stream_.PrepareForFillIn();
+void CodeGenerator::ComputeStackMapAndMethodInfoSize(size_t* stack_map_size,
+                                                     size_t* method_info_size) {
+  DCHECK(stack_map_size != nullptr);
+  DCHECK(method_info_size != nullptr);
+  *stack_map_size = stack_map_stream_.PrepareForFillIn();
+  *method_info_size = stack_map_stream_.ComputeMethodInfoSize();
 }
 
 static void CheckCovers(uint32_t dex_pc,
@@ -723,10 +727,13 @@
   }
 }
 
-void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
-  stack_map_stream_.FillIn(region);
+void CodeGenerator::BuildStackMaps(MemoryRegion stack_map_region,
+                                   MemoryRegion method_info_region,
+                                   const DexFile::CodeItem& code_item) {
+  stack_map_stream_.FillInCodeInfo(stack_map_region);
+  stack_map_stream_.FillInMethodInfo(method_info_region);
   if (kIsDebugBuild) {
-    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(stack_map_region), code_item);
   }
 }
 
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index b912672..ea463ee 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -341,8 +341,10 @@
     slow_paths_.push_back(std::unique_ptr<SlowPathCode>(slow_path));
   }
 
-  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
-  size_t ComputeStackMapsSize();
+  void BuildStackMaps(MemoryRegion stack_map_region,
+                      MemoryRegion method_info_region,
+                      const DexFile::CodeItem& code_item);
+  void ComputeStackMapAndMethodInfoSize(size_t* stack_map_size, size_t* method_info_size);
   size_t GetNumberOfJitRoots() const {
     return jit_string_roots_.size() + jit_class_roots_.size();
   }
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 2560c9f..d7cc577 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -1134,7 +1134,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
     // The read barrier instrumentation of object ArrayGet
@@ -1564,10 +1564,346 @@
   }
 }
 
+static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARM* codegen) {
+  Primitive::Type type = instruction->InputAt(0)->GetType();
+  Location lhs_loc = instruction->GetLocations()->InAt(0);
+  Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note however that some methods (Float.equals, Float.compare,
+    // Float.compareTo, Double.equals, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+    if (type == Primitive::kPrimFloat) {
+      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  } else {
+    if (type == Primitive::kPrimFloat) {
+      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
+               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
+    }
+  }
+}
+
+static std::pair<Condition, Condition> GenerateLongTestConstant(HCondition* condition,
+                                                                bool invert,
+                                                                CodeGeneratorARM* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = condition->GetCondition();
+  IfCondition opposite = condition->GetOppositeCondition();
+
+  if (invert) {
+    std::swap(cond, opposite);
+  }
+
+  std::pair<Condition, Condition> ret;
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  DCHECK(right.IsConstant());
+
+  const Register left_high = left.AsRegisterPairHigh<Register>();
+  const Register left_low = left.AsRegisterPairLow<Register>();
+  int64_t value = right.GetConstant()->AsLongConstant()->GetValue();
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE:
+      __ CmpConstant(left_high, High32Bits(value));
+      __ it(EQ);
+      __ cmp(left_low, ShifterOperand(Low32Bits(value)), EQ);
+      ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+      break;
+    case kCondLE:
+    case kCondGT:
+      // Trivially true or false.
+      if (value == std::numeric_limits<int64_t>::max()) {
+        __ cmp(left_low, ShifterOperand(left_low));
+        ret = cond == kCondLE ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ);
+        break;
+      }
+
+      if (cond == kCondLE) {
+        DCHECK_EQ(opposite, kCondGT);
+        cond = kCondLT;
+        opposite = kCondGE;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        DCHECK_EQ(opposite, kCondLE);
+        cond = kCondGE;
+        opposite = kCondLT;
+      }
+
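+      // x <= c is equivalent to x < c + 1 (and x > c to x >= c + 1); the
+      // c == INT64_MAX case was handled above, so the increment cannot overflow.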
+      value++;
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT:
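+      // CMP on the low words followed by SBCS on the high words computes
+      // left - value, setting flags for the full 64-bit signed comparison
+      // without branching.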
+      __ CmpConstant(left_low, Low32Bits(value));
+      __ sbcs(IP, left_high, ShifterOperand(High32Bits(value)));
+      ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
+
+static std::pair<Condition, Condition> GenerateLongTest(HCondition* condition,
+                                                        bool invert,
+                                                        CodeGeneratorARM* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = condition->GetCondition();
+  IfCondition opposite = condition->GetOppositeCondition();
+
+  if (invert) {
+    std::swap(cond, opposite);
+  }
+
+  std::pair<Condition, Condition> ret;
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  DCHECK(right.IsRegisterPair());
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE:
+      __ cmp(left.AsRegisterPairHigh<Register>(),
+             ShifterOperand(right.AsRegisterPairHigh<Register>()));
+      __ it(EQ);
+      __ cmp(left.AsRegisterPairLow<Register>(),
+             ShifterOperand(right.AsRegisterPairLow<Register>()),
+             EQ);
+      ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+      break;
+    case kCondLE:
+    case kCondGT:
+      if (cond == kCondLE) {
+        DCHECK_EQ(opposite, kCondGT);
+        cond = kCondGE;
+        opposite = kCondLT;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        DCHECK_EQ(opposite, kCondLE);
+        cond = kCondLT;
+        opposite = kCondGE;
+      }
+
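+      // a <= b is equivalent to b >= a (and a > b to b < a), so swap the
+      // operands and fall through to the GE/LT code below.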
+      std::swap(left, right);
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT:
+      __ cmp(left.AsRegisterPairLow<Register>(),
+             ShifterOperand(right.AsRegisterPairLow<Register>()));
+      __ sbcs(IP,
+              left.AsRegisterPairHigh<Register>(),
+              ShifterOperand(right.AsRegisterPairHigh<Register>()));
+      ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
+
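+// Emits a flag-setting test for `condition` and returns the ARM condition
+// codes to use for the (possibly inverted) condition and for its opposite.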
+static std::pair<Condition, Condition> GenerateTest(HCondition* condition,
+                                                    bool invert,
+                                                    CodeGeneratorARM* codegen) {
+  const LocationSummary* const locations = condition->GetLocations();
+  const Primitive::Type type = condition->GetLeft()->GetType();
+  IfCondition cond = condition->GetCondition();
+  IfCondition opposite = condition->GetOppositeCondition();
+  std::pair<Condition, Condition> ret;
+  const Location right = locations->InAt(1);
+
+  if (invert) {
+    std::swap(cond, opposite);
+  }
+
+  if (type == Primitive::kPrimLong) {
+    ret = locations->InAt(1).IsConstant()
+        ? GenerateLongTestConstant(condition, invert, codegen)
+        : GenerateLongTest(condition, invert, codegen);
+  } else if (Primitive::IsFloatingPointType(type)) {
+    GenerateVcmp(condition, codegen);
+    __ vmstat();
+    ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
+                         ARMFPCondition(opposite, condition->IsGtBias()));
+  } else {
+    DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+    const Register left = locations->InAt(0).AsRegister<Register>();
+
+    if (right.IsRegister()) {
+      __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+    } else {
+      DCHECK(right.IsConstant());
+      __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+    }
+
+    ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+  }
+
+  return ret;
+}
+
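+// Checks whether GenerateTest() can encode the test; only comparisons of a
+// long against a constant have encoding restrictions (see below).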
+static bool CanGenerateTest(HCondition* condition, ArmAssembler* assembler) {
+  if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+    const LocationSummary* const locations = condition->GetLocations();
+    const IfCondition c = condition->GetCondition();
+
+    if (locations->InAt(1).IsConstant()) {
+      const int64_t value = locations->InAt(1).GetConstant()->AsLongConstant()->GetValue();
+      ShifterOperand so;
+
+      if (c < kCondLT || c > kCondGE) {
+        // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+        // we check that the least significant half of the first input is in a low
+        // register (the other half is read outside an IT block) and that the
+        // constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+        // encoding can be used.
+        if (!ArmAssembler::IsLowRegister(locations->InAt(0).AsRegisterPairLow<Register>()) ||
+            !IsUint<8>(Low32Bits(value))) {
+          return false;
+        }
+      } else if (c == kCondLE || c == kCondGT) {
+        if (value < std::numeric_limits<int64_t>::max() &&
+            !assembler->ShifterOperandCanHold(kNoRegister,
+                                              kNoRegister,
+                                              SBC,
+                                              High32Bits(value + 1),
+                                              kCcSet,
+                                              &so)) {
+          return false;
+        }
+      } else if (!assembler->ShifterOperandCanHold(kNoRegister,
+                                                   kNoRegister,
+                                                   SBC,
+                                                   High32Bits(value),
+                                                   kCcSet,
+                                                   &so)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
+  const Primitive::Type type = constant->GetType();
+  bool ret = false;
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    const uint64_t value = constant->AsLongConstant()->GetValueAsUint64();
+
+    ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
+  } else {
+    ret = IsUint<8>(CodeGenerator::GetInt32ValueOf(constant));
+  }
+
+  return ret;
+}
+
+static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+
+  if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
+static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
+  // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+  // we check that we are not dealing with floating-point output (there is no
+  // 16-bit VMOV encoding).
+  if (!out.IsRegister() && !out.IsRegisterPair()) {
+    return false;
+  }
+
+  // For constants, we also check that the output is in one or two low registers,
+  // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
+  // MOV encoding can be used.
+  if (src.IsConstant()) {
+    if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
+      return false;
+    }
+
+    if (out.IsRegister()) {
+      if (!ArmAssembler::IsLowRegister(out.AsRegister<Register>())) {
+        return false;
+      }
+    } else {
+      DCHECK(out.IsRegisterPair());
+
+      if (!ArmAssembler::IsLowRegister(out.AsRegisterPairHigh<Register>())) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 #undef __
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<ArmAssembler*>(GetAssembler())->  // NOLINT
 
+Label* CodeGeneratorARM::GetFinalLabel(HInstruction* instruction, Label* final_label) {
+  DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+  DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
+
+  const HBasicBlock* const block = instruction->GetBlock();
+  const HLoopInformation* const info = block->GetLoopInformation();
+  HInstruction* const next = instruction->GetNext();
+
+  // Avoid a branch to a branch.
+  if (next->IsGoto() && (info == nullptr ||
+                         !info->IsBackEdge(*block) ||
+                         !info->HasSuspendCheck())) {
+    final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
+  }
+
+  return final_label;
+}
+
 void CodeGeneratorARM::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << Register(reg);
 }
@@ -1626,8 +1962,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -2094,51 +2428,6 @@
 void InstructionCodeGeneratorARM::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARM::GenerateVcmp(HInstruction* instruction) {
-  Primitive::Type type = instruction->InputAt(0)->GetType();
-  Location lhs_loc = instruction->GetLocations()->InAt(0);
-  Location rhs_loc = instruction->GetLocations()->InAt(1);
-  if (rhs_loc.IsConstant()) {
-    // 0.0 is the only immediate that can be encoded directly in
-    // a VCMP instruction.
-    //
-    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
-    // specify that in a floating-point comparison, positive zero
-    // and negative zero are considered equal, so we can use the
-    // literal 0.0 for both cases here.
-    //
-    // Note however that some methods (Float.equal, Float.compare,
-    // Float.compareTo, Double.equal, Double.compare,
-    // Double.compareTo, Math.max, Math.min, StrictMath.max,
-    // StrictMath.min) consider 0.0 to be (strictly) greater than
-    // -0.0. So if we ever translate calls to these methods into a
-    // HCompare instruction, we must handle the -0.0 case with
-    // care here.
-    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
-    if (type == Primitive::kPrimFloat) {
-      __ vcmpsz(lhs_loc.AsFpuRegister<SRegister>());
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ vcmpdz(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()));
-    }
-  } else {
-    if (type == Primitive::kPrimFloat) {
-      __ vcmps(lhs_loc.AsFpuRegister<SRegister>(), rhs_loc.AsFpuRegister<SRegister>());
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ vcmpd(FromLowSToD(lhs_loc.AsFpuRegisterPairLow<SRegister>()),
-               FromLowSToD(rhs_loc.AsFpuRegisterPairLow<SRegister>()));
-    }
-  }
-}
-
-void InstructionCodeGeneratorARM::GenerateFPJumps(HCondition* cond,
-                                                  Label* true_label,
-                                                  Label* false_label ATTRIBUTE_UNUSED) {
-  __ vmstat();  // transfer FP status register to ARM APSR.
-  __ b(true_label, ARMFPCondition(cond->GetCondition(), cond->IsGtBias()));
-}
-
 void InstructionCodeGeneratorARM::GenerateLongComparesAndJumps(HCondition* cond,
                                                                Label* true_label,
                                                                Label* false_label) {
@@ -2155,7 +2444,6 @@
 
   // Set the conditions for the test, remembering that == needs to be
   // decided using the low words.
-  // TODO: consider avoiding jumps with temporary and CMP low+SBC high
   switch (if_cond) {
     case kCondEQ:
     case kCondNE:
@@ -2226,25 +2514,38 @@
 void InstructionCodeGeneratorARM::GenerateCompareTestAndBranch(HCondition* condition,
                                                                Label* true_target_in,
                                                                Label* false_target_in) {
+  if (CanGenerateTest(condition, codegen_->GetAssembler())) {
+    Label* non_fallthrough_target;
+    bool invert;
+
+    if (true_target_in == nullptr) {
+      DCHECK(false_target_in != nullptr);
+      non_fallthrough_target = false_target_in;
+      invert = true;
+    } else {
+      non_fallthrough_target = true_target_in;
+      invert = false;
+    }
+
+    const auto cond = GenerateTest(condition, invert, codegen_);
+
+    __ b(non_fallthrough_target, cond.first);
+
+    if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
+      __ b(false_target_in);
+    }
+
+    return;
+  }
+
   // Generated branching requires both targets to be explicit. If either of the
   // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
   Label fallthrough_target;
   Label* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
   Label* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
 
-  Primitive::Type type = condition->InputAt(0)->GetType();
-  switch (type) {
-    case Primitive::kPrimLong:
-      GenerateLongComparesAndJumps(condition, true_target, false_target);
-      break;
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      GenerateVcmp(condition);
-      GenerateFPJumps(condition, true_target, false_target);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected compare type " << type;
-  }
+  DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+  GenerateLongComparesAndJumps(condition, true_target, false_target);
 
   if (false_target != &fallthrough_target) {
     __ b(false_target);
@@ -2309,20 +2610,38 @@
       return;
     }
 
+    Label* non_fallthrough_target;
+    Condition arm_cond;
     LocationSummary* locations = cond->GetLocations();
     DCHECK(locations->InAt(0).IsRegister());
     Register left = locations->InAt(0).AsRegister<Register>();
     Location right = locations->InAt(1);
-    if (right.IsRegister()) {
-      __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
-    } else {
-      DCHECK(right.IsConstant());
-      __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-    }
+
     if (true_target == nullptr) {
-      __ b(false_target, ARMCondition(condition->GetOppositeCondition()));
+      arm_cond = ARMCondition(condition->GetOppositeCondition());
+      non_fallthrough_target = false_target;
     } else {
-      __ b(true_target, ARMCondition(condition->GetCondition()));
+      arm_cond = ARMCondition(condition->GetCondition());
+      non_fallthrough_target = true_target;
+    }
+
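+    // A comparison of an integer against zero with EQ/NE can use CBZ/CBNZ
+    // directly instead of CMP plus a conditional branch.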
+    if (right.IsConstant() && (arm_cond == NE || arm_cond == EQ) &&
+        CodeGenerator::GetInt32ValueOf(right.GetConstant()) == 0) {
+      if (arm_cond == EQ) {
+        __ CompareAndBranchIfZero(left, non_fallthrough_target);
+      } else {
+        DCHECK_EQ(arm_cond, NE);
+        __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+      }
+    } else {
+      if (right.IsRegister()) {
+        __ cmp(left, ShifterOperand(right.AsRegister<Register>()));
+      } else {
+        DCHECK(right.IsConstant());
+        __ CmpConstant(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
+      }
+
+      __ b(non_fallthrough_target, arm_cond);
     }
   }
 
@@ -2382,28 +2701,148 @@
 
 void LocationsBuilderARM::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  if (Primitive::IsFloatingPointType(select->GetType())) {
+  const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType());
+
+  if (is_floating_point) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
   }
+
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
-    locations->SetInAt(2, Location::RequiresRegister());
+    locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
+    // The code generator handles overlap with the values, but not with the condition.
+    locations->SetOut(Location::SameAsFirstInput());
+  } else if (is_floating_point) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    if (!locations->InAt(1).IsConstant()) {
+      locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
+    }
+
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
-  locations->SetOut(Location::SameAsFirstInput());
 }
 
 void InstructionCodeGeneratorARM::VisitSelect(HSelect* select) {
-  LocationSummary* locations = select->GetLocations();
-  Label false_target;
-  GenerateTestAndBranch(select,
-                        /* condition_input_index */ 2,
-                        /* true_target */ nullptr,
-                        &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  HInstruction* const condition = select->GetCondition();
+  const LocationSummary* const locations = select->GetLocations();
+  const Primitive::Type type = select->GetType();
+  const Location first = locations->InAt(0);
+  const Location out = locations->Out();
+  const Location second = locations->InAt(1);
+  Location src;
+
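+  // A constant condition statically selects one of the inputs; a plain move
+  // suffices and no test needs to be emitted.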
+  if (condition->IsIntConstant()) {
+    if (condition->AsIntConstant()->IsFalse()) {
+      src = first;
+    } else {
+      src = second;
+    }
+
+    codegen_->MoveLocation(out, src, type);
+    return;
+  }
+
+  if (!Primitive::IsFloatingPointType(type) &&
+      (IsBooleanValueOrMaterializedCondition(condition) ||
+       CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
+    bool invert = false;
+
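+    // If the output aliases one of the inputs, only the other input needs a
+    // conditional move; moving the false value requires inverting the condition.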
+    if (out.Equals(second)) {
+      src = first;
+      invert = true;
+    } else if (out.Equals(first)) {
+      src = second;
+    } else if (second.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
+      src = second;
+    } else if (first.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
+      src = first;
+      invert = true;
+    } else {
+      src = second;
+    }
+
+    if (CanGenerateConditionalMove(out, src)) {
+      if (!out.Equals(first) && !out.Equals(second)) {
+        codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
+      }
+
+      std::pair<Condition, Condition> cond;
+
+      if (IsBooleanValueOrMaterializedCondition(condition)) {
+        __ CmpConstant(locations->InAt(2).AsRegister<Register>(), 0);
+        cond = invert ? std::make_pair(EQ, NE) : std::make_pair(NE, EQ);
+      } else {
+        cond = GenerateTest(condition->AsCondition(), invert, codegen_);
+      }
+
+      if (out.IsRegister()) {
+        ShifterOperand operand;
+
+        if (src.IsConstant()) {
+          operand = ShifterOperand(CodeGenerator::GetInt32ValueOf(src.GetConstant()));
+        } else {
+          DCHECK(src.IsRegister());
+          operand = ShifterOperand(src.AsRegister<Register>());
+        }
+
+        __ it(cond.first);
+        __ mov(out.AsRegister<Register>(), operand, cond.first);
+      } else {
+        DCHECK(out.IsRegisterPair());
+
+        ShifterOperand operand_high;
+        ShifterOperand operand_low;
+
+        if (src.IsConstant()) {
+          const int64_t value = src.GetConstant()->AsLongConstant()->GetValue();
+
+          operand_high = ShifterOperand(High32Bits(value));
+          operand_low = ShifterOperand(Low32Bits(value));
+        } else {
+          DCHECK(src.IsRegisterPair());
+          operand_high = ShifterOperand(src.AsRegisterPairHigh<Register>());
+          operand_low = ShifterOperand(src.AsRegisterPairLow<Register>());
+        }
+
+        __ it(cond.first);
+        __ mov(out.AsRegisterPairLow<Register>(), operand_low, cond.first);
+        __ it(cond.first);
+        __ mov(out.AsRegisterPairHigh<Register>(), operand_high, cond.first);
+      }
+
+      return;
+    }
+  }
+
+  Label* false_target = nullptr;
+  Label* true_target = nullptr;
+  Label select_end;
+  Label* target = codegen_->GetFinalLabel(select, &select_end);
+
+  if (out.Equals(second)) {
+    true_target = target;
+    src = first;
+  } else {
+    false_target = target;
+    src = second;
+
+    if (!out.Equals(first)) {
+      codegen_->MoveLocation(out, first, type);
+    }
+  }
+
+  GenerateTestAndBranch(select, 2, true_target, false_target);
+  codegen_->MoveLocation(out, src, type);
+
+  if (select_end.IsLinked()) {
+    __ Bind(&select_end);
+  }
 }
 
 void LocationsBuilderARM::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -2427,7 +2866,7 @@
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
       break;
 
@@ -2454,51 +2893,48 @@
     return;
   }
 
-  LocationSummary* locations = cond->GetLocations();
-  Location left = locations->InAt(0);
-  Location right = locations->InAt(1);
-  Register out = locations->Out().AsRegister<Register>();
-  Label true_label, false_label;
+  const Register out = cond->GetLocations()->Out().AsRegister<Register>();
 
-  switch (cond->InputAt(0)->GetType()) {
-    default: {
-      // Integer case.
-      if (right.IsRegister()) {
-        __ cmp(left.AsRegister<Register>(), ShifterOperand(right.AsRegister<Register>()));
-      } else {
-        DCHECK(right.IsConstant());
-        __ CmpConstant(left.AsRegister<Register>(),
-                       CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-      }
-      __ it(ARMCondition(cond->GetCondition()), kItElse);
-      __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(1),
-             ARMCondition(cond->GetCondition()));
-      __ mov(locations->Out().AsRegister<Register>(), ShifterOperand(0),
-             ARMCondition(cond->GetOppositeCondition()));
-      return;
-    }
-    case Primitive::kPrimLong:
-      GenerateLongComparesAndJumps(cond, &true_label, &false_label);
-      break;
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      GenerateVcmp(cond);
-      GenerateFPJumps(cond, &true_label, &false_label);
-      break;
+  if (ArmAssembler::IsLowRegister(out) && CanGenerateTest(cond, codegen_->GetAssembler())) {
+    const auto condition = GenerateTest(cond, false, codegen_);
+
+    __ it(condition.first);
+    __ mov(out, ShifterOperand(1), condition.first);
+    __ it(condition.second);
+    __ mov(out, ShifterOperand(0), condition.second);
+    return;
   }
 
   // Convert the jumps into the result.
   Label done_label;
+  Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
 
-  // False case: result = 0.
-  __ Bind(&false_label);
-  __ LoadImmediate(out, 0);
-  __ b(&done_label);
+  if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+    Label true_label, false_label;
 
-  // True case: result = 1.
-  __ Bind(&true_label);
-  __ LoadImmediate(out, 1);
-  __ Bind(&done_label);
+    GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+
+    // False case: result = 0.
+    __ Bind(&false_label);
+    __ LoadImmediate(out, 0);
+    __ b(final_label);
+
+    // True case: result = 1.
+    __ Bind(&true_label);
+    __ LoadImmediate(out, 1);
+  } else {
+    DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+
+    const auto condition = GenerateTest(cond, false, codegen_);
+
+    __ mov(out, ShifterOperand(0), AL, kCcKeep);
+    __ b(final_label, condition.second);
+    __ LoadImmediate(out, 1);
+  }
+
+  if (done_label.IsLinked()) {
+    __ Bind(&done_label);
+  }
 }
 
 void LocationsBuilderARM::VisitEqual(HEqual* comp) {
@@ -4029,7 +4465,8 @@
 // rotates by swapping input regs (effectively rotating by the first 32-bits of
 // a larger rotation) or flipping direction (thus treating larger right/left
 // rotations as sub-word sized rotations in the other direction) as appropriate.
-void InstructionCodeGeneratorARM::HandleLongRotate(LocationSummary* locations) {
+void InstructionCodeGeneratorARM::HandleLongRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
   Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
   Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
   Location rhs = locations->InAt(1);
@@ -4062,6 +4499,7 @@
     Register shift_left = locations->GetTemp(1).AsRegister<Register>();
     Label end;
     Label shift_by_32_plus_shift_right;
+    Label* final_label = codegen_->GetFinalLabel(ror, &end);
 
     __ and_(shift_right, rhs.AsRegister<Register>(), ShifterOperand(0x1F));
     __ Lsrs(shift_left, rhs.AsRegister<Register>(), 6);
@@ -4076,7 +4514,7 @@
     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
     __ Lsr(shift_left, in_reg_hi, shift_right);
     __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_left));
-    __ b(&end);
+    __ b(final_label);
 
     __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
     // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4088,7 +4526,9 @@
     __ Lsl(shift_right, in_reg_hi, shift_left);
     __ add(out_reg_lo, out_reg_lo, ShifterOperand(shift_right));
 
-    __ Bind(&end);
+    if (end.IsLinked()) {
+      __ Bind(&end);
+    }
   }
 }
 
@@ -4128,7 +4568,7 @@
       break;
     }
     case Primitive::kPrimLong: {
-      HandleLongRotate(locations);
+      HandleLongRotate(ror);
       break;
     }
     default:
@@ -4507,6 +4947,7 @@
   Location right = locations->InAt(1);
 
   Label less, greater, done;
+  Label* final_label = codegen_->GetFinalLabel(compare, &done);
   Primitive::Type type = compare->InputAt(0)->GetType();
   Condition less_cond;
   switch (type) {
@@ -4536,7 +4977,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ LoadImmediate(out, 0);
-      GenerateVcmp(compare);
+      GenerateVcmp(compare, codegen_);
       __ vmstat();  // transfer FP status register to ARM APSR.
       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
       break;
@@ -4546,17 +4987,19 @@
       UNREACHABLE();
   }
 
-  __ b(&done, EQ);
+  __ b(final_label, EQ);
   __ b(&less, less_cond);
 
   __ Bind(&greater);
   __ LoadImmediate(out, 1);
-  __ b(&done);
+  __ b(final_label);
 
   __ Bind(&less);
   __ LoadImmediate(out, -1);
 
-  __ Bind(&done);
+  if (done.IsLinked()) {
+    __ Bind(&done);
+  }
 }
 
 void LocationsBuilderARM::VisitPhi(HPhi* instruction) {
@@ -4892,17 +5335,29 @@
     return true;
   }
   Opcode neg_opcode = kNoOperand;
+  uint32_t neg_value = 0;
   switch (opcode) {
-    case AND: neg_opcode = BIC; value = ~value; break;
-    case ORR: neg_opcode = ORN; value = ~value; break;
-    case ADD: neg_opcode = SUB; value = -value; break;
-    case ADC: neg_opcode = SBC; value = ~value; break;
-    case SUB: neg_opcode = ADD; value = -value; break;
-    case SBC: neg_opcode = ADC; value = ~value; break;
+    case AND: neg_opcode = BIC; neg_value = ~value; break;
+    case ORR: neg_opcode = ORN; neg_value = ~value; break;
+    case ADD: neg_opcode = SUB; neg_value = -value; break;
+    case ADC: neg_opcode = SBC; neg_value = ~value; break;
+    case SUB: neg_opcode = ADD; neg_value = -value; break;
+    case SBC: neg_opcode = ADC; neg_value = ~value; break;
+    case MOV: neg_opcode = MVN; neg_value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(kNoRegister, kNoRegister, neg_opcode, value, set_cc, &so);
+
+  if (assembler->ShifterOperandCanHold(kNoRegister,
+                                       kNoRegister,
+                                       neg_opcode,
+                                       neg_value,
+                                       set_cc,
+                                       &so)) {
+    return true;
+  }
+
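+  // A value one less than a power of two is a contiguous low-bit mask; an AND
+  // with such a mask can be emitted as a bit-field extract (UBFX), so it does
+  // not need an encodable immediate.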
+  return opcode == AND && IsPowerOfTwo(value + 1);
 }
 
 void InstructionCodeGeneratorARM::HandleFieldGet(HInstruction* instruction,
@@ -5322,6 +5777,7 @@
         int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
         if (maybe_compressed_char_at) {
           Label uncompressed_load, done;
+          Label* final_label = codegen_->GetFinalLabel(instruction, &done);
           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                         "Expecting 0=compressed, 1=uncompressed");
@@ -5330,13 +5786,15 @@
                             out_loc.AsRegister<Register>(),
                             obj,
                             data_offset + const_index);
-          __ b(&done);
+          __ b(final_label);
           __ Bind(&uncompressed_load);
           __ LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
                             out_loc.AsRegister<Register>(),
                             obj,
                             data_offset + (const_index << 1));
-          __ Bind(&done);
+          if (done.IsLinked()) {
+            __ Bind(&done);
+          }
         } else {
           uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
 
@@ -5360,17 +5818,20 @@
         }
         if (maybe_compressed_char_at) {
           Label uncompressed_load, done;
+          Label* final_label = codegen_->GetFinalLabel(instruction, &done);
           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                         "Expecting 0=compressed, 1=uncompressed");
           __ b(&uncompressed_load, CS);
           __ ldrb(out_loc.AsRegister<Register>(),
                   Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
-          __ b(&done);
+          __ b(final_label);
           __ Bind(&uncompressed_load);
           __ ldrh(out_loc.AsRegister<Register>(),
                   Address(temp, index.AsRegister<Register>(), Shift::LSL, 1));
-          __ Bind(&done);
+          if (done.IsLinked()) {
+            __ Bind(&done);
+          }
         } else {
           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, index.AsRegister<Register>());
         }
@@ -5595,6 +6056,7 @@
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
       Label done;
+      Label* final_label = codegen_->GetFinalLabel(instruction, &done);
       SlowPathCodeARM* slow_path = nullptr;
 
       if (may_need_runtime_call_for_type_check) {
@@ -5616,7 +6078,7 @@
                                               index.AsRegister<Register>());
           }
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ b(&done);
+          __ b(final_label);
           __ Bind(&non_zero);
         }
 
@@ -5804,21 +6266,59 @@
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
   caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+
+  HInstruction* index = instruction->InputAt(0);
+  HInstruction* length = instruction->InputAt(1);
+  // If both index and length are constants, we can check the bounds statically. However, if
+  // either constant is not encodable, ArmEncodableConstantOrRegister would fall back to
+  // Location::RequiresRegister(), which we want to avoid here, so we create constant
+  // locations instead.
+  bool both_const = index->IsConstant() && length->IsConstant();
+  locations->SetInAt(0, both_const
+      ? Location::ConstantLocation(index->AsConstant())
+      : ArmEncodableConstantOrRegister(index, CMP));
+  locations->SetInAt(1, both_const
+      ? Location::ConstantLocation(length->AsConstant())
+      : ArmEncodableConstantOrRegister(length, CMP));
 }
 
 void InstructionCodeGeneratorARM::VisitBoundsCheck(HBoundsCheck* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  SlowPathCodeARM* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
-  codegen_->AddSlowPath(slow_path);
+  Location index_loc = locations->InAt(0);
+  Location length_loc = locations->InAt(1);
 
-  Register index = locations->InAt(0).AsRegister<Register>();
-  Register length = locations->InAt(1).AsRegister<Register>();
+  if (length_loc.IsConstant()) {
+    int32_t length = helpers::Int32ConstantFrom(length_loc);
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guaranteed to pass.
+      int32_t index = helpers::Int32ConstantFrom(index_loc);
+      if (index < 0 || index >= length) {
+        SlowPathCodeARM* slow_path =
+            new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+        codegen_->AddSlowPath(slow_path);
+        __ b(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check if it is a valid range.
+      }
+      return;
+    }
 
-  __ cmp(index, ShifterOperand(length));
-  __ b(slow_path->GetEntryLabel(), HS);
+    SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+    __ cmp(index_loc.AsRegister<Register>(), ShifterOperand(length));
+    codegen_->AddSlowPath(slow_path);
+    __ b(slow_path->GetEntryLabel(), HS);
+  } else {
+    SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathARM(instruction);
+    if (index_loc.IsConstant()) {
+      int32_t index = helpers::Int32ConstantFrom(index_loc);
+      __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index));
+    } else {
+      __ cmp(length_loc.AsRegister<Register>(), ShifterOperand(index_loc.AsRegister<Register>()));
+    }
+    codegen_->AddSlowPath(slow_path);
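+    // The operands are reversed here (length compared against index), so the failure
+    // condition index >= length (unsigned) becomes length <= index, hence LS.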
+    __ b(slow_path->GetEntryLabel(), LS);
+  }
 }
 
 void CodeGeneratorARM::MarkGCCard(Register temp,
@@ -6558,13 +7058,16 @@
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  Label done, zero;
+  Label done;
+  Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
   SlowPathCodeARM* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
    // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ CompareAndBranchIfZero(obj, &zero);
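+    // `out` is zeroed before the null test below, so it must not alias `obj`.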
+    DCHECK_NE(out, obj);
+    __ LoadImmediate(out, 0);
+    __ CompareAndBranchIfZero(obj, final_label);
   }
 
   switch (type_check_kind) {
@@ -6576,11 +7079,23 @@
                                         class_offset,
                                         maybe_temp_loc,
                                         kCompilerReadBarrierOption);
-      __ cmp(out, ShifterOperand(cls));
       // Classes must be equal for the instanceof to succeed.
-      __ b(&zero, NE);
-      __ LoadImmediate(out, 1);
-      __ b(&done);
+      __ cmp(out, ShifterOperand(cls));
+      // We speculatively set the result to false without changing the condition
+      // flags, which allows us to avoid some branching later.
+      __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+      // we check that the output is in a low register, so that a 16-bit MOV
+      // encoding can be used.
+      if (ArmAssembler::IsLowRegister(out)) {
+        __ it(EQ);
+        __ mov(out, ShifterOperand(1), EQ);
+      } else {
+        __ b(final_label, NE);
+        __ LoadImmediate(out, 1);
+      }
+
       break;
     }
 
@@ -6602,14 +7117,11 @@
                                        super_offset,
                                        maybe_temp_loc,
                                        kCompilerReadBarrierOption);
-      // If `out` is null, we use it for the result, and jump to `done`.
-      __ CompareAndBranchIfZero(out, &done);
+      // If `out` is null, we use it for the result, and jump to the final label.
+      __ CompareAndBranchIfZero(out, final_label);
       __ cmp(out, ShifterOperand(cls));
       __ b(&loop, NE);
       __ LoadImmediate(out, 1);
-      if (zero.IsLinked()) {
-        __ b(&done);
-      }
       break;
     }
 
@@ -6632,14 +7144,32 @@
                                        super_offset,
                                        maybe_temp_loc,
                                        kCompilerReadBarrierOption);
-      __ CompareAndBranchIfNonZero(out, &loop);
-      // If `out` is null, we use it for the result, and jump to `done`.
-      __ b(&done);
-      __ Bind(&success);
-      __ LoadImmediate(out, 1);
-      if (zero.IsLinked()) {
-        __ b(&done);
+      // This is essentially a null check, but it sets the condition flags to the
+      // proper value for the code that follows the loop, i.e. not `EQ`.
+      __ cmp(out, ShifterOperand(1));
+      __ b(&loop, HS);
+
+      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+      // we check that the output is in a low register, so that a 16-bit MOV
+      // encoding can be used.
+      if (ArmAssembler::IsLowRegister(out)) {
+        // If `out` is null, we use it for the result, and the condition flags
+        // have already been set to `NE`, so the IT block that comes afterwards
+        // (and which handles the successful case) turns into a NOP (instead of
+        // overwriting `out`).
+        __ Bind(&success);
+        // There is only one branch to the `success` label (which is bound to this
+        // IT block), and it has the same condition, `EQ`, so in that case the MOV
+        // is executed.
+        __ it(EQ);
+        __ mov(out, ShifterOperand(1), EQ);
+      } else {
+        // If `out` is null, we use it for the result, and jump to the final label.
+        __ b(final_label);
+        __ Bind(&success);
+        __ LoadImmediate(out, 1);
       }
+
       break;
     }
 
@@ -6662,14 +7192,28 @@
                                        component_offset,
                                        maybe_temp_loc,
                                        kCompilerReadBarrierOption);
-      // If `out` is null, we use it for the result, and jump to `done`.
-      __ CompareAndBranchIfZero(out, &done);
+      // If `out` is null, we use it for the result, and jump to the final label.
+      __ CompareAndBranchIfZero(out, final_label);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(out, &zero);
-      __ Bind(&exact_check);
-      __ LoadImmediate(out, 1);
-      __ b(&done);
+      __ cmp(out, ShifterOperand(0));
+      // We speculatively set the result to false without changing the condition
+      // flags, which allows us to avoid some branching later.
+      __ mov(out, ShifterOperand(0), AL, kCcKeep);
+
+      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+      // we check that the output is in a low register, so that a 16-bit MOV
+      // encoding can be used.
+      if (ArmAssembler::IsLowRegister(out)) {
+        __ Bind(&exact_check);
+        __ it(EQ);
+        __ mov(out, ShifterOperand(1), EQ);
+      } else {
+        __ b(final_label, NE);
+        __ Bind(&exact_check);
+        __ LoadImmediate(out, 1);
+      }
+
       break;
     }
 
@@ -6689,9 +7233,6 @@
       codegen_->AddSlowPath(slow_path);
       __ b(slow_path->GetEntryLabel(), NE);
       __ LoadImmediate(out, 1);
-      if (zero.IsLinked()) {
-        __ b(&done);
-      }
       break;
     }
 
@@ -6720,18 +7261,10 @@
                                                                     /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ b(slow_path->GetEntryLabel());
-      if (zero.IsLinked()) {
-        __ b(&done);
-      }
       break;
     }
   }
 
-  if (zero.IsLinked()) {
-    __ Bind(&zero);
-    __ LoadImmediate(out, 0);
-  }
-
   if (done.IsLinked()) {
     __ Bind(&done);
   }
@@ -6807,9 +7340,10 @@
   codegen_->AddSlowPath(type_check_slow_path);
 
   Label done;
+  Label* final_label = codegen_->GetFinalLabel(instruction, &done);
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ CompareAndBranchIfZero(obj, &done);
+    __ CompareAndBranchIfZero(obj, final_label);
   }
 
   switch (type_check_kind) {
@@ -6873,7 +7407,7 @@
       Label loop;
       __ Bind(&loop);
       __ cmp(temp, ShifterOperand(cls));
-      __ b(&done, EQ);
+      __ b(final_label, EQ);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction,
@@ -6901,7 +7435,7 @@
 
       // Do an exact check.
       __ cmp(temp, ShifterOperand(cls));
-      __ b(&done, EQ);
+      __ b(final_label, EQ);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
@@ -6971,7 +7505,10 @@
       break;
     }
   }
-  __ Bind(&done);
+
+  if (done.IsLinked()) {
+    __ Bind(&done);
+  }
 
   __ Bind(type_check_slow_path->GetExitLabel());
 }
@@ -7158,9 +7695,11 @@
   ShifterOperand so;
   if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, AND, value, &so)) {
     __ and_(out, first, so);
-  } else {
-    DCHECK(__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so));
+  } else if (__ ShifterOperandCanHold(kNoRegister, kNoRegister, BIC, ~value, &so)) {
     __ bic(out, first, ShifterOperand(~value));
+  } else {
+    DCHECK(IsPowerOfTwo(value + 1));
+    __ ubfx(out, first, 0, WhichPowerOf2(value + 1));
   }
 }
 
@@ -7846,9 +8385,7 @@
 }
 
 Literal* CodeGeneratorARM::DeduplicateBootImageAddressLiteral(uint32_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 Literal* CodeGeneratorARM::DeduplicateJitStringLiteral(const DexFile& dex_file,
@@ -7899,8 +8436,7 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
-      boot_image_address_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -7934,13 +8470,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = literal->GetLabel()->Position();
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index 1f68777..86f2f21 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -237,7 +237,7 @@
   void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleCondition(HCondition* condition);
   void HandleIntegerRotate(LocationSummary* locations);
-  void HandleLongRotate(LocationSummary* locations);
+  void HandleLongRotate(HRor* ror);
   void HandleShift(HBinaryOperation* operation);
 
   void GenerateWideAtomicStore(Register addr, uint32_t offset,
@@ -299,8 +299,6 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     Label* true_target,
                                     Label* false_target);
-  void GenerateVcmp(HInstruction* instruction);
-  void GenerateFPJumps(HCondition* cond, Label* true_label, Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond, Label* true_label, Label* false_label);
   void DivRemOneOrMinusOne(HBinaryOperation* instruction);
   void DivRemByPowerOfTwo(HBinaryOperation* instruction);
@@ -422,6 +420,8 @@
     return CommonGetLabelOf<Label>(block_labels_, block);
   }
 
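+  // If `instruction` is immediately followed by a goto, returns the label of the
+  // goto's target block so code can branch straight there instead of branching to
+  // a branch; otherwise returns `final_label` unchanged.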
+  Label* GetFinalLabel(HInstruction* instruction, Label* final_label);
+
   void Initialize() OVERRIDE {
     block_labels_ = CommonInitializeLabels<Label>();
   }
@@ -648,8 +648,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 7b6c97c..794e05c 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -153,7 +153,8 @@
                                          codegen->GetNumberOfFloatingPointRegisters()));
 
   CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
-  CPURegList fp_list = CPURegList(CPURegister::kFPRegister, kDRegSize, fp_spills);
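+  // When the graph has SIMD, live vector values may occupy full 128-bit Q registers,
+  // so they are spilled at Q width; otherwise spilling the 64-bit D view is enough.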
+  unsigned v_reg_size = codegen->GetGraph()->HasSIMD() ? kQRegSize : kDRegSize;
+  CPURegList fp_list = CPURegList(CPURegister::kVRegister, v_reg_size, fp_spills);
 
   MacroAssembler* masm = down_cast<CodeGeneratorARM64*>(codegen)->GetVIXLAssembler();
   UseScratchRegisterScope temps(masm);
@@ -464,10 +465,13 @@
       : SlowPathCodeARM64(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // Only saves live 128-bit regs for SIMD.
     arm64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // Only restores live 128-bit regs for SIMD.
     if (successor_ == nullptr) {
       __ B(GetReturnLabel());
     } else {
@@ -808,6 +812,14 @@
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
+    // Temporary register `temp_`, used to store the lock word, must
+    // not be IP0 nor IP1, as we may use them to emit the reference
+    // load (in the call to GenerateRawReferenceLoad below), and we
+    // need the lock word to still be in `temp_` after the reference
+    // load.
+    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+    DCHECK_NE(LocationFrom(temp_).reg(), IP1);
+
     __ Bind(GetEntryLabel());
 
     // When using MaybeGenerateReadBarrierSlow, the read barrier call is
@@ -956,6 +968,14 @@
     Location field_offset = index_;
     DCHECK(field_offset.IsRegister()) << field_offset;
 
+    // Temporary register `temp_`, used to store the lock word, must
+    // not be IP0 nor IP1, as we may use them to emit the reference
+    // load (in the call to GenerateRawReferenceLoad below), and we
+    // need the lock word to still be in `temp_` after the reference
+    // load.
+    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+    DCHECK_NE(LocationFrom(temp_).reg(), IP1);
+
     __ Bind(GetEntryLabel());
 
     // /* int32_t */ monitor = obj->monitor_
@@ -1134,7 +1154,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
     // The read barrier instrumentation of object ArrayGet
@@ -1395,8 +1415,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -2381,7 +2399,7 @@
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(instr->InputAt(1)));
-      locations->SetOut(Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       break;
     }
     default:
@@ -2551,7 +2569,7 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, ARM64EncodableConstantOrRegister(instruction->GetOffset(), instruction));
-  locations->SetOut(Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
 }
 
 void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
@@ -2622,6 +2640,9 @@
                                                        LocationSummary::kNoCall);
   if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
@@ -2657,7 +2678,7 @@
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
-    Register temp = temps.AcquireW();
+    Register temp = WRegisterFrom(locations->GetTemp(0));
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -3264,7 +3285,7 @@
 void InstructionCodeGeneratorARM64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   Primitive::Type type = instruction->GetResultType();
-  DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   LocationSummary* locations = instruction->GetLocations();
   Register out = OutputRegister(instruction);
@@ -3614,7 +3635,7 @@
   if (Primitive::IsFloatingPointType(select->GetType())) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
     locations->SetInAt(1, Location::RequiresFpuRegister());
-    locations->SetOut(Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
     HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
     HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
@@ -3637,7 +3658,7 @@
                                                  : Location::ConstantLocation(cst_true_value));
     locations->SetInAt(0, false_value_in_register ? Location::RequiresRegister()
                                                   : Location::ConstantLocation(cst_false_value));
-    locations->SetOut(Location::RequiresRegister());
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
 
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
@@ -4289,7 +4310,7 @@
 }
 
 void LocationsBuilderARM64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
-  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
   }
@@ -4302,7 +4323,7 @@
   // art::PrepareForRegisterAllocation.
   DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
 
-  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena());
+  IntrinsicLocationsBuilderARM64 intrinsic(GetGraph()->GetArena(), codegen_);
   if (intrinsic.TryDispatch(invoke)) {
     return;
   }
@@ -4523,9 +4544,7 @@
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateBootImageAddressLiteral(
     uint64_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 vixl::aarch64::Literal<uint32_t>* CodeGeneratorARM64::DeduplicateJitStringLiteral(
@@ -4593,8 +4612,7 @@
       pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       pc_relative_type_patches_.size() +
-      type_bss_entry_patches_.size() +
-      boot_image_address_patches_.size();
+      type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   for (const PcRelativePatchInfo& info : pc_relative_dex_cache_patches_) {
     linker_patches->push_back(LinkerPatch::DexCacheArrayPatch(info.label.GetLocation(),
@@ -4628,11 +4646,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    vixl::aarch64::Literal<uint32_t>* literal = entry.second;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal->GetOffset()));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -5511,7 +5524,11 @@
 void LocationsBuilderARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In the suspend check slow path there are usually no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores the lower part).
+  locations->SetCustomSlowPathCallerSaves(
+      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorARM64::VisitSuspendCheck(HSuspendCheck* instruction) {
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 231fb05..10d8b84 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -318,6 +318,11 @@
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
 
+  vixl::aarch64::MemOperand CreateVecMemRegisters(
+      HVecMemoryOperation* instruction,
+      Location* reg_loc,
+      bool is_load);
+
   Arm64Assembler* const assembler_;
   CodeGeneratorARM64* const codegen_;
 
@@ -771,8 +776,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 0239ac9..cce412b 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -42,6 +42,7 @@
 using helpers::DWARFReg;
 using helpers::HighDRegisterFrom;
 using helpers::HighRegisterFrom;
+using helpers::InputDRegisterAt;
 using helpers::InputOperandAt;
 using helpers::InputRegister;
 using helpers::InputRegisterAt;
@@ -53,6 +54,7 @@
 using helpers::LocationFrom;
 using helpers::LowRegisterFrom;
 using helpers::LowSRegisterFrom;
+using helpers::OperandFrom;
 using helpers::OutputRegister;
 using helpers::OutputSRegister;
 using helpers::OutputVRegister;
@@ -830,6 +832,12 @@
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
+    // Temporary register `temp_`, used to store the lock word, must
+    // not be IP, as we may use it to emit the reference load (in the
+    // call to GenerateRawReferenceLoad below), and we need the lock
+    // word to still be in `temp_` after the reference load.
+    DCHECK(!temp_.Is(ip));
+
     __ Bind(GetEntryLabel());
 
     // When using MaybeGenerateReadBarrierSlow, the read barrier call is
@@ -971,6 +979,12 @@
     Location field_offset = index_;
     DCHECK(field_offset.IsRegisterPair()) << field_offset;
 
+    // Temporary register `temp1_`, used to store the lock word, must
+    // not be IP, as we may use it to emit the reference load (in the
+    // call to GenerateRawReferenceLoad below), and we need the lock
+    // word to still be in `temp1_` after the reference load.
+    DCHECK(!temp1_.Is(ip));
+
     __ Bind(GetEntryLabel());
 
     CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
@@ -1161,7 +1175,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
     // The read barrier instrumentation of object ArrayGet
@@ -1640,8 +1654,335 @@
   }
 }
 
+static void GenerateVcmp(HInstruction* instruction, CodeGeneratorARMVIXL* codegen) {
+  const Location rhs_loc = instruction->GetLocations()->InAt(1);
+  if (rhs_loc.IsConstant()) {
+    // 0.0 is the only immediate that can be encoded directly in
+    // a VCMP instruction.
+    //
+    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
+    // specify that in a floating-point comparison, positive zero
+    // and negative zero are considered equal, so we can use the
+    // literal 0.0 for both cases here.
+    //
+    // Note, however, that some methods (Float.equals, Float.compare,
+    // Float.compareTo, Double.equals, Double.compare,
+    // Double.compareTo, Math.max, Math.min, StrictMath.max,
+    // StrictMath.min) consider 0.0 to be (strictly) greater than
+    // -0.0. So if we ever translate calls to these methods into a
+    // HCompare instruction, we must handle the -0.0 case with
+    // care here.
+    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
+
+    const Primitive::Type type = instruction->InputAt(0)->GetType();
+
+    if (type == Primitive::kPrimFloat) {
+      __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ Vcmp(F64, InputDRegisterAt(instruction, 0), 0.0);
+    }
+  } else {
+    __ Vcmp(InputVRegisterAt(instruction, 0), InputVRegisterAt(instruction, 1));
+  }
+}
+
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTestConstant(
+    HCondition* condition,
+    bool invert,
+    CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = condition->GetCondition();
+  IfCondition opposite = condition->GetOppositeCondition();
+
+  if (invert) {
+    std::swap(cond, opposite);
+  }
+
+  std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
+  const Location left = locations->InAt(0);
+  const Location right = locations->InAt(1);
+
+  DCHECK(right.IsConstant());
+
+  const vixl32::Register left_high = HighRegisterFrom(left);
+  const vixl32::Register left_low = LowRegisterFrom(left);
+  int64_t value = Int64ConstantFrom(right);
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE: {
+      __ Cmp(left_high, High32Bits(value));
+
+      // We use the scope because of the IT block that follows.
+      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                               2 * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      __ it(eq);
+      __ cmp(eq, left_low, Low32Bits(value));
+      ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+      break;
+    }
+    case kCondLE:
+    case kCondGT:
+      // Trivially true or false.
+      if (value == std::numeric_limits<int64_t>::max()) {
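+        // Comparing a register against itself always sets the Z flag, so `eq` is
+        // always taken and `ne` never is.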
+        __ Cmp(left_low, left_low);
+        ret = cond == kCondLE ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
+        break;
+      }
+
+      if (cond == kCondLE) {
+        DCHECK_EQ(opposite, kCondGT);
+        cond = kCondLT;
+        opposite = kCondGE;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        DCHECK_EQ(opposite, kCondLE);
+        cond = kCondGE;
+        opposite = kCondLT;
+      }
+
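+      // Transform x <= c into x < c + 1 (and x > c into x >= c + 1). The max value
+      // case handled above guarantees that c + 1 does not overflow.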
+      value++;
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT: {
+      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
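+      // 64-bit comparison: CMP the low words, then SBCS the high words; the N and V
+      // flags of the final SBCS implement the signed LT/GE tests.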
+      __ Cmp(left_low, Low32Bits(value));
+      __ Sbcs(temps.Acquire(), left_high, High32Bits(value));
+      ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
+
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateLongTest(
+    HCondition* condition,
+    bool invert,
+    CodeGeneratorARMVIXL* codegen) {
+  DCHECK_EQ(condition->GetLeft()->GetType(), Primitive::kPrimLong);
+
+  const LocationSummary* const locations = condition->GetLocations();
+  IfCondition cond = condition->GetCondition();
+  IfCondition opposite = condition->GetOppositeCondition();
+
+  if (invert) {
+    std::swap(cond, opposite);
+  }
+
+  std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  DCHECK(right.IsRegisterPair());
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+    case kCondB:
+    case kCondBE:
+    case kCondA:
+    case kCondAE: {
+      __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));
+
+      // We use the scope because of the IT block that follows.
+      ExactAssemblyScope guard(codegen->GetVIXLAssembler(),
+                               2 * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      __ it(eq);
+      __ cmp(eq, LowRegisterFrom(left), LowRegisterFrom(right));
+      ret = std::make_pair(ARMUnsignedCondition(cond), ARMUnsignedCondition(opposite));
+      break;
+    }
+    case kCondLE:
+    case kCondGT:
+      if (cond == kCondLE) {
+        DCHECK_EQ(opposite, kCondGT);
+        cond = kCondGE;
+        opposite = kCondLT;
+      } else {
+        DCHECK_EQ(cond, kCondGT);
+        DCHECK_EQ(opposite, kCondLE);
+        cond = kCondLT;
+        opposite = kCondGE;
+      }
+
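+      // a <= b is equivalent to b >= a (and a > b to b < a), so swap the operands
+      // and fall through to the GE/LT handling below.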
+      std::swap(left, right);
+      FALLTHROUGH_INTENDED;
+    case kCondGE:
+    case kCondLT: {
+      UseScratchRegisterScope temps(codegen->GetVIXLAssembler());
+
+      __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));
+      __ Sbcs(temps.Acquire(), HighRegisterFrom(left), HighRegisterFrom(right));
+      ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+  }
+
+  return ret;
+}
+
+static std::pair<vixl32::Condition, vixl32::Condition> GenerateTest(HCondition* condition,
+                                                                    bool invert,
+                                                                    CodeGeneratorARMVIXL* codegen) {
+  const Primitive::Type type = condition->GetLeft()->GetType();
+  IfCondition cond = condition->GetCondition();
+  IfCondition opposite = condition->GetOppositeCondition();
+  std::pair<vixl32::Condition, vixl32::Condition> ret(eq, ne);
+
+  if (invert) {
+    std::swap(cond, opposite);
+  }
+
+  if (type == Primitive::kPrimLong) {
+    ret = condition->GetLocations()->InAt(1).IsConstant()
+        ? GenerateLongTestConstant(condition, invert, codegen)
+        : GenerateLongTest(condition, invert, codegen);
+  } else if (Primitive::IsFloatingPointType(type)) {
+    GenerateVcmp(condition, codegen);
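+    // Transfer the FP condition flags from FPSCR to the core APSR (encoded as PC in
+    // VMRS) so that ordinary conditional branches can test them.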
+    __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+    ret = std::make_pair(ARMFPCondition(cond, condition->IsGtBias()),
+                         ARMFPCondition(opposite, condition->IsGtBias()));
+  } else {
+    DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+    __ Cmp(InputRegisterAt(condition, 0), InputOperandAt(condition, 1));
+    ret = std::make_pair(ARMCondition(cond), ARMCondition(opposite));
+  }
+
+  return ret;
+}
+
+static bool CanGenerateTest(HCondition* condition, ArmVIXLAssembler* assembler) {
+  if (condition->GetLeft()->GetType() == Primitive::kPrimLong) {
+    const LocationSummary* const locations = condition->GetLocations();
+    const IfCondition c = condition->GetCondition();
+
+    if (locations->InAt(1).IsConstant()) {
+      const int64_t value = Int64ConstantFrom(locations->InAt(1));
+
+      if (c < kCondLT || c > kCondGE) {
+        // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+        // we check that the least significant half of the first input to be compared
+        // is in a low register (the other half is read outside an IT block), and
+        // that the constant fits in an 8-bit unsigned integer, so that a 16-bit CMP
+        // encoding can be used.
+        if (!LowRegisterFrom(locations->InAt(0)).IsLow() || !IsUint<8>(Low32Bits(value))) {
+          return false;
+        }
+      // TODO(VIXL): The rest of the checks are there to keep the backend in sync with
+      // the previous one, but are not strictly necessary.
+      } else if (c == kCondLE || c == kCondGT) {
+        if (value < std::numeric_limits<int64_t>::max() &&
+            !assembler->ShifterOperandCanHold(SBC, High32Bits(value + 1), kCcSet)) {
+          return false;
+        }
+      } else if (!assembler->ShifterOperandCanHold(SBC, High32Bits(value), kCcSet)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+static bool CanEncodeConstantAs8BitImmediate(HConstant* constant) {
+  const Primitive::Type type = constant->GetType();
+  bool ret = false;
+
+  DCHECK(Primitive::IsIntegralType(type) || type == Primitive::kPrimNot) << type;
+
+  if (type == Primitive::kPrimLong) {
+    const uint64_t value = Uint64ConstantFrom(constant);
+
+    ret = IsUint<8>(Low32Bits(value)) && IsUint<8>(High32Bits(value));
+  } else {
+    ret = IsUint<8>(Int32ConstantFrom(constant));
+  }
+
+  return ret;
+}
+
+static Location Arm8BitEncodableConstantOrRegister(HInstruction* constant) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+
+  if (constant->IsConstant() && CanEncodeConstantAs8BitImmediate(constant->AsConstant())) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+
+  return Location::RequiresRegister();
+}
+
+static bool CanGenerateConditionalMove(const Location& out, const Location& src) {
+  // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+  // we check that we are not dealing with floating-point output (there is no
+  // 16-bit VMOV encoding).
+  if (!out.IsRegister() && !out.IsRegisterPair()) {
+    return false;
+  }
+
+  // For constants, we also check that the output is in one or two low registers,
+  // and that the constants fit in an 8-bit unsigned integer, so that a 16-bit
+  // MOV encoding can be used.
+  if (src.IsConstant()) {
+    if (!CanEncodeConstantAs8BitImmediate(src.GetConstant())) {
+      return false;
+    }
+
+    if (out.IsRegister()) {
+      if (!RegisterFrom(out).IsLow()) {
+        return false;
+      }
+    } else {
+      DCHECK(out.IsRegisterPair());
+
+      if (!HighRegisterFrom(out).IsLow()) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
 #undef __
 
+vixl32::Label* CodeGeneratorARMVIXL::GetFinalLabel(HInstruction* instruction,
+                                                   vixl32::Label* final_label) {
+  DCHECK(!instruction->IsControlFlow() && !instruction->IsSuspendCheck());
+  DCHECK(!instruction->IsInvoke() || !instruction->GetLocations()->CanCall());
+
+  const HBasicBlock* const block = instruction->GetBlock();
+  const HLoopInformation* const info = block->GetLoopInformation();
+  HInstruction* const next = instruction->GetNext();
+
+  // Avoid a branch to a branch.
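+  // Do not do this for back edges of loops with a suspend check: the back-edge goto
+  // also emits the suspend check, which must not be skipped.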
+  if (next->IsGoto() && (info == nullptr ||
+                         !info->IsBackEdge(*block) ||
+                         !info->HasSuspendCheck())) {
+    final_label = GetLabelOf(next->AsGoto()->GetSuccessor());
+  }
+
+  return final_label;
+}
+
 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
                                            const ArmInstructionSetFeatures& isa_features,
                                            const CompilerOptions& compiler_options,
@@ -1671,23 +2012,16 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
                          graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
-  // Give d14 and d15 as scratch registers to VIXL.
-  // They are removed from the register allocator in `SetupBlockedRegisters()`.
-  // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
-  // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
-  // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
-  // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
-  // also want to investigate giving those 14 other D registers to the allocator.
-  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
-  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
+  // Give D30 and D31 as scratch registers to VIXL. The register allocator only works on
+  // S0-S31, which alias to D0-D15.
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d31);
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d30);
 }
 
 void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
@@ -1753,13 +2087,6 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
-  // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
-  // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
-  blocked_fpu_registers_[28] = true;
-  blocked_fpu_registers_[29] = true;
-  blocked_fpu_registers_[30] = true;
-  blocked_fpu_registers_[31] = true;
-
   if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
@@ -2135,51 +2462,6 @@
 void InstructionCodeGeneratorARMVIXL::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
 }
 
-void InstructionCodeGeneratorARMVIXL::GenerateVcmp(HInstruction* instruction) {
-  Primitive::Type type = instruction->InputAt(0)->GetType();
-  Location lhs_loc = instruction->GetLocations()->InAt(0);
-  Location rhs_loc = instruction->GetLocations()->InAt(1);
-  if (rhs_loc.IsConstant()) {
-    // 0.0 is the only immediate that can be encoded directly in
-    // a VCMP instruction.
-    //
-    // Both the JLS (section 15.20.1) and the JVMS (section 6.5)
-    // specify that in a floating-point comparison, positive zero
-    // and negative zero are considered equal, so we can use the
-    // literal 0.0 for both cases here.
-    //
-    // Note however that some methods (Float.equal, Float.compare,
-    // Float.compareTo, Double.equal, Double.compare,
-    // Double.compareTo, Math.max, Math.min, StrictMath.max,
-    // StrictMath.min) consider 0.0 to be (strictly) greater than
-    // -0.0. So if we ever translate calls to these methods into a
-    // HCompare instruction, we must handle the -0.0 case with
-    // care here.
-    DCHECK(rhs_loc.GetConstant()->IsArithmeticZero());
-    if (type == Primitive::kPrimFloat) {
-      __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(F64, DRegisterFrom(lhs_loc), 0.0);
-    }
-  } else {
-    if (type == Primitive::kPrimFloat) {
-      __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
-    } else {
-      DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(DRegisterFrom(lhs_loc), DRegisterFrom(rhs_loc));
-    }
-  }
-}
-
-void InstructionCodeGeneratorARMVIXL::GenerateFPJumps(HCondition* cond,
-                                                      vixl32::Label* true_label,
-                                                      vixl32::Label* false_label ATTRIBUTE_UNUSED) {
-  // To branch on the result of the FP compare we transfer FPSCR to APSR (encoded as PC in VMRS).
-  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
-  __ B(ARMFPCondition(cond->GetCondition(), cond->IsGtBias()), true_label);
-}
-
 void InstructionCodeGeneratorARMVIXL::GenerateLongComparesAndJumps(HCondition* cond,
                                                                    vixl32::Label* true_label,
                                                                    vixl32::Label* false_label) {
@@ -2196,7 +2478,6 @@
 
   // Set the conditions for the test, remembering that == needs to be
   // decided using the low words.
-  // TODO: consider avoiding jumps with temporary and CMP low+SBC high
   switch (if_cond) {
     case kCondEQ:
     case kCondNE:
@@ -2267,31 +2548,44 @@
 void InstructionCodeGeneratorARMVIXL::GenerateCompareTestAndBranch(HCondition* condition,
                                                                    vixl32::Label* true_target_in,
                                                                    vixl32::Label* false_target_in) {
+  if (CanGenerateTest(condition, codegen_->GetAssembler())) {
+    vixl32::Label* non_fallthrough_target;
+    bool invert;
+
+    if (true_target_in == nullptr) {
+      DCHECK(false_target_in != nullptr);
+      non_fallthrough_target = false_target_in;
+      invert = true;
+    } else {
+      non_fallthrough_target = true_target_in;
+      invert = false;
+    }
+
+    const auto cond = GenerateTest(condition, invert, codegen_);
+
+    __ B(cond.first, non_fallthrough_target);
+
+    if (false_target_in != nullptr && false_target_in != non_fallthrough_target) {
+      __ B(false_target_in);
+    }
+
+    return;
+  }
+
   // Generated branching requires both targets to be explicit. If either of the
   // targets is nullptr (fallthrough) use and bind `fallthrough` instead.
   vixl32::Label fallthrough;
   vixl32::Label* true_target = (true_target_in == nullptr) ? &fallthrough : true_target_in;
   vixl32::Label* false_target = (false_target_in == nullptr) ? &fallthrough : false_target_in;
 
-  Primitive::Type type = condition->InputAt(0)->GetType();
-  switch (type) {
-    case Primitive::kPrimLong:
-      GenerateLongComparesAndJumps(condition, true_target, false_target);
-      break;
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      GenerateVcmp(condition);
-      GenerateFPJumps(condition, true_target, false_target);
-      break;
-    default:
-      LOG(FATAL) << "Unexpected compare type " << type;
-  }
+  DCHECK_EQ(condition->InputAt(0)->GetType(), Primitive::kPrimLong);
+  GenerateLongComparesAndJumps(condition, true_target, false_target);
 
   if (false_target != &fallthrough) {
     __ B(false_target);
   }
 
-  if (true_target_in == nullptr || false_target_in == nullptr) {
+  if (fallthrough.IsReferenced()) {
     __ Bind(&fallthrough);
   }
 }
@@ -2357,20 +2651,29 @@
       return;
     }
 
-    LocationSummary* locations = cond->GetLocations();
-    DCHECK(locations->InAt(0).IsRegister());
-    vixl32::Register left = InputRegisterAt(cond, 0);
-    Location right = locations->InAt(1);
-    if (right.IsRegister()) {
-      __ Cmp(left, InputRegisterAt(cond, 1));
-    } else {
-      DCHECK(right.IsConstant());
-      __ Cmp(left, CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-    }
+    vixl32::Label* non_fallthrough_target;
+    vixl32::Condition arm_cond = vixl32::Condition::None();
+    const vixl32::Register left = InputRegisterAt(cond, 0);
+    const Operand right = InputOperandAt(cond, 1);
+
     if (true_target == nullptr) {
-      __ B(ARMCondition(condition->GetOppositeCondition()), false_target);
+      arm_cond = ARMCondition(condition->GetOppositeCondition());
+      non_fallthrough_target = false_target;
     } else {
-      __ B(ARMCondition(condition->GetCondition()), true_target);
+      arm_cond = ARMCondition(condition->GetCondition());
+      non_fallthrough_target = true_target;
+    }
+
+    if (right.IsImmediate() && right.GetImmediate() == 0 && (arm_cond.Is(ne) || arm_cond.Is(eq))) {
+      if (arm_cond.Is(eq)) {
+        __ CompareAndBranchIfZero(left, non_fallthrough_target);
+      } else {
+        DCHECK(arm_cond.Is(ne));
+        __ CompareAndBranchIfNonZero(left, non_fallthrough_target);
+      }
+    } else {
+      __ Cmp(left, right);
+      __ B(arm_cond, non_fallthrough_target);
     }
   }
 
@@ -2431,29 +2734,145 @@
 
 void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  if (Primitive::IsFloatingPointType(select->GetType())) {
+  const bool is_floating_point = Primitive::IsFloatingPointType(select->GetType());
+
+  if (is_floating_point) {
     locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::FpuRegisterOrConstant(select->GetTrueValue()));
   } else {
     locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, Arm8BitEncodableConstantOrRegister(select->GetTrueValue()));
   }
+
   if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
-    locations->SetInAt(2, Location::RequiresRegister());
+    locations->SetInAt(2, Location::RegisterOrConstant(select->GetCondition()));
+    // The code generator handles overlap with the values, but not with the condition.
+    locations->SetOut(Location::SameAsFirstInput());
+  } else if (is_floating_point) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    if (!locations->InAt(1).IsConstant()) {
+      locations->SetInAt(0, Arm8BitEncodableConstantOrRegister(select->GetFalseValue()));
+    }
+
+    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
   }
-  locations->SetOut(Location::SameAsFirstInput());
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
-  LocationSummary* locations = select->GetLocations();
-  vixl32::Label false_target;
-  GenerateTestAndBranch(select,
-                        /* condition_input_index */ 2,
-                        /* true_target */ nullptr,
-                        &false_target,
-                        /* far_target */ false);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  HInstruction* const condition = select->GetCondition();
+  const LocationSummary* const locations = select->GetLocations();
+  const Primitive::Type type = select->GetType();
+  const Location first = locations->InAt(0);
+  const Location out = locations->Out();
+  const Location second = locations->InAt(1);
+  Location src;
+
+  if (condition->IsIntConstant()) {
+    if (condition->AsIntConstant()->IsFalse()) {
+      src = first;
+    } else {
+      src = second;
+    }
+
+    codegen_->MoveLocation(out, src, type);
+    return;
+  }
+
+  if (!Primitive::IsFloatingPointType(type) &&
+      (IsBooleanValueOrMaterializedCondition(condition) ||
+       CanGenerateTest(condition->AsCondition(), codegen_->GetAssembler()))) {
+    bool invert = false;
+
+    if (out.Equals(second)) {
+      src = first;
+      invert = true;
+    } else if (out.Equals(first)) {
+      src = second;
+    } else if (second.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(second.GetConstant()));
+      src = second;
+    } else if (first.IsConstant()) {
+      DCHECK(CanEncodeConstantAs8BitImmediate(first.GetConstant()));
+      src = first;
+      invert = true;
+    } else {
+      src = second;
+    }
+
+    if (CanGenerateConditionalMove(out, src)) {
+      if (!out.Equals(first) && !out.Equals(second)) {
+        codegen_->MoveLocation(out, src.Equals(first) ? second : first, type);
+      }
+
+      std::pair<vixl32::Condition, vixl32::Condition> cond(eq, ne);
+
+      if (IsBooleanValueOrMaterializedCondition(condition)) {
+        __ Cmp(InputRegisterAt(select, 2), 0);
+        cond = invert ? std::make_pair(eq, ne) : std::make_pair(ne, eq);
+      } else {
+        cond = GenerateTest(condition->AsCondition(), invert, codegen_);
+      }
+
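+      // Each IT + MOV pair is two 16-bit T32 instructions; a register pair needs two
+      // such pairs, hence four instructions.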
+      const size_t instr_count = out.IsRegisterPair() ? 4 : 2;
+      // We use the scope because of the IT block that follows.
+      ExactAssemblyScope guard(GetVIXLAssembler(),
+                               instr_count * vixl32::k16BitT32InstructionSizeInBytes,
+                               CodeBufferCheckScope::kExactSize);
+
+      if (out.IsRegister()) {
+        __ it(cond.first);
+        __ mov(cond.first, RegisterFrom(out), OperandFrom(src, type));
+      } else {
+        DCHECK(out.IsRegisterPair());
+
+        Operand operand_high(0);
+        Operand operand_low(0);
+
+        if (src.IsConstant()) {
+          const int64_t value = Int64ConstantFrom(src);
+
+          operand_high = High32Bits(value);
+          operand_low = Low32Bits(value);
+        } else {
+          DCHECK(src.IsRegisterPair());
+          operand_high = HighRegisterFrom(src);
+          operand_low = LowRegisterFrom(src);
+        }
+
+        __ it(cond.first);
+        __ mov(cond.first, LowRegisterFrom(out), operand_low);
+        __ it(cond.first);
+        __ mov(cond.first, HighRegisterFrom(out), operand_high);
+      }
+
+      return;
+    }
+  }
+
+  vixl32::Label* false_target = nullptr;
+  vixl32::Label* true_target = nullptr;
+  vixl32::Label select_end;
+  vixl32::Label* const target = codegen_->GetFinalLabel(select, &select_end);
+
+  if (out.Equals(second)) {
+    true_target = target;
+    src = first;
+  } else {
+    false_target = target;
+    src = second;
+
+    if (!out.Equals(first)) {
+      codegen_->MoveLocation(out, first, type);
+    }
+  }
+
+  GenerateTestAndBranch(select, 2, true_target, false_target, /* far_target */ false);
+  codegen_->MoveLocation(out, src, type);
+
+  if (select_end.IsReferenced()) {
+    __ Bind(&select_end);
+  }
 }
 
 void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -2477,7 +2896,7 @@
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RegisterOrConstant(cond->InputAt(1)));
       if (!cond->IsEmittedAtUseSite()) {
-        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       }
       break;
 
@@ -2504,50 +2923,52 @@
     return;
   }
 
-  Location right = cond->GetLocations()->InAt(1);
-  vixl32::Register out = OutputRegister(cond);
-  vixl32::Label true_label, false_label;
+  const vixl32::Register out = OutputRegister(cond);
 
-  switch (cond->InputAt(0)->GetType()) {
-    default: {
-      // Integer case.
-      if (right.IsRegister()) {
-        __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
-      } else {
-        DCHECK(right.IsConstant());
-        __ Cmp(InputRegisterAt(cond, 0),
-               CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-      }
-      ExactAssemblyScope aas(GetVIXLAssembler(),
-                             3 * vixl32::kMaxInstructionSizeInBytes,
-                             CodeBufferCheckScope::kMaximumSize);
-      __ ite(ARMCondition(cond->GetCondition()));
-      __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
-      __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0);
-      return;
-    }
-    case Primitive::kPrimLong:
-      GenerateLongComparesAndJumps(cond, &true_label, &false_label);
-      break;
-    case Primitive::kPrimFloat:
-    case Primitive::kPrimDouble:
-      GenerateVcmp(cond);
-      GenerateFPJumps(cond, &true_label, &false_label);
-      break;
+  if (out.IsLow() && CanGenerateTest(cond, codegen_->GetAssembler())) {
+    const auto condition = GenerateTest(cond, false, codegen_);
+    // We use the scope because of the IT block that follows.
+    ExactAssemblyScope guard(GetVIXLAssembler(),
+                             4 * vixl32::k16BitT32InstructionSizeInBytes,
+                             CodeBufferCheckScope::kExactSize);
+
+    __ it(condition.first);
+    __ mov(condition.first, out, 1);
+    __ it(condition.second);
+    __ mov(condition.second, out, 0);
+    return;
   }
 
   // Convert the jumps into the result.
   vixl32::Label done_label;
+  vixl32::Label* const final_label = codegen_->GetFinalLabel(cond, &done_label);
 
-  // False case: result = 0.
-  __ Bind(&false_label);
-  __ Mov(out, 0);
-  __ B(&done_label);
+  if (cond->InputAt(0)->GetType() == Primitive::kPrimLong) {
+    vixl32::Label true_label, false_label;
 
-  // True case: result = 1.
-  __ Bind(&true_label);
-  __ Mov(out, 1);
-  __ Bind(&done_label);
+    GenerateLongComparesAndJumps(cond, &true_label, &false_label);
+
+    // False case: result = 0.
+    __ Bind(&false_label);
+    __ Mov(out, 0);
+    __ B(final_label);
+
+    // True case: result = 1.
+    __ Bind(&true_label);
+    __ Mov(out, 1);
+  } else {
+    DCHECK(CanGenerateTest(cond, codegen_->GetAssembler()));
+
+    const auto condition = GenerateTest(cond, false, codegen_);
+
+    __ Mov(LeaveFlags, out, 0);
+    __ B(condition.second, final_label, /* far_target */ false);
+    __ Mov(out, 1);
+  }
+
+  if (done_label.IsReferenced()) {
+    __ Bind(&done_label);
+  }
 }
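
The scalar (non-long) path above uses a small branch-avoidance trick: `Mov(LeaveFlags, out, 0)` speculatively writes the false result without disturbing the flags produced by `GenerateTest`, so only the true case needs a write, and the conditional branch can skip it. The equivalent scalar logic, as a sketch:

    #include <cassert>

    // Models: Mov(LeaveFlags, out, 0); B(condition.second, final); Mov(out, 1);
    int MaterializeCondition(bool condition_holds) {
      int out = 0;            // speculative false result; "flags" untouched
      if (condition_holds) {  // B(condition.second, ...) skips this write
        out = 1;
      }
      return out;
    }

    int main() {
      assert(MaterializeCondition(true) == 1);
      assert(MaterializeCondition(false) == 0);
      return 0;
    }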
 
 void LocationsBuilderARMVIXL::VisitEqual(HEqual* comp) {
@@ -4060,6 +4481,7 @@
     vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
     vixl32::Label end;
     vixl32::Label shift_by_32_plus_shift_right;
+    vixl32::Label* final_label = codegen_->GetFinalLabel(ror, &end);
 
     __ And(shift_right, RegisterFrom(rhs), 0x1F);
     __ Lsrs(shift_left, RegisterFrom(rhs), 6);
@@ -4074,7 +4496,7 @@
     __ Lsl(out_reg_lo, in_reg_lo, shift_left);
     __ Lsr(shift_left, in_reg_hi, shift_right);
     __ Add(out_reg_lo, out_reg_lo, shift_left);
-    __ B(&end);
+    __ B(final_label);
 
     __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
     // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
@@ -4086,7 +4508,9 @@
     __ Lsl(shift_right, in_reg_hi, shift_left);
     __ Add(out_reg_lo, out_reg_lo, shift_right);
 
-    __ Bind(&end);
+    if (end.IsReferenced()) {
+      __ Bind(&end);
+    }
   }
 }
 
@@ -4519,6 +4943,7 @@
   Location right = locations->InAt(1);
 
   vixl32::Label less, greater, done;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(compare, &done);
   Primitive::Type type = compare->InputAt(0)->GetType();
   vixl32::Condition less_cond = vixl32::Condition(kNone);
   switch (type) {
@@ -4546,7 +4971,7 @@
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble: {
       __ Mov(out, 0);
-      GenerateVcmp(compare);
+      GenerateVcmp(compare, codegen_);
       // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
       __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
       less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
@@ -4557,17 +4982,19 @@
       UNREACHABLE();
   }
 
-  __ B(eq, &done, /* far_target */ false);
+  __ B(eq, final_label, /* far_target */ false);
   __ B(less_cond, &less, /* far_target */ false);
 
   __ Bind(&greater);
   __ Mov(out, 1);
-  __ B(&done);
+  __ B(final_label);
 
   __ Bind(&less);
   __ Mov(out, -1);
 
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
@@ -4916,17 +5343,24 @@
     return true;
   }
   Opcode neg_opcode = kNoOperand;
+  uint32_t neg_value = 0;
   switch (opcode) {
-    case AND: neg_opcode = BIC; value = ~value; break;
-    case ORR: neg_opcode = ORN; value = ~value; break;
-    case ADD: neg_opcode = SUB; value = -value; break;
-    case ADC: neg_opcode = SBC; value = ~value; break;
-    case SUB: neg_opcode = ADD; value = -value; break;
-    case SBC: neg_opcode = ADC; value = ~value; break;
+    case AND: neg_opcode = BIC; neg_value = ~value; break;
+    case ORR: neg_opcode = ORN; neg_value = ~value; break;
+    case ADD: neg_opcode = SUB; neg_value = -value; break;
+    case ADC: neg_opcode = SBC; neg_value = ~value; break;
+    case SUB: neg_opcode = ADD; neg_value = -value; break;
+    case SBC: neg_opcode = ADC; neg_value = ~value; break;
+    case MOV: neg_opcode = MVN; neg_value = ~value; break;
     default:
       return false;
   }
-  return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc);
+
+  if (assembler->ShifterOperandCanHold(neg_opcode, neg_value, set_cc)) {
+    return true;
+  }
+
+  return opcode == AND && IsPowerOfTwo(value + 1);
 }
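
Note that the rewrite keeps `value` intact and computes the negated immediate into a separate `neg_value`, so the final power-of-two check still sees the original constant. The opcode/immediate rewriting rests on simple identities; a standalone sketch that models only the bitwise behaviour of the instructions involved:

    #include <cassert>
    #include <cstdint>

    // Models of the ARM data-processing instructions used above.
    uint32_t AND(uint32_t a, uint32_t b) { return a & b; }
    uint32_t BIC(uint32_t a, uint32_t b) { return a & ~b; }
    uint32_t ORR(uint32_t a, uint32_t b) { return a | b; }
    uint32_t ORN(uint32_t a, uint32_t b) { return a | ~b; }
    uint32_t ADD(uint32_t a, uint32_t b) { return a + b; }
    uint32_t SUB(uint32_t a, uint32_t b) { return a - b; }

    int main() {
      uint32_t a = 0xDEADBEEF, imm = 0xFFFF0003;
      assert(AND(a, imm) == BIC(a, ~imm));      // case AND: neg_value = ~value
      assert(ORR(a, imm) == ORN(a, ~imm));      // case ORR: neg_value = ~value
      assert(ADD(a, imm) == SUB(a, 0u - imm));  // case ADD: neg_value = -value
      return 0;
    }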
 
 void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
@@ -5352,6 +5786,7 @@
         int32_t const_index = Int32ConstantFrom(index);
         if (maybe_compressed_char_at) {
           vixl32::Label uncompressed_load, done;
+          vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                         "Expecting 0=compressed, 1=uncompressed");
@@ -5360,13 +5795,15 @@
                                          RegisterFrom(out_loc),
                                          obj,
                                          data_offset + const_index);
-          __ B(&done);
+          __ B(final_label);
           __ Bind(&uncompressed_load);
           GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
                                          RegisterFrom(out_loc),
                                          obj,
                                          data_offset + (const_index << 1));
-          __ Bind(&done);
+          if (done.IsReferenced()) {
+            __ Bind(&done);
+          }
         } else {
           uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
 
@@ -5391,15 +5828,18 @@
         }
         if (maybe_compressed_char_at) {
           vixl32::Label uncompressed_load, done;
+          vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
           __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
           static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                         "Expecting 0=compressed, 1=uncompressed");
           __ B(cs, &uncompressed_load, /* far_target */ false);
           __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
-          __ B(&done);
+          __ B(final_label);
           __ Bind(&uncompressed_load);
           __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
-          __ Bind(&done);
+          if (done.IsReferenced()) {
+            __ Bind(&done);
+          }
         } else {
           codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
         }
@@ -5638,6 +6078,7 @@
       uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
       uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
       vixl32::Label done;
+      vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
       SlowPathCodeARMVIXL* slow_path = nullptr;
 
       if (may_need_runtime_call_for_type_check) {
@@ -5660,7 +6101,7 @@
           // TODO(VIXL): Use a scope to ensure we record the pc info immediately after the preceding
           // store instruction.
           codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ B(&done);
+          __ B(final_label);
           __ Bind(&non_zero);
         }
 
@@ -5864,20 +6305,56 @@
   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
   caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
   LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::RequiresRegister());
+
+  HInstruction* index = instruction->InputAt(0);
+  HInstruction* length = instruction->InputAt(1);
+  // If both index and length are constants, we can statically check the bounds. But if at least
+  // one of them is not encodable, ArmEncodableConstantOrRegister would create a
+  // Location::RequiresRegister(), which we do not want here. Instead we create constant
+  // locations.
+  bool both_const = index->IsConstant() && length->IsConstant();
+  locations->SetInAt(0, both_const
+      ? Location::ConstantLocation(index->AsConstant())
+      : ArmEncodableConstantOrRegister(index, CMP));
+  locations->SetInAt(1, both_const
+      ? Location::ConstantLocation(length->AsConstant())
+      : ArmEncodableConstantOrRegister(length, CMP));
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
-  SlowPathCodeARMVIXL* slow_path =
-      new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
-  codegen_->AddSlowPath(slow_path);
+  LocationSummary* locations = instruction->GetLocations();
+  Location index_loc = locations->InAt(0);
+  Location length_loc = locations->InAt(1);
 
-  vixl32::Register index = InputRegisterAt(instruction, 0);
-  vixl32::Register length = InputRegisterAt(instruction, 1);
+  if (length_loc.IsConstant()) {
+    int32_t length = Int32ConstantFrom(length_loc);
+    if (index_loc.IsConstant()) {
+      // BCE will remove the bounds check if we are guaranteed to pass.
+      int32_t index = Int32ConstantFrom(index_loc);
+      if (index < 0 || index >= length) {
+        SlowPathCodeARMVIXL* slow_path =
+            new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+        codegen_->AddSlowPath(slow_path);
+        __ B(slow_path->GetEntryLabel());
+      } else {
+        // Some optimization after BCE may have generated this, and we should not
+        // generate a bounds check if it is a valid range.
+      }
+      return;
+    }
 
-  __ Cmp(index, length);
-  __ B(hs, slow_path->GetEntryLabel());
+    SlowPathCodeARMVIXL* slow_path =
+        new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+    __ Cmp(RegisterFrom(index_loc), length);
+    codegen_->AddSlowPath(slow_path);
+    __ B(hs, slow_path->GetEntryLabel());
+  } else {
+    SlowPathCodeARMVIXL* slow_path =
+        new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+    __ Cmp(RegisterFrom(length_loc), InputOperandAt(instruction, 0));
+    codegen_->AddSlowPath(slow_path);
+    __ B(ls, slow_path->GetEntryLabel());
+  }
 }
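
Both emitted comparisons rely on unsigned condition codes: `hs` (unsigned >=) fires when `index >= length`, and `ls` (unsigned <=) fires for the swapped operands when `length <= index`. Treating the index as unsigned folds the negative-index case into the same single test, mirroring the static `index < 0 || index >= length` check above. A small runnable illustration:

    #include <cassert>
    #include <cstdint>

    // Unsigned comparison folds `index < 0` into `index >= length`:
    // a negative index reinterpreted as uint32_t is a huge value.
    bool OutOfBounds(int32_t index, int32_t length) {
      return static_cast<uint32_t>(index) >= static_cast<uint32_t>(length);
    }

    int main() {
      assert(OutOfBounds(-1, 10));  // negative index caught by the same test
      assert(OutOfBounds(10, 10));  // index == length is out of bounds
      assert(!OutOfBounds(9, 10));  // in range
      return 0;
    }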
 
 void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
@@ -6107,13 +6584,16 @@
 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
   // TODO(VIXL32): Double check the performance of this implementation.
   UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
-  vixl32::SRegister temp_1 = temps.AcquireS();
-  vixl32::SRegister temp_2 = temps.AcquireS();
+  vixl32::Register temp1 = temps.Acquire();
+  ScratchRegisterScope ensure_scratch(
+      this, temp1.GetCode(), r0.GetCode(), codegen_->GetNumberOfCoreRegisters());
+  vixl32::Register temp2(ensure_scratch.GetRegister());
 
-  __ Vldr(temp_1, MemOperand(sp, mem1));
-  __ Vldr(temp_2, MemOperand(sp, mem2));
-  __ Vstr(temp_1, MemOperand(sp, mem2));
-  __ Vstr(temp_2, MemOperand(sp, mem1));
+  int stack_offset = ensure_scratch.IsSpilled() ? kArmWordSize : 0;
+  GetAssembler()->LoadFromOffset(kLoadWord, temp1, sp, mem1 + stack_offset);
+  GetAssembler()->LoadFromOffset(kLoadWord, temp2, sp, mem2 + stack_offset);
+  GetAssembler()->StoreToOffset(kStoreWord, temp1, sp, mem2 + stack_offset);
+  GetAssembler()->StoreToOffset(kStoreWord, temp2, sp, mem1 + stack_offset);
 }
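
The `stack_offset` adjustment matters because `ScratchRegisterScope` may have to spill a register to obtain the second scratch; the spill is a push, which moves `sp`, so both stack slots must be re-addressed one word higher. A toy model of that rebasing:

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      // stack[0] models the slot at sp; mem1/mem2 are pre-push offsets.
      std::vector<int> stack = {10, 20, 30};
      int mem1 = 0, mem2 = 2;
      stack.insert(stack.begin(), 99);  // "push" of the spilled scratch register
      int stack_offset = 1;             // one word, i.e. kArmWordSize
      std::swap(stack[mem1 + stack_offset], stack[mem2 + stack_offset]);
      assert(stack[mem1 + stack_offset] == 30 && stack[mem2 + stack_offset] == 10);
      return 0;
    }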
 
 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
@@ -6136,7 +6616,7 @@
   } else if (source.IsStackSlot() && destination.IsStackSlot()) {
     Exchange(source.GetStackIndex(), destination.GetStackIndex());
   } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
-    vixl32::SRegister temp = temps.AcquireS();
+    vixl32::Register temp = temps.Acquire();
     __ Vmov(temp, SRegisterFrom(source));
     __ Vmov(SRegisterFrom(source), SRegisterFrom(destination));
     __ Vmov(SRegisterFrom(destination), temp);
@@ -6195,12 +6675,12 @@
   }
 }
 
-void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::SpillScratch(int reg) {
+  __ Push(vixl32::Register(reg));
 }
 
-void ParallelMoveResolverARMVIXL::RestoreScratch(int reg ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::RestoreScratch(int reg) {
+  __ Pop(vixl32::Register(reg));
 }
 
 HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
@@ -6628,13 +7108,16 @@
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
   uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
-  vixl32::Label done, zero;
+  vixl32::Label done;
+  vixl32::Label* const final_label = codegen_->GetFinalLabel(instruction, &done);
   SlowPathCodeARMVIXL* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
  // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ CompareAndBranchIfZero(obj, &zero, /* far_target */ false);
+    DCHECK(!out.Is(obj));
+    __ Mov(out, 0);
+    __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
   }
 
   switch (type_check_kind) {
@@ -6646,11 +7129,28 @@
                                         class_offset,
                                         maybe_temp_loc,
                                         kCompilerReadBarrierOption);
-      __ Cmp(out, cls);
       // Classes must be equal for the instanceof to succeed.
-      __ B(ne, &zero, /* far_target */ false);
-      __ Mov(out, 1);
-      __ B(&done);
+      __ Cmp(out, cls);
+      // We speculatively set the result to false without changing the condition
+      // flags, which allows us to avoid some branching later.
+      __ Mov(LeaveFlags, out, 0);
+
+      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+      // we check that the output is in a low register, so that a 16-bit MOV
+      // encoding can be used.
+      if (out.IsLow()) {
+        // We use the scope because of the IT block that follows.
+        ExactAssemblyScope guard(GetVIXLAssembler(),
+                                 2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                 CodeBufferCheckScope::kExactSize);
+
+        __ it(eq);
+        __ mov(eq, out, 1);
+      } else {
+        __ B(ne, final_label, /* far_target */ false);
+        __ Mov(out, 1);
+      }
+
       break;
     }
 
@@ -6672,14 +7172,11 @@
                                        super_offset,
                                        maybe_temp_loc,
                                        kCompilerReadBarrierOption);
-      // If `out` is null, we use it for the result, and jump to `done`.
-      __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+      // If `out` is null, we use it for the result, and jump to the final label.
+      __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
       __ Cmp(out, cls);
       __ B(ne, &loop, /* far_target */ false);
       __ Mov(out, 1);
-      if (zero.IsReferenced()) {
-        __ B(&done);
-      }
       break;
     }
 
@@ -6702,14 +7199,38 @@
                                        super_offset,
                                        maybe_temp_loc,
                                        kCompilerReadBarrierOption);
-      __ CompareAndBranchIfNonZero(out, &loop);
-      // If `out` is null, we use it for the result, and jump to `done`.
-      __ B(&done);
-      __ Bind(&success);
-      __ Mov(out, 1);
-      if (zero.IsReferenced()) {
-        __ B(&done);
+      // This is essentially a null check, but it sets the condition flags to the
+      // proper value for the code that follows the loop, i.e. not `eq`.
+      __ Cmp(out, 1);
+      __ B(hs, &loop, /* far_target */ false);
+
+      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+      // we check that the output is in a low register, so that a 16-bit MOV
+      // encoding can be used.
+      if (out.IsLow()) {
+        // If `out` is null, we use it for the result, and the condition flags
+        // have already been set to `ne`, so the IT block that comes afterwards
+        // (and which handles the successful case) turns into a NOP (instead of
+        // overwriting `out`).
+        __ Bind(&success);
+
+        // We use the scope because of the IT block that follows.
+        ExactAssemblyScope guard(GetVIXLAssembler(),
+                                 2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                 CodeBufferCheckScope::kExactSize);
+
+        // There is only one branch to the `success` label (which is bound to this
+        // IT block), and it has the same condition, `eq`, so in that case the MOV
+        // is executed.
+        __ it(eq);
+        __ mov(eq, out, 1);
+      } else {
+        // If `out` is null, we use it for the result, and jump to the final label.
+        __ B(final_label);
+        __ Bind(&success);
+        __ Mov(out, 1);
       }
+
       break;
     }
 
@@ -6732,14 +7253,34 @@
                                        component_offset,
                                        maybe_temp_loc,
                                        kCompilerReadBarrierOption);
-      // If `out` is null, we use it for the result, and jump to `done`.
-      __ CompareAndBranchIfZero(out, &done, /* far_target */ false);
+      // If `out` is null, we use it for the result, and jump to the final label.
+      __ CompareAndBranchIfZero(out, final_label, /* far_target */ false);
       GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ CompareAndBranchIfNonZero(out, &zero, /* far_target */ false);
-      __ Bind(&exact_check);
-      __ Mov(out, 1);
-      __ B(&done);
+      __ Cmp(out, 0);
+      // We speculatively set the result to false without changing the condition
+      // flags, which allows us to avoid some branching later.
+      __ Mov(LeaveFlags, out, 0);
+
+      // Since IT blocks longer than a 16-bit instruction are deprecated by ARMv8,
+      // we check that the output is in a low register, so that a 16-bit MOV
+      // encoding can be used.
+      if (out.IsLow()) {
+        __ Bind(&exact_check);
+
+        // We use the scope because of the IT block that follows.
+        ExactAssemblyScope guard(GetVIXLAssembler(),
+                                 2 * vixl32::k16BitT32InstructionSizeInBytes,
+                                 CodeBufferCheckScope::kExactSize);
+
+        __ it(eq);
+        __ mov(eq, out, 1);
+      } else {
+        __ B(ne, final_label, /* far_target */ false);
+        __ Bind(&exact_check);
+        __ Mov(out, 1);
+      }
+
       break;
     }
 
@@ -6759,9 +7300,6 @@
       codegen_->AddSlowPath(slow_path);
       __ B(ne, slow_path->GetEntryLabel());
       __ Mov(out, 1);
-      if (zero.IsReferenced()) {
-        __ B(&done);
-      }
       break;
     }
 
@@ -6790,18 +7328,10 @@
                                                                         /* is_fatal */ false);
       codegen_->AddSlowPath(slow_path);
       __ B(slow_path->GetEntryLabel());
-      if (zero.IsReferenced()) {
-        __ B(&done);
-      }
       break;
     }
   }
 
-  if (zero.IsReferenced()) {
-    __ Bind(&zero);
-    __ Mov(out, 0);
-  }
-
   if (done.IsReferenced()) {
     __ Bind(&done);
   }
@@ -6877,9 +7407,10 @@
   codegen_->AddSlowPath(type_check_slow_path);
 
   vixl32::Label done;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(instruction, &done);
   // Avoid null check if we know obj is not null.
   if (instruction->MustDoNullCheck()) {
-    __ CompareAndBranchIfZero(obj, &done, /* far_target */ false);
+    __ CompareAndBranchIfZero(obj, final_label, /* far_target */ false);
   }
 
   switch (type_check_kind) {
@@ -6943,7 +7474,7 @@
       vixl32::Label loop;
       __ Bind(&loop);
       __ Cmp(temp, cls);
-      __ B(eq, &done, /* far_target */ false);
+      __ B(eq, final_label, /* far_target */ false);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
       GenerateReferenceLoadOneRegister(instruction,
@@ -6971,7 +7502,7 @@
 
       // Do an exact check.
       __ Cmp(temp, cls);
-      __ B(eq, &done, /* far_target */ false);
+      __ B(eq, final_label, /* far_target */ false);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
@@ -7039,7 +7570,9 @@
       break;
     }
   }
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 
   __ Bind(type_check_slow_path->GetExitLabel());
 }
@@ -7231,10 +7764,12 @@
     return;
   }
   if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
-  __ And(out, first, value);
+    __ And(out, first, value);
+  } else if (GetAssembler()->ShifterOperandCanHold(BIC, ~value)) {
+    __ Bic(out, first, ~value);
   } else {
-    DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value));
-  __ Bic(out, first, ~value);
+    DCHECK(IsPowerOfTwo(value + 1));
+    __ Ubfx(out, first, 0, WhichPowerOf2(value + 1));
   }
 }
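
The new `Ubfx` fallback works because `UBFX(out, in, 0, n)` extracts the low n bits, which is exactly an AND with the mask 2^n - 1; `WhichPowerOf2(value + 1)` recovers n from the mask. A quick standalone check of the equivalence:

    #include <cassert>
    #include <cstdint>

    // UBFX(out, in, lsb = 0, width = n) keeps bits [0, n), i.e. in & (2^n - 1).
    uint32_t UbfxLow(uint32_t in, uint32_t width) {
      return (width == 32) ? in : (in & ((1u << width) - 1));
    }

    int main() {
      uint32_t value = 0x00FFFFFF;         // mask of the form 2^n - 1 (n = 24)
      assert(((value + 1) & value) == 0);  // IsPowerOfTwo(value + 1)
      assert(UbfxLow(0xDEADBEEF, 24) == (0xDEADBEEF & value));
      return 0;
    }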
 
@@ -7974,9 +8509,7 @@
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateBootImageAddressLiteral(uint32_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 VIXLUInt32Literal* CodeGeneratorARMVIXL::DeduplicateDexCacheAddressLiteral(uint32_t address) {
@@ -8036,8 +8569,7 @@
       /* MOVW+MOVT for each entry */ 2u * pc_relative_string_patches_.size() +
       boot_image_type_patches_.size() +
       /* MOVW+MOVT for each entry */ 2u * pc_relative_type_patches_.size() +
-      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size() +
-      boot_image_address_patches_.size();
+      /* MOVW+MOVT for each entry */ 2u * type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -8071,13 +8603,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    VIXLUInt32Literal* literal = entry.second;
-    DCHECK(literal->IsBound());
-    uint32_t literal_offset = literal->GetLocation();
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 2a636db..1e9669d 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -401,10 +401,6 @@
   void GenerateCompareTestAndBranch(HCondition* condition,
                                     vixl::aarch32::Label* true_target,
                                     vixl::aarch32::Label* false_target);
-  void GenerateVcmp(HInstruction* instruction);
-  void GenerateFPJumps(HCondition* cond,
-                       vixl::aarch32::Label* true_label,
-                       vixl::aarch32::Label* false_label);
   void GenerateLongComparesAndJumps(HCondition* cond,
                                     vixl::aarch32::Label* true_label,
                                     vixl::aarch32::Label* false_label);
@@ -510,6 +506,8 @@
     return &(block_labels_[block->GetBlockId()]);
   }
 
+  vixl32::Label* GetFinalLabel(HInstruction* instruction, vixl32::Label* final_label);
+
   void Initialize() OVERRIDE {
     block_labels_.resize(GetGraph()->GetBlocks().size());
   }
@@ -752,8 +750,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
 
   // Patches for string literals in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index c9dde7c..287891f 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -391,7 +391,8 @@
 
 class TypeCheckSlowPathMIPS : public SlowPathCodeMIPS {
  public:
-  explicit TypeCheckSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+  explicit TypeCheckSlowPathMIPS(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeMIPS(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -401,7 +402,9 @@
     CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
+    if (!is_fatal_) {
+      SaveLiveRegisters(codegen, locations);
+    }
 
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
@@ -424,13 +427,19 @@
       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
-    RestoreLiveRegisters(codegen, locations);
-    __ B(GetExitLabel());
+    if (!is_fatal_) {
+      RestoreLiveRegisters(codegen, locations);
+      __ B(GetExitLabel());
+    }
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS"; }
 
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
  private:
+  const bool is_fatal_;
+
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS);
 };
 
@@ -452,6 +461,536 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS);
 };
 
+class ArraySetSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  explicit ArraySetSlowPathMIPS(HInstruction* instruction) : SlowPathCodeMIPS(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    mips_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS);
+};
+
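
The HParallelMove above is needed because the three argument moves may overlap: an input can already live in a calling-convention register that another input must be moved into. The move resolver orders the moves and breaks cycles with a temporary. A toy illustration of the cycle case:

    #include <cassert>
    #include <utility>

    int main() {
      int a0 = 1, a1 = 2;  // want: a0 <- old a1 and a1 <- old a0 (a move cycle)
      // Naive sequential moves (a0 = a1; a1 = a0;) would leave both equal to 2.
      // A parallel-move resolver breaks the cycle via a temporary, i.e. a swap.
      std::swap(a0, a1);
      assert(a0 == 2 && a1 == 1);
      return 0;
    }
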
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location, it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierMarkSlowPathMIPS(HInstruction* instruction,
+                              Location ref,
+                              Location entrypoint = Location::NoLocation())
+      : SlowPathCodeMIPS(instruction), ref_(ref), entrypoint_(entrypoint) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == FP)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    if (entrypoint_.IsValid()) {
+      mips_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      DCHECK_EQ(entrypoint_.AsRegister<Register>(), T9);
+      __ Jalr(entrypoint_.AsRegister<Register>());
+      __ NopIfNoReordering();
+    } else {
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+      // This runtime call does not require a stack map.
+      mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                        instruction_,
+                                                        this,
+                                                        /* direct */ false);
+    }
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if already loaded.
+  const Location entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS);
+};
+
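
The `ref_reg - 1` used in the offset computation above indexes a per-register table of ReadBarrierMarkRegX entrypoints stored in the Thread object; register 0 (ZERO on MIPS) can never hold a reference, so the table starts at register 1. A sketch of that lookup; the struct layout and field name here are assumptions made for the sketch, not ART's actual Thread layout:

    #include <cstddef>
    #include <cstdint>

    // Illustrative per-register entrypoint table (layout assumed, not ART's).
    struct ThreadEntrypoints {
      uintptr_t read_barrier_mark_reg[30];  // one entrypoint per markable register
    };

    constexpr size_t ReadBarrierMarkEntryPointOffset(size_t reg) {
      // `reg` is 1-based: register 0 is the hard-wired zero register.
      return offsetof(ThreadEntrypoints, read_barrier_mark_reg) +
             (reg - 1) * sizeof(uintptr_t);
    }

    static_assert(ReadBarrierMarkEntryPointOffset(1) ==
                      offsetof(ThreadEntrypoints, read_barrier_mark_reg),
                  "first markable register maps to the start of the table");
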
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathMIPS(HInstruction* instruction,
+                                            Location ref,
+                                            Register obj,
+                                            Location field_offset,
+                                            Register temp1)
+      : SlowPathCodeMIPS(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_offset_(field_offset),
+        temp1_(temp1) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+    DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+    __ Bind(GetEntryLabel());
+
+    // Save the old reference.
+    // Note that we cannot use AT or TMP to save the old reference, as those
+    // are used by the code that follows, but we need the old reference after
+    // the call to the ReadBarrierMarkRegX entry point.
+    DCHECK_NE(temp1_, AT);
+    DCHECK_NE(temp1_, TMP);
+    __ Move(temp1_, ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T7) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == FP)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(ref_reg - 1);
+    // This runtime call does not require a stack map.
+    mips_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                      instruction_,
+                                                      this,
+                                                      /* direct */ false);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*(obj_ + field_offset_)`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // compare-and-set (CAS) loop below would abort, leaving the
+    // field as-is.
+    MipsLabel done;
+    __ Beq(temp1_, ref_reg, &done);
+
+    // Update the holder's field atomically.  This may fail if the
+    // mutator updates the field before us, but that is OK.  This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+
+    // Convenience aliases.
+    Register base = obj_;
+    // The UnsafeCASObject intrinsic uses a register pair as field
+    // offset ("long offset"), of which only the low part contains
+    // data.
+    Register offset = field_offset_.AsRegisterPairLow<Register>();
+    Register expected = temp1_;
+    Register value = ref_reg;
+    Register tmp_ptr = TMP;      // Pointer to actual memory.
+    Register tmp = AT;           // Value in memory.
+
+    __ Addu(tmp_ptr, base, offset);
+
+    if (kPoisonHeapReferences) {
+      __ PoisonHeapReference(expected);
+      // Do not poison `value` if it is the same register as
+      // `expected`, which has just been poisoned.
+      if (value != expected) {
+        __ PoisonHeapReference(value);
+      }
+    }
+
+    // do {
+    //   tmp = [r_ptr] - expected;
+    // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+
+    bool is_r6 = mips_codegen->GetInstructionSetFeatures().IsR6();
+    MipsLabel loop_head, exit_loop;
+    __ Bind(&loop_head);
+    if (is_r6) {
+      __ LlR6(tmp, tmp_ptr);
+    } else {
+      __ LlR2(tmp, tmp_ptr);
+    }
+    __ Bne(tmp, expected, &exit_loop);
+    __ Move(tmp, value);
+    if (is_r6) {
+      __ ScR6(tmp, tmp_ptr);
+    } else {
+      __ ScR2(tmp, tmp_ptr);
+    }
+    __ Beqz(tmp, &loop_head);
+    __ Bind(&exit_loop);
+
+    if (kPoisonHeapReferences) {
+      __ UnpoisonHeapReference(expected);
+      // Do not unpoison `value` if it is the same register as
+      // `expected`, which has just been unpoisoned.
+      if (value != expected) {
+        __ UnpoisonHeapReference(value);
+      }
+    }
+
+    __ Bind(&done);
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const Register obj_;
+  // The location of the offset of the marked reference field within `obj_`.
+  Location field_offset_;
+
+  const Register temp1_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS);
+};
+
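
The LL/SC loop above implements what the comment describes: a strong compare-and-set with relaxed memory ordering, whose expected value is the old reference and whose desired value is the new one; a store-conditional failure retries, while a value mismatch exits the loop. In portable C++ the same update step would look like this (a sketch, not the runtime's code):

    #include <atomic>
    #include <cstdint>

    // Returns true if the field was updated; false means another thread
    // changed the field first, in which case it is deliberately left as-is.
    bool UpdateFieldIfUnchanged(std::atomic<uint32_t>* field,
                                uint32_t old_ref,
                                uint32_t new_ref) {
      uint32_t expected = old_ref;
      // Strong CAS (no spurious failure, matching the LL/SC retry loop) with
      // relaxed ordering, as stated in the comment above.
      return field->compare_exchange_strong(expected, new_ref,
                                            std::memory_order_relaxed);
    }
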
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierForHeapReferenceSlowPathMIPS(HInstruction* instruction,
+                                          Location out,
+                                          Location ref,
+                                          Location obj,
+                                          uint32_t offset,
+                                          Location index)
+      : SlowPathCodeMIPS(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like other "inputs" of this slow path),
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        Register index_reg = index_.AsRegister<Register>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::mips::MipsAssembler::Sll and
+          // art::mips::MipsAssembler::Addiu32 below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          Register free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Move(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Sll(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Addiu32(index_reg, index_reg, offset_);
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegisterPair());
+        // UnsafeGet's offset location is a register pair; only the low
+        // part contains the correct offset.
+        index = index_.ToLow();
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+    }
+    mips_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForHeapReferenceSlowPathMIPS"; }
+
+ private:
+  Register FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<int>(ref_.AsRegister<Register>());
+    size_t obj = static_cast<int>(obj_.AsRegister<Register>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref &&
+          i != obj &&
+          !codegen->IsCoreCalleeSaveRegister(i) &&
+          !codegen->IsBlockedCoreRegister(i)) {
+        return static_cast<Register>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on MIPS
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS);
+};
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS : public SlowPathCodeMIPS {
+ public:
+  ReadBarrierForRootSlowPathMIPS(HInstruction* instruction, Location out, Location root)
+      : SlowPathCodeMIPS(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register reg_out = out_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorMIPS* mips_codegen = down_cast<CodeGeneratorMIPS*>(codegen);
+    mips_codegen->Move32(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
+    mips_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+                                instruction_,
+                                instruction_->GetDexPc(),
+                                this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    mips_codegen->Move32(out_, calling_convention.GetReturnLocation(Primitive::kPrimNot));
+
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS);
+};
+
 CodeGeneratorMIPS::CodeGeneratorMIPS(HGraph* graph,
                                      const MipsInstructionSetFeatures& isa_features,
                                      const CompilerOptions& compiler_options,
@@ -482,8 +1021,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       clobbered_ra_(false) {
@@ -1026,8 +1563,7 @@
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
-      boot_image_type_patches_.size() +
-      boot_image_address_patches_.size();
+      boot_image_type_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -1061,13 +1597,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1125,9 +1654,7 @@
 }
 
 Literal* CodeGeneratorMIPS::DeduplicateBootImageAddressLiteral(uint32_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 void CodeGeneratorMIPS::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
@@ -1313,10 +1840,26 @@
                                       uint32_t dex_pc,
                                       SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
+  GenerateInvokeRuntime(GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value(),
+                        IsDirectEntrypoint(entrypoint));
+  if (EntrypointRequiresStackMap(entrypoint)) {
+    RecordPcInfo(instruction, dex_pc, slow_path);
+  }
+}
+
+void CodeGeneratorMIPS::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                            HInstruction* instruction,
+                                                            SlowPathCode* slow_path,
+                                                            bool direct) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  GenerateInvokeRuntime(entry_point_offset, direct);
+}
+
+void CodeGeneratorMIPS::GenerateInvokeRuntime(int32_t entry_point_offset, bool direct) {
   bool reordering = __ SetReorder(false);
-  __ LoadFromOffset(kLoadWord, T9, TR, GetThreadOffset<kMipsPointerSize>(entrypoint).Int32Value());
+  __ LoadFromOffset(kLoadWord, T9, TR, entry_point_offset);
   __ Jalr(T9);
-  if (IsDirectEntrypoint(entrypoint)) {
+  if (direct) {
     // Reserve argument space on stack (for $a0-$a3) for
     // entrypoints that directly reference native implementations.
     // Called function may use this space to store $a0-$a3 regs.
@@ -1326,9 +1869,6 @@
     __ Nop();  // In delay slot.
   }
   __ SetReorder(reordering);
-  if (EntrypointRequiresStackMap(entrypoint)) {
-    RecordPcInfo(instruction, dex_pc, slow_path);
-  }
 }
 
 void InstructionCodeGeneratorMIPS::GenerateClassInitializationCheck(SlowPathCodeMIPS* slow_path,
@@ -1888,37 +2428,56 @@
 }
 
 void LocationsBuilderMIPS::VisitArrayGet(HArrayGet* instruction) {
+  Primitive::Type type = instruction->GetType();
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier
+                                                       ? LocationSummary::kCallOnSlowPath
+                                                       : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+  if (Primitive::IsFloatingPointType(type)) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_array_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
-auto InstructionCodeGeneratorMIPS::GetImplicitNullChecker(HInstruction* instruction) {
-  auto null_checker = [this, instruction]() {
-    this->codegen_->MaybeRecordImplicitNullCheck(instruction);
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS* codegen) {
+  auto null_checker = [codegen, instruction]() {
+    codegen->MaybeRecordImplicitNullCheck(instruction);
   };
   return null_checker;
 }
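
Turning GetImplicitNullChecker into a file-local function removes the member dependency, so any code holding a CodeGeneratorMIPS* can build the callback. A self-contained sketch of the pattern (all names illustrative, not ART API): the memory-access emitters accept a callable and invoke it right after emitting the first instruction that can fault, so the recorded PC matches the faulting instruction for implicit (SIGSEGV-based) null checks.

    struct Codegen {
      void MaybeRecordImplicitNullCheck(const void* instruction) {
        // Record a stack map at the PC of the instruction just emitted.
      }
    };

    static auto MakeNullChecker(const void* instruction, Codegen* codegen) {
      return [codegen, instruction]() {
        codegen->MaybeRecordImplicitNullCheck(instruction);
      };
    }

    template <typename NullChecker>
    void EmitLoad(NullChecker&& null_checker) {
      // ... emit the load instruction ...
      null_checker();  // Attribute a potential fault to this PC.
    }
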
 
 void InstructionCodeGeneratorMIPS::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
+  Location out_loc = locations->Out();
   Location index = locations->InAt(1);
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-  auto null_checker = GetImplicitNullChecker(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   Primitive::Type type = instruction->GetType();
   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
                                         instruction->IsStringCharAt();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1931,7 +2490,7 @@
     }
 
     case Primitive::kPrimByte: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
@@ -1944,7 +2503,7 @@
     }
 
     case Primitive::kPrimShort: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
@@ -1958,7 +2517,7 @@
     }
 
     case Primitive::kPrimChar: {
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (maybe_compressed_char_at) {
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
@@ -2011,10 +2570,9 @@
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      Register out = locations->Out().AsRegister<Register>();
+      Register out = out_loc.AsRegister<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2027,8 +2585,53 @@
       break;
     }
 
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+                                                        out_loc,
+                                                        obj,
+                                                        data_offset,
+                                                        index,
+                                                        temp,
+                                                        /* needs_null_check */ true);
+      } else {
+        Register out = out_loc.AsRegister<Register>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ LoadFromOffset(kLoadWord, out, obj, offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ Sll(TMP, index.AsRegister<Register>(), TIMES_4);
+          __ Addu(TMP, obj, TMP);
+          __ LoadFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction,
+                                                 out_loc,
+                                                 out_loc,
+                                                 obj_loc,
+                                                 data_offset,
+                                                 index);
+        }
+      }
+      break;
+    }
+
     case Primitive::kPrimLong: {
-      Register out = locations->Out().AsRegisterPairLow<Register>();
+      Register out = out_loc.AsRegisterPairLow<Register>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2042,7 +2645,7 @@
     }
 
     case Primitive::kPrimFloat: {
-      FRegister out = locations->Out().AsFpuRegister<FRegister>();
+      FRegister out = out_loc.AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
@@ -2056,7 +2659,7 @@
     }
 
     case Primitive::kPrimDouble: {
-      FRegister out = locations->Out().AsFpuRegister<FRegister>();
+      FRegister out = out_loc.AsFpuRegister<FRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
@@ -2114,23 +2717,28 @@
 }
 
 void LocationsBuilderMIPS::VisitArraySet(HArraySet* instruction) {
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      may_need_runtime_call_for_type_check ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
-    } else {
-      locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
-    }
+    locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary register for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   }
 }
 
@@ -2140,10 +2748,10 @@
   Location index = locations->InAt(1);
   Location value_location = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
-  auto null_checker = GetImplicitNullChecker(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
   Register base_reg = index.IsConstant() ? obj : TMP;
 
   switch (value_type) {
@@ -2184,9 +2792,27 @@
       break;
     }
 
-    case Primitive::kPrimInt:
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else {
+        __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        Register value = value_location.AsRegister<Register>();
+        __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+      }
+      break;
+    }
+
     case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
+      if (value_location.IsConstant()) {
+        // Just setting null.
         uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
         if (index.IsConstant()) {
           data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
@@ -2194,22 +2820,110 @@
           __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
           __ Addu(base_reg, obj, base_reg);
         }
-        if (value_location.IsConstant()) {
-          int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
-          __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
-          DCHECK(!needs_write_barrier);
-        } else {
-          Register value = value_location.AsRegister<Register>();
-          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
-          if (needs_write_barrier) {
-            DCHECK_EQ(value_type, Primitive::kPrimNot);
-            codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        DCHECK_EQ(value, 0);
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      Register value = value_location.AsRegister<Register>();
+      Register temp1 = locations->GetTemp(0).AsRegister<Register>();
+      Register temp2 = TMP;  // Doesn't need to survive slow path.
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      MipsLabel done;
+      SlowPathCodeMIPS* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          MipsLabel non_zero;
+          __ Bnez(value, &non_zero);
+          uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+          if (index.IsConstant()) {
+            data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+          } else {
+            __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+            __ Addu(base_reg, obj, base_reg);
           }
+          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          __ B(&done);
+          __ Bind(&non_zero);
         }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
+
+        // /* HeapReference<Class> */ temp1 = obj->klass_
+        __ LoadFromOffset(kLoadWord, temp1, obj, class_offset, null_checker);
+        __ MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        __ LoadFromOffset(kLoadWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, no need to unpoison `temp1`
+        // nor `temp2`, as we are comparing two poisoned references.
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          MipsLabel do_put;
+          __ Beq(temp1, temp2, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ Bnez(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ Bne(temp1, temp2, slow_path->GetEntryLabel());
+        }
+      }
+
+      Register source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        __ Move(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
-        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+        __ Sll(base_reg, index.AsRegister<Register>(), TIMES_4);
+        __ Addu(base_reg, obj, base_reg);
+      }
+      __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
       }
       break;
     }
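
The inline type check emitted above is dense; as pseudocode, the reference ArraySet fast path reduces to the following (a summary of the generated instructions, with the slow path handling everything the inline checks reject):

    if (value == nullptr) {                     // only emitted if GetValueCanBeNull()
      array[index] = nullptr;                   // no type check or write barrier needed
    } else {
      Class* component = array->klass_->component_type_;
      if (component != value->klass_) {
        if (!static_type_is_object_array) goto slow_path;        // full check at runtime
        if (component->super_class_ != nullptr) goto slow_path;  // component is not Object
      }
      array[index] = maybe_poison(value);
      MarkGCCard(array, value);                 // write barrier for the GC
    }
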
@@ -2299,30 +3013,234 @@
   __ Bgeu(index, length, slow_path->GetEntryLabel());
 }
 
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
 void LocationsBuilderMIPS::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction,
-      LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathMIPS uses this register too.
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register obj_cls = locations->GetTemp(0).AsRegister<Register>();
+  Location temp_loc = locations->GetTemp(0);
+  Register temp = temp_loc.AsRegister<Register>();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+  MipsLabel done;
 
-  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
+  // Always false for read barriers, since we may need to go to the entrypoint for non-fatal
+  // cases arising from false negatives; false negatives are possible because the checks below
+  // avoid read barriers for performance and code size reasons.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
+  SlowPathCodeMIPS* slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+                                                         is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(slow_path);
 
-  // TODO: avoid this check if we know obj is not null.
-  __ Beqz(obj, slow_path->GetExitLabel());
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
-  __ Bne(obj_cls, cls, slow_path->GetEntryLabel());
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Beqz(obj, &done);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ Bne(temp, cls, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      MipsLabel loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Beqz(temp, slow_path->GetEntryLabel());
+      // Otherwise, compare the classes.
+      __ Bne(temp, cls, &loop);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Walk over the class hierarchy to find a match.
+      MipsLabel loop;
+      __ Bind(&loop);
+      __ Beq(temp, cls, &done);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception. Otherwise, jump to the beginning of the loop.
+      __ Bnez(temp, &loop);
+      __ B(slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Do an exact check.
+      __ Beq(temp, cls, &done);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Beqz(temp, slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array, further check that this component
+      // type is not a primitive type.
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Bnez(temp, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+      // We always go into the type check slow path for the unresolved check case.
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require assigning
+      // fixed registers for the inputs of this HCheckCast instruction
+      // (following the runtime calling convention), which might be
+      // cluttered by the potential first read barrier emission at the
+      // beginning of this method.
+      __ B(slow_path->GetEntryLabel());
+      break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path. We cannot get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Lw(TMP, temp, array_length_offset);
+      // Loop through the iftable and check if any class matches.
+      MipsLabel loop;
+      __ Bind(&loop);
+      __ Addiu(temp, temp, 2 * kHeapReferenceSize);  // Possibly in delay slot on R2.
+      __ Beqz(TMP, slow_path->GetEntryLabel());
+      __ Lw(AT, temp, object_array_data_offset - 2 * kHeapReferenceSize);
+      __ MaybeUnpoisonHeapReference(AT);
+      // Go to next interface.
+      __ Addiu(TMP, TMP, -2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Bne(AT, cls, &loop);
+      break;
+    }
+  }
+
+  __ Bind(&done);
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -4855,8 +5773,15 @@
   Primitive::Type field_type = field_info.GetFieldType();
   bool is_wide = (field_type == Primitive::kPrimLong) || (field_type == Primitive::kPrimDouble);
   bool generate_volatile = field_info.IsVolatile() && is_wide;
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction, generate_volatile ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
+      instruction,
+      generate_volatile
+          ? LocationSummary::kCallOnMainOnly
+          : (object_field_get_with_read_barrier
+              ? LocationSummary::kCallOnSlowPath
+              : LocationSummary::kNoCall));
 
   locations->SetInAt(0, Location::RequiresRegister());
   if (generate_volatile) {
@@ -4877,7 +5802,18 @@
     if (Primitive::IsFloatingPointType(instruction->GetType())) {
       locations->SetOut(Location::RequiresFpuRegister());
     } else {
-      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      // The output overlaps in the case of an object field get with
+      // read barriers enabled: we do not want the move to overwrite the
+      // object's location, as we need it to emit the read barrier.
+      locations->SetOut(Location::RequiresRegister(),
+                        object_field_get_with_read_barrier
+                            ? Location::kOutputOverlap
+                            : Location::kNoOutputOverlap);
+    }
+    if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+      // We need a temporary register for the read barrier marking slow
+      // path in CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier.
+      locations->AddTemp(Location::RequiresRegister());
     }
   }
 }
@@ -4887,11 +5823,13 @@
                                                   uint32_t dex_pc) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
+  Location dst_loc = locations->Out();
   LoadOperandType load_type = kLoadUnsignedByte;
   bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-  auto null_checker = GetImplicitNullChecker(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4930,37 +5868,61 @@
     CheckEntrypointTypes<kQuickA64Load, int64_t, volatile const int64_t*>();
     if (type == Primitive::kPrimDouble) {
       // FP results are returned in core registers. Need to move them.
-      Location out = locations->Out();
-      if (out.IsFpuRegister()) {
-        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), out.AsFpuRegister<FRegister>());
+      if (dst_loc.IsFpuRegister()) {
+        __ Mtc1(locations->GetTemp(1).AsRegister<Register>(), dst_loc.AsFpuRegister<FRegister>());
         __ MoveToFpuHigh(locations->GetTemp(2).AsRegister<Register>(),
-                         out.AsFpuRegister<FRegister>());
+                         dst_loc.AsFpuRegister<FRegister>());
       } else {
-        DCHECK(out.IsDoubleStackSlot());
+        DCHECK(dst_loc.IsDoubleStackSlot());
         __ StoreToOffset(kStoreWord,
                          locations->GetTemp(1).AsRegister<Register>(),
                          SP,
-                         out.GetStackIndex());
+                         dst_loc.GetStackIndex());
         __ StoreToOffset(kStoreWord,
                          locations->GetTemp(2).AsRegister<Register>(),
                          SP,
-                         out.GetStackIndex() + 4);
+                         dst_loc.GetStackIndex() + 4);
       }
     }
   } else {
-    if (!Primitive::IsFloatingPointType(type)) {
+    if (type == Primitive::kPrimNot) {
+      // /* HeapReference<Object> */ dst = *(obj + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                        dst_loc,
+                                                        obj,
+                                                        offset,
+                                                        temp_loc,
+                                                        /* needs_null_check */ true);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadWord, dst_loc.AsRegister<Register>(), obj, offset, null_checker);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+      }
+    } else if (!Primitive::IsFloatingPointType(type)) {
       Register dst;
       if (type == Primitive::kPrimLong) {
-        DCHECK(locations->Out().IsRegisterPair());
-        dst = locations->Out().AsRegisterPairLow<Register>();
+        DCHECK(dst_loc.IsRegisterPair());
+        dst = dst_loc.AsRegisterPairLow<Register>();
       } else {
-        DCHECK(locations->Out().IsRegister());
-        dst = locations->Out().AsRegister<Register>();
+        DCHECK(dst_loc.IsRegister());
+        dst = dst_loc.AsRegister<Register>();
       }
       __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
     } else {
-      DCHECK(locations->Out().IsFpuRegister());
-      FRegister dst = locations->Out().AsFpuRegister<FRegister>();
+      DCHECK(dst_loc.IsFpuRegister());
+      FRegister dst = dst_loc.AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
         __ LoadSFromOffset(dst, obj, offset, null_checker);
       } else {
@@ -4969,7 +5931,9 @@
     }
   }
 
-  if (is_volatile) {
+  // Memory barriers, in the case of references, are handled in the
+  // previous switch statement.
+  if (is_volatile && (type != Primitive::kPrimNot)) {
     GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
   }
 }
@@ -5016,7 +5980,8 @@
   StoreOperandType store_type = kStoreByte;
   bool is_volatile = field_info.IsVolatile();
   uint32_t offset = field_info.GetFieldOffset().Uint32Value();
-  auto null_checker = GetImplicitNullChecker(instruction);
+  bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -5089,7 +6054,16 @@
       } else {
         src = value_location.AsRegister<Register>();
       }
-      __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(type, Primitive::kPrimNot);
+        __ PoisonHeapReference(TMP, src);
+        __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+      } else {
+        __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      }
     } else {
       FRegister src = value_location.AsFpuRegister<FRegister>();
       if (type == Primitive::kPrimFloat) {
@@ -5100,8 +6074,7 @@
     }
   }
 
-  // TODO: memory barriers?
-  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
+  if (needs_write_barrier) {
     Register src = value_location.AsRegister<Register>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
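
MarkGCCard is the GC write barrier: after a reference store it dirties the card covering the holder object so the collector can find cross-space pointers without scanning the whole heap. Conceptually it amounts to the sketch below; the biased card-table base kept in a Thread field and the card shift are ART implementation details assumed here:

    // Each card covers 2^kCardShift bytes of heap. ART's dirty marker is the
    // low byte of the biased card-table base itself, so one store suffices.
    uint8_t* card_base = thread->card_table_;  // assumed field name
    card_base[reinterpret_cast<uintptr_t>(obj) >> kCardShift] =
        static_cast<uint8_t>(reinterpret_cast<uintptr_t>(card_base));
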
@@ -5130,14 +6103,133 @@
                  instruction->GetValueCanBeNull());
 }
 
-void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(
-    HInstruction* instruction ATTRIBUTE_UNUSED,
-    Location root,
-    Register obj,
-    uint32_t offset) {
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  Register out_reg = out.AsRegister<Register>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Move(maybe_temp.AsRegister<Register>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  Register out_reg = out.AsRegister<Register>();
+  Register obj_reg = obj.AsRegister<Register>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenerateGcRootFieldLoad(HInstruction* instruction,
+                                                           Location root,
+                                                           Register obj,
+                                                           uint32_t offset,
+                                                           ReadBarrierOption read_barrier_option) {
   Register root_reg = root.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
+  if (read_barrier_option == kWithReadBarrier) {
+    DCHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barriers are used:
+      //
+      //   root = obj.field;
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(T9);
+      SlowPathCodeMIPS* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMipsPointerSize>(root.reg() - 1);
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking; checking it saves one load compared
+      // to checking GetIsGcMarking.
+      __ Bnez(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Addiu32(root_reg, obj, offset);
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
   } else {
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -5147,47 +6239,425 @@
   }
 }
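
The Baker-style root load above leans on a per-register family of mark entrypoints (pReadBarrierMarkRegNN) in the Thread entrypoint table: since there is one entrypoint per core register, the slow path knows which register holds the reference and can mark it in place without argument moves. The offset computation reduces to indexing that table; a sketch, with the base offset assumed:

    // One pointer-sized entry per core register; root.reg() - 1 is used above
    // because the MIPS zero register can never hold a GC root.
    int32_t GetReadBarrierMarkEntryPointsOffset(size_t reg_index) {
      return kFirstMarkEntrypointOffset +  // assumed constant
             static_cast<int32_t>(reg_index) * static_cast<int32_t>(kMipsPointerSize);
    }
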
 
+void CodeGeneratorMIPS::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                              Location ref,
+                                                              Register obj,
+                                                              uint32_t offset,
+                                                              Location temp,
+                                                              bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                              Location ref,
+                                                              Register obj,
+                                                              uint32_t data_offset,
+                                                              Location index,
+                                                              Location temp,
+                                                              bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                  Location ref,
+                                                                  Register obj,
+                                                                  uint32_t offset,
+                                                                  Location index,
+                                                                  ScaleFactor scale_factor,
+                                                                  Location temp,
+                                                                  bool needs_null_check,
+                                                                  bool always_update_field) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  Register ref_reg = ref.AsRegister<Register>();
+  Register temp_reg = temp.AsRegister<Register>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  __ Sync(0);  // Barrier to prevent load-load reordering.
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+      __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
+    } else {
+      // Handle the special case of the
+      // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+      // intrinsics, which use a register pair as index ("long
+      // offset"), of which only the low part contains data.
+      Register index_reg = index.IsRegisterPair()
+          ? index.AsRegisterPairLow<Register>()
+          : index.AsRegister<Register>();
+      __ Sll(TMP, index_reg, scale_factor);
+      __ Addu(TMP, obj, TMP);
+      __ LoadFromOffset(kLoadWord, ref_reg, TMP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCodeMIPS* slow_path;
+  if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathMIPS only supports address
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register pair (of which only the lower half
+    // is used). Thus `offset` above is expected to be 0 and
+    // `scale_factor` to be TIMES_1 in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    slow_path = new (GetGraph()->GetArena())
+        ReadBarrierMarkAndUpdateFieldSlowPathMIPS(instruction,
+                                                  ref,
+                                                  obj,
+                                                  /* field_offset */ index,
+                                                  temp_reg);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS(instruction, ref);
+  }
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::GrayState())
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit into the sign bit (31) and
+  // performing a branch on less than zero.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+  __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+  __ Bltz(temp_reg, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
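
The final gray-state test is branch-minimal: the one-bit rb_state is shifted from kReadBarrierStateShift up into the sign bit, so a single Bltz decides whether to mark. In C terms:

    // Equivalent of the Sll/Bltz pair: rb_state == 1 (gray) iff the shifted
    // lock word is negative as a signed 32-bit value.
    int32_t shifted =
        static_cast<int32_t>(monitor << (31 - LockWord::kReadBarrierStateShift));
    if (shifted < 0) {
      ref = Mark(ref);  // runtime slow path marks (and may forward) the reference
    }
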
+
+void CodeGeneratorMIPS::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                Location out,
+                                                Location ref,
+                                                Location obj,
+                                                uint32_t offset,
+                                                Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathMIPS(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                     Location out,
+                                                     Location ref,
+                                                     Location obj,
+                                                     uint32_t offset,
+                                                     Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<Register>());
+  }
+}
+
+void CodeGeneratorMIPS::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeMIPS* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ B(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderMIPS::VisitInstanceOf(HInstanceOf* instruction) {
-  LocationSummary::CallKind call_kind =
-      instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  Register obj = locations->InAt(0).AsRegister<Register>();
+  Location obj_loc = locations->InAt(0);
+  Register obj = obj_loc.AsRegister<Register>();
   Register cls = locations->InAt(1).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
-
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   MipsLabel done;
+  SlowPathCodeMIPS* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
-  // TODO: Avoid this check if we know `obj` is not null.
-  __ Move(out, ZERO);
-  __ Beqz(obj, &done);
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Move(out, ZERO);
+    __ Beqz(obj, &done);
+  }
 
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadWord, out, obj, mirror::Object::ClassOffset().Int32Value());
-  if (instruction->IsExactCheck()) {
-    // Classes must be equal for the instanceof to succeed.
-    __ Xor(out, out, cls);
-    __ Sltiu(out, out, 1);
-  } else {
-    // If the classes are not equal, we go into a slow path.
-    DCHECK(locations->OnlyCallsOnSlowPath());
-    SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction);
-    codegen_->AddSlowPath(slow_path);
-    __ Bne(out, cls, slow_path->GetEntryLabel());
-    __ LoadConst32(out, 1);
-    __ Bind(slow_path->GetExitLabel());
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Classes must be equal for the instanceof to succeed.
+      __ Xor(out, out, cls);
+      __ Sltiu(out, out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      MipsLabel loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqz(out, &done);
+      __ Bne(out, cls, &loop);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Walk over the class hierarchy to find a match.
+      MipsLabel loop, success;
+      __ Bind(&loop);
+      __ Beq(out, cls, &success);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      __ Bnez(out, &loop);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ B(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Do an exact check.
+      MipsLabel success;
+      __ Beq(out, cls, &success);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqz(out, &done);
+      __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Sltiu(out, out, 1);
+      __ B(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+                                                                     /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ Bne(out, cls, slow_path->GetEntryLabel());
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on the slow path, but we
+      // always go into the slow path for the unresolved and
+      // interface check cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require assigning fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS(instruction,
+                                                                     /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      break;
+    }
   }
 
   __ Bind(&done);
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
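
The switch above implements, per TypeCheckKind, the following high-level
semantics. A hedged, stand-alone C++ sketch (Class and Object are simplified
stand-ins for the mirror:: types, and InstanceOfSlowPath stands in for the
runtime call made by the type checking slow path):

  #include <cstdint>

  enum class TypeCheckKind { kExactCheck, kAbstractClassCheck,
                             kClassHierarchyCheck, kArrayObjectCheck,
                             kArrayCheck, kUnresolvedCheck, kInterfaceCheck };

  struct Class {
    Class* super_class;       // Walked by the abstract/hierarchy checks.
    Class* component_type;    // Non-null only for array classes.
    uint16_t primitive_type;  // 0 (kPrimNot) means reference component.
  };
  struct Object { Class* klass; };

  bool InstanceOfSlowPath(Object* obj, Class* cls);  // Runtime stand-in.

  bool InstanceOfSketch(Object* obj, Class* cls, TypeCheckKind kind) {
    if (obj == nullptr) return false;  // The MustDoNullCheck() early-out.
    Class* k = obj->klass;
    switch (kind) {
      case TypeCheckKind::kExactCheck:
        return k == cls;  // The Xor/Sltiu pair computes exactly this.
      case TypeCheckKind::kAbstractClassCheck:
        // cls is abstract, so obj's own class can never equal it: fetch
        // the super class eagerly before the first compare.
        do { k = k->super_class; } while (k != nullptr && k != cls);
        return k != nullptr;
      case TypeCheckKind::kClassHierarchyCheck:
        for (; k != nullptr; k = k->super_class) {
          if (k == cls) return true;
        }
        return false;
      case TypeCheckKind::kArrayObjectCheck:
        // Exact match, or any array with a non-primitive component type.
        if (k == cls) return true;
        return k->component_type != nullptr &&
               k->component_type->primitive_type == 0;
      default:
        // kArrayCheck, kUnresolvedCheck, kInterfaceCheck: resolved in
        // the runtime via the type checking slow path.
        return InstanceOfSlowPath(obj, cls);
    }
  }
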
 
 void LocationsBuilderMIPS::VisitIntConstant(HIntConstant* constant) {
@@ -5239,6 +6709,14 @@
     __ LoadFromOffset(kLoadWord, temp, receiver.AsRegister<Register>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference, and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (future
+  // versions of the collector may not preserve this property).
+  __ MaybeUnpoisonHeapReference(temp);
   __ LoadFromOffset(kLoadWord, temp, temp,
       mirror::Class::ImtPtrOffset(kMipsPointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -5307,9 +6785,6 @@
 
 HLoadString::LoadKind CodeGeneratorMIPS::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   // TODO: Create as many MipsDexCacheArraysBase instructions as needed for methods
@@ -5345,9 +6820,6 @@
 
 HLoadClass::LoadKind CodeGeneratorMIPS::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   // We disable PC-relative load on pre-R6 when there is an irreducible loop, as the optimization
   // is incompatible with it.
   bool has_irreducible_loops = GetGraph()->HasIrreducibleLoops();
@@ -5562,6 +7034,14 @@
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference, and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (future
+  // versions of the collector may not preserve this property).
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadWord, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -5588,12 +7068,13 @@
     CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
         cls,
         Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Location::RegisterLocation(V0));
+        calling_convention.GetReturnLocation(Primitive::kPrimNot));
     return;
   }
   DCHECK(!cls->NeedsAccessCheck());
 
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -5648,6 +7129,9 @@
       break;
   }
 
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5657,11 +7141,13 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               base_or_current_method_reg,
-                              ArtMethod::DeclaringClassOffset().Int32Value());
+                              ArtMethod::DeclaringClassOffset().Int32Value(),
+                              read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out,
                      base_or_current_method_reg,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -5669,6 +7155,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorMIPS::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       bool reordering = __ SetReorder(false);
@@ -5678,7 +7165,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
@@ -5692,7 +7179,7 @@
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       __ SetReorder(reordering);
       generate_null_check = true;
       break;
@@ -5704,7 +7191,7 @@
       bool reordering = __ SetReorder(false);
       __ Bind(&info->high_label);
       __ Lui(out, /* placeholder */ 0x1234);
-      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       __ SetReorder(reordering);
       break;
     }
@@ -5837,7 +7324,11 @@
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       bool reordering = __ SetReorder(false);
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out, base_or_current_method_reg);
-      __ LoadFromOffset(kLoadWord, out, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
       __ SetReorder(reordering);
       SlowPathCodeMIPS* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS(load);
       codegen_->AddSlowPath(slow_path);
@@ -5853,7 +7344,11 @@
       bool reordering = __ SetReorder(false);
       __ Bind(&info->high_label);
       __ Lui(out, /* placeholder */ 0x1234);
-      GenerateGcRootFieldLoad(load, out_loc, out, /* placeholder */ 0x5678);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
       __ SetReorder(reordering);
       return;
     }
@@ -6059,6 +7554,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewArray(HNewArray* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
 }
@@ -6076,6 +7573,8 @@
 }
 
 void InstructionCodeGeneratorMIPS::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     Register temp = instruction->GetLocations()->GetTemp(0).AsRegister<Register>();
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 47eba50..3875c4b 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -241,6 +241,38 @@
                       uint32_t dex_pc,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info, uint32_t dex_pc);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -249,7 +281,9 @@
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                Register obj,
-                               uint32_t offset);
+                               uint32_t offset,
+                               ReadBarrierOption read_barrier_option);
+
   void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
   // When the function returns `false` it means that the condition holds if `dst` is non-zero
   // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
@@ -297,7 +331,6 @@
   void GenerateDivRemWithAnyConstant(HBinaryOperation* instruction);
   void GenerateDivRemIntegral(HBinaryOperation* instruction);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
-  auto GetImplicitNullChecker(HInstruction* instruction);
   void GenPackedSwitchWithCompares(Register value_reg,
                                    int32_t lower_bound,
                                    uint32_t num_entries,
@@ -354,6 +387,91 @@
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
   void MarkGCCard(Register object, Register value, bool value_can_be_null);
 
   // Register allocation.
@@ -401,6 +519,15 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path = nullptr) OVERRIDE;
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path,
+                                           bool direct);
+
+  void GenerateInvokeRuntime(int32_t entry_point_offset, bool direct);
+
   ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
@@ -536,8 +663,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
   // Patches for string root accesses in JIT compiled code.
   ArenaDeque<JitPatchInfo> jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 5be0da4..78b31e9 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -336,7 +336,8 @@
 
 class TypeCheckSlowPathMIPS64 : public SlowPathCodeMIPS64 {
  public:
-  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+  explicit TypeCheckSlowPathMIPS64(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeMIPS64(instruction), is_fatal_(is_fatal) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
@@ -347,7 +348,9 @@
     CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
 
     __ Bind(GetEntryLabel());
-    SaveLiveRegisters(codegen, locations);
+    if (!is_fatal_) {
+      SaveLiveRegisters(codegen, locations);
+    }
 
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
@@ -370,13 +373,19 @@
       CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
-    RestoreLiveRegisters(codegen, locations);
-    __ Bc(GetExitLabel());
+    if (!is_fatal_) {
+      RestoreLiveRegisters(codegen, locations);
+      __ Bc(GetExitLabel());
+    }
   }
 
   const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathMIPS64"; }
 
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
  private:
+  const bool is_fatal_;
+
   DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathMIPS64);
 };
 
@@ -398,6 +407,528 @@
   DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathMIPS64);
 };
 
+class ArraySetSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  explicit ArraySetSlowPathMIPS64(HInstruction* instruction) : SlowPathCodeMIPS64(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    mips64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ Bc(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathMIPS64"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathMIPS64);
+};
+
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+//
+// If `entrypoint` is a valid location, it is assumed to already be
+// holding the entrypoint. The case where the entrypoint is passed in
+// is for the GcRoot read barrier.
+class ReadBarrierMarkSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierMarkSlowPathMIPS64(HInstruction* instruction,
+                                Location ref,
+                                Location entrypoint = Location::NoLocation())
+      : SlowPathCodeMIPS64(instruction), ref_(ref), entrypoint_(entrypoint) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathMIPS64"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsArraySet() ||
+           instruction_->IsLoadClass() ||
+           instruction_->IsLoadString() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
+           (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == S8)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    if (entrypoint_.IsValid()) {
+      mips64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      DCHECK_EQ(entrypoint_.AsRegister<GpuRegister>(), T9);
+      __ Jalr(entrypoint_.AsRegister<GpuRegister>());
+      __ Nop();
+    } else {
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+      // This runtime call does not require a stack map.
+      mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                          instruction_,
+                                                          this);
+    }
+    __ Bc(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if already loaded.
+  const Location entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathMIPS64);
+};
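
The `ref_reg - 1` indexing above relies on the per-thread table of
ReadBarrierMarkRegXX entry points having no slot for the ZERO register, which
can never hold a reference to mark. A hedged sketch of the offset computation
(the table base constant is an assumption here):

  #include <cstdint>

  // Hedged sketch of GetReadBarrierMarkEntryPointsOffset-style indexing:
  // one entry point per markable register, 8 bytes apart on MIPS64.
  int32_t MarkEntryPointOffset(int32_t table_base_in_thread, int ref_reg) {
    constexpr int32_t kPointerSize = 8;  // kMips64PointerSize.
    // The table starts with register 1, hence the `ref_reg - 1` above.
    return table_base_in_thread + (ref_reg - 1) * kPointerSize;
  }
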
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathMIPS64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(HInstruction* instruction,
+                                              Location ref,
+                                              GpuRegister obj,
+                                              Location field_offset,
+                                              GpuRegister temp1)
+      : SlowPathCodeMIPS64(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_offset_(field_offset),
+        temp1_(temp1) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierMarkAndUpdateFieldSlowPathMIPS64";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    GpuRegister ref_reg = ref_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+    DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+    __ Bind(GetEntryLabel());
+
+    // Save the old reference.
+    // Note that we cannot use AT or TMP to save the old reference, as those
+    // are used by the code that follows, but we need the old reference after
+    // the call to the ReadBarrierMarkRegX entry point.
+    DCHECK_NE(temp1_, AT);
+    DCHECK_NE(temp1_, TMP);
+    __ Move(temp1_, ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    DCHECK((V0 <= ref_reg && ref_reg <= T2) ||
+           (S2 <= ref_reg && ref_reg <= S7) ||
+           (ref_reg == S8)) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in A0 and V0 respectively):
+    //
+    //   A0 <- ref
+    //   V0 <- ReadBarrierMark(A0)
+    //   ref <- V0
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(ref_reg - 1);
+    // This runtime call does not require a stack map.
+    mips64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset,
+                                                        instruction_,
+                                                        this);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*(obj_ + field_offset_)`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // the compare-and-set (CAS) loop below would abort, leaving the
+    // field as-is.
+    Mips64Label done;
+    __ Beqc(temp1_, ref_reg, &done);
+
+    // Update the holder's field atomically. This may fail if the
+    // mutator updates it before us, but that's OK. This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+
+    // Convenience aliases.
+    GpuRegister base = obj_;
+    GpuRegister offset = field_offset_.AsRegister<GpuRegister>();
+    GpuRegister expected = temp1_;
+    GpuRegister value = ref_reg;
+    GpuRegister tmp_ptr = TMP;      // Pointer to actual memory.
+    GpuRegister tmp = AT;           // Value in memory.
+
+    __ Daddu(tmp_ptr, base, offset);
+
+    if (kPoisonHeapReferences) {
+      __ PoisonHeapReference(expected);
+      // Do not poison `value` if it is the same register as
+      // `expected`, which has just been poisoned.
+      if (value != expected) {
+        __ PoisonHeapReference(value);
+      }
+    }
+
+    // do {
+    //   tmp = [tmp_ptr];
+    //   if (tmp != expected) break;
+    // } while (failure([tmp_ptr] <- value));
+
+    Mips64Label loop_head, exit_loop;
+    __ Bind(&loop_head);
+    __ Ll(tmp, tmp_ptr);
+    // The LL instruction sign-extends the 32-bit value, but
+    // 32-bit references must be zero-extended. Zero-extend `tmp`.
+    __ Dext(tmp, tmp, 0, 32);
+    __ Bnec(tmp, expected, &exit_loop);
+    __ Move(tmp, value);
+    __ Sc(tmp, tmp_ptr);
+    __ Beqzc(tmp, &loop_head);
+    __ Bind(&exit_loop);
+
+    if (kPoisonHeapReferences) {
+      __ UnpoisonHeapReference(expected);
+      // Do not unpoison `value` if it is the same register as
+      // `expected`, which has just been unpoisoned.
+      if (value != expected) {
+        __ UnpoisonHeapReference(value);
+      }
+    }
+
+    __ Bind(&done);
+    __ Bc(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const GpuRegister obj_;
+  // The location of the offset of the marked reference field within `obj_`.
+  Location field_offset_;
+
+  const GpuRegister temp1_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathMIPS64);
+};
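
The LL/SC loop above is the MIPS64 spelling of a strong compare-and-set with
relaxed memory ordering; in portable C++ the field update amounts to roughly
this (a sketch, not the runtime's actual code; heap poisoning is omitted):

  #include <atomic>
  #include <cstdint>

  // Hedged sketch: install the marked (to-space) reference `value` in the
  // holder's field, but only if the field still holds the old (from-space)
  // reference `expected`.
  void UpdateFieldAfterMark(std::atomic<uint32_t>* field,
                            uint32_t expected,
                            uint32_t value) {
    if (expected == value) {
      return;  // Mirrors the Beqc(temp1_, ref_reg, &done) early-out.
    }
    // Strong CAS, relaxed ordering. On failure, another thread already
    // installed a different reference, and the field is left as-is.
    field->compare_exchange_strong(expected, value, std::memory_order_relaxed);
  }
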
+
+// Slow path generating a read barrier for a heap reference.
+class ReadBarrierForHeapReferenceSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierForHeapReferenceSlowPathMIPS64(HInstruction* instruction,
+                                            Location out,
+                                            Location ref,
+                                            Location obj,
+                                            uint32_t offset,
+                                            Location index)
+      : SlowPathCodeMIPS64(instruction),
+        out_(out),
+        ref_(ref),
+        obj_(obj),
+        offset_(offset),
+        index_(index) {
+    DCHECK(kEmitCompilerReadBarrier);
+    // If `obj` is equal to `out` or `ref`, it means the initial object
+    // has been overwritten by (or after) the heap object reference load
+    // to be instrumented, e.g.:
+    //
+    //   __ LoadFromOffset(kLoadWord, out, out, offset);
+    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
+    //
+    // In that case, we have lost the information about the original
+    // object, and the emitted read barrier cannot work properly.
+    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
+    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsInstanceFieldGet() ||
+           instruction_->IsStaticFieldGet() ||
+           instruction_->IsArrayGet() ||
+           instruction_->IsInstanceOf() ||
+           instruction_->IsCheckCast() ||
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier for heap reference slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    // We may have to change the index's value, but as `index_` is a
+    // constant member (like the other "inputs" of this slow path), we
+    // introduce a copy of it, `index`.
+    Location index = index_;
+    if (index_.IsValid()) {
+      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+      if (instruction_->IsArrayGet()) {
+        // Compute the actual memory offset and store it in `index`.
+        GpuRegister index_reg = index_.AsRegister<GpuRegister>();
+        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
+        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
+          // We are about to change the value of `index_reg` (see the
+          // calls to art::mips64::Mips64Assembler::Sll and
+          // art::mips64::Mips64Assembler::Addiu32 below), but it has
+          // not been saved by the previous call to
+          // art::SlowPathCode::SaveLiveRegisters, as it is a
+          // callee-save register --
+          // art::SlowPathCode::SaveLiveRegisters does not consider
+          // callee-save registers, as it has been designed with the
+          // assumption that callee-save registers are supposed to be
+          // handled by the called function.  So, as a callee-save
+          // register, `index_reg` _would_ eventually be saved onto
+          // the stack, but it would be too late: we would have
+          // changed its value earlier.  Therefore, we manually save
+          // it here into another freely available register,
+          // `free_reg`, chosen of course among the caller-save
+          // registers (as a callee-save `free_reg` register would
+          // exhibit the same problem).
+          //
+          // Note we could have requested a temporary register from
+          // the register allocator instead; but we prefer not to, as
+          // this is a slow path, and we know we can find a
+          // caller-save register that is available.
+          GpuRegister free_reg = FindAvailableCallerSaveRegister(codegen);
+          __ Move(free_reg, index_reg);
+          index_reg = free_reg;
+          index = Location::RegisterLocation(index_reg);
+        } else {
+          // The initial register stored in `index_` has already been
+          // saved in the call to art::SlowPathCode::SaveLiveRegisters
+          // (as it is not a callee-save register), so we can freely
+          // use it.
+        }
+        // Shifting the index value contained in `index_reg` by the scale
+        // factor (2) cannot overflow in practice, as the runtime is
+        // unable to allocate object arrays with a size larger than
+        // 2^26 - 1 (that is, 2^28 - 4 bytes).
+        __ Sll(index_reg, index_reg, TIMES_4);
+        static_assert(
+            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+        __ Addiu32(index_reg, index_reg, offset_);
+      } else {
+        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
+        // intrinsics, `index_` is not shifted by a scale factor of 2
+        // (as in the case of ArrayGet), as it is actually an offset
+        // to an object field within an object.
+        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
+        DCHECK(instruction_->GetLocations()->Intrinsified());
+        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
+               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
+            << instruction_->AsInvoke()->GetIntrinsic();
+        DCHECK_EQ(offset_, 0U);
+        DCHECK(index_.IsRegister());
+      }
+    }
+
+    // We're moving two or three locations to locations that could
+    // overlap, so we need a parallel move resolver.
+    InvokeRuntimeCallingConvention calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(ref_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    parallel_move.AddMove(obj_,
+                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                          Primitive::kPrimNot,
+                          nullptr);
+    if (index.IsValid()) {
+      parallel_move.AddMove(index,
+                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
+                            Primitive::kPrimInt,
+                            nullptr);
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+    } else {
+      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+      __ LoadConst32(calling_convention.GetRegisterAt(2), offset_);
+    }
+    mips64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<
+        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
+    mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ Bc(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierForHeapReferenceSlowPathMIPS64";
+  }
+
+ private:
+  GpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
+    size_t ref = static_cast<size_t>(ref_.AsRegister<GpuRegister>());
+    size_t obj = static_cast<size_t>(obj_.AsRegister<GpuRegister>());
+    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
+      if (i != ref &&
+          i != obj &&
+          !codegen->IsCoreCalleeSaveRegister(i) &&
+          !codegen->IsBlockedCoreRegister(i)) {
+        return static_cast<GpuRegister>(i);
+      }
+    }
+    // We shall never fail to find a free caller-save register, as
+    // there are more than two core caller-save registers on MIPS64
+    // (meaning it is possible to find one which is different from
+    // `ref` and `obj`).
+    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
+    LOG(FATAL) << "Could not find a free caller-save register";
+    UNREACHABLE();
+  }
+
+  const Location out_;
+  const Location ref_;
+  const Location obj_;
+  const uint32_t offset_;
+  // An additional location containing an index to an array.
+  // Only used for HArrayGet and the UnsafeGetObject &
+  // UnsafeGetObjectVolatile intrinsics.
+  const Location index_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathMIPS64);
+};
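
The HArrayGet handling above folds the element address computation into the
single `offset` argument passed to artReadBarrierSlow. Numerically, a hedged
sketch (the example data offset is illustrative):

  #include <cstdint>

  // Hedged sketch of the index massaging done in EmitNativeCode above.
  uint32_t AdjustedArrayOffset(uint32_t data_offset, uint32_t index) {
    constexpr uint32_t kRefSize = 4;  // sizeof(mirror::HeapReference<Object>).
    // `index << TIMES_4` cannot overflow: object arrays are capped below
    // 2^26 elements, per the comment above.
    return (index * kRefSize) + data_offset;  // Sll, then Addiu32.
  }
  // E.g. with a (hypothetical) data_offset of 12 and index 5, the runtime
  // receives offset 32 and no separate index.
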
+
+// Slow path generating a read barrier for a GC root.
+class ReadBarrierForRootSlowPathMIPS64 : public SlowPathCodeMIPS64 {
+ public:
+  ReadBarrierForRootSlowPathMIPS64(HInstruction* instruction, Location out, Location root)
+      : SlowPathCodeMIPS64(instruction), out_(out), root_(root) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Primitive::Type type = Primitive::kPrimNot;
+    GpuRegister reg_out = out_.AsRegister<GpuRegister>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out));
+    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
+        << "Unexpected instruction in read barrier for GC root slow path: "
+        << instruction_->DebugName();
+
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConvention calling_convention;
+    CodeGeneratorMIPS64* mips64_codegen = down_cast<CodeGeneratorMIPS64*>(codegen);
+    mips64_codegen->MoveLocation(Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                 root_,
+                                 Primitive::kPrimNot);
+    mips64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
+                                  instruction_,
+                                  instruction_->GetDexPc(),
+                                  this);
+    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
+    mips64_codegen->MoveLocation(out_, calling_convention.GetReturnLocation(type), type);
+
+    RestoreLiveRegisters(codegen, locations);
+    __ Bc(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathMIPS64"; }
+
+ private:
+  const Location out_;
+  const Location root_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathMIPS64);
+};
+
 CodeGeneratorMIPS64::CodeGeneratorMIPS64(HGraph* graph,
                                          const Mips64InstructionSetFeatures& isa_features,
                                          const CompilerOptions& compiler_options,
@@ -430,8 +961,6 @@
                                graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       pc_relative_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      boot_image_address_patches_(std::less<uint32_t>(),
-                                  graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_string_patches_(StringReferenceValueComparator(),
                           graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       jit_class_patches_(TypeReferenceValueComparator(),
@@ -551,26 +1080,21 @@
     return;
   }
 
-  // Make sure the frame size isn't unreasonably large. Per the various APIs
-  // it looks like it should always be less than 2GB in size, which allows
-  // us using 32-bit signed offsets from the stack pointer.
-  if (GetFrameSize() > 0x7FFFFFFF)
-    LOG(FATAL) << "Stack frame larger than 2GB";
+  // Make sure the frame size isn't unreasonably large.
+  if (GetFrameSize() > GetStackOverflowReservedBytes(kMips64)) {
+    LOG(FATAL) << "Stack frame larger than " << GetStackOverflowReservedBytes(kMips64) << " bytes";
+  }
 
   // Spill callee-saved registers.
-  // Note that their cumulative size is small and they can be indexed using
-  // 16-bit offsets.
 
-  // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-  uint32_t ofs = FrameEntrySpillSize();
+  uint32_t ofs = GetFrameSize();
   __ IncreaseFrameSize(ofs);
 
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
     GpuRegister reg = kCoreCalleeSaves[i];
     if (allocated_registers_.ContainsCoreRegister(reg)) {
       ofs -= kMips64DoublewordSize;
-      __ Sd(reg, SP, ofs);
+      __ StoreToOffset(kStoreDoubleword, reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
@@ -579,23 +1103,16 @@
     FpuRegister reg = kFpuCalleeSaves[i];
     if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
       ofs -= kMips64DoublewordSize;
-      __ Sdc1(reg, SP, ofs);
+      __ StoreFpuToOffset(kStoreDoubleword, reg, SP, ofs);
       __ cfi().RelOffset(DWARFReg(reg), ofs);
     }
   }
 
-  // Allocate the rest of the frame and store the current method pointer
-  // at its end.
-
-  __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
   // Save the current method if we need it. Note that we do not
   // do this in HCurrentMethod, as the instruction might have been removed
   // in the SSA graph.
   if (RequiresCurrentMethod()) {
-    static_assert(IsInt<16>(kCurrentMethodStackOffset),
-                  "kCurrentMethodStackOffset must fit into int16_t");
-    __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+    __ StoreToOffset(kStoreDoubleword, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
   }
 
   if (GetGraph()->HasShouldDeoptimizeFlag()) {
@@ -608,42 +1125,32 @@
   __ cfi().RememberState();
 
   if (!HasEmptyFrame()) {
-    // Deallocate the rest of the frame.
-
-    __ DecreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
-
     // Restore callee-saved registers.
-    // Note that their cumulative size is small and they can be indexed using
-    // 16-bit offsets.
 
-    // TODO: increment/decrement SP in one step instead of two or remove this comment.
-
-    uint32_t ofs = 0;
-
-    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
-      FpuRegister reg = kFpuCalleeSaves[i];
-      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
-        __ Ldc1(reg, SP, ofs);
-        ofs += kMips64DoublewordSize;
-        __ cfi().Restore(DWARFReg(reg));
-      }
-    }
-
-    for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+    // For better instruction scheduling, restore RA before other registers.
+    uint32_t ofs = GetFrameSize();
+    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
       GpuRegister reg = kCoreCalleeSaves[i];
       if (allocated_registers_.ContainsCoreRegister(reg)) {
-        __ Ld(reg, SP, ofs);
-        ofs += kMips64DoublewordSize;
+        ofs -= kMips64DoublewordSize;
+        __ LoadFromOffset(kLoadDoubleword, reg, SP, ofs);
         __ cfi().Restore(DWARFReg(reg));
       }
     }
 
-    DCHECK_EQ(ofs, FrameEntrySpillSize());
-    __ DecreaseFrameSize(ofs);
+    for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+      FpuRegister reg = kFpuCalleeSaves[i];
+      if (allocated_registers_.ContainsFloatingPointRegister(reg)) {
+        ofs -= kMips64DoublewordSize;
+        __ LoadFpuFromOffset(kLoadDoubleword, reg, SP, ofs);
+        __ cfi().Restore(DWARFReg(reg));
+      }
+    }
+
+    __ DecreaseFrameSize(GetFrameSize());
   }
 
-  __ Jr(RA);
-  __ Nop();
+  __ Jic(RA, 0);
 
   __ cfi().RestoreState();
   __ cfi().DefCFAOffset(GetFrameSize());
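
The rewritten prologue/epilogue allocates and frees the whole frame in one SP
adjustment and addresses the callee-save slots from the top of the frame; a
hedged sketch of the slot layout computation (register ordering is an
assumption about kCoreCalleeSaves, where RA is typically listed last and thus
sits nearest the frame top, matching the restore-RA-first note above):

  #include <cstdint>
  #include <vector>

  // Hedged sketch: callee-save spill slots descend from the frame top,
  // matching the `ofs = GetFrameSize(); ofs -= kMips64DoublewordSize;`
  // pattern above.
  std::vector<uint32_t> SpillOffsets(uint32_t frame_size, size_t num_regs) {
    constexpr uint32_t kDoubleword = 8;  // kMips64DoublewordSize.
    std::vector<uint32_t> offsets;
    uint32_t ofs = frame_size;
    for (size_t i = 0; i < num_regs; ++i) {
      ofs -= kDoubleword;
      offsets.push_back(ofs);  // Highest slots are filled first.
    }
    return offsets;
  }
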
@@ -937,8 +1444,7 @@
       pc_relative_type_patches_.size() +
       type_bss_entry_patches_.size() +
       boot_image_string_patches_.size() +
-      boot_image_type_patches_.size() +
-      boot_image_address_patches_.size();
+      boot_image_type_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
@@ -972,13 +1478,6 @@
                                                      target_type.dex_file,
                                                      target_type.type_index.index_));
   }
-  for (const auto& entry : boot_image_address_patches_) {
-    DCHECK(GetCompilerOptions().GetIncludePatchInformation());
-    Literal* literal = entry.second;
-    DCHECK(literal->GetLabel()->IsBound());
-    uint32_t literal_offset = __ GetLabelLocation(literal->GetLabel());
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   DCHECK_EQ(size, linker_patches->size());
 }
 
@@ -1042,9 +1541,7 @@
 }
 
 Literal* CodeGeneratorMIPS64::DeduplicateBootImageAddressLiteral(uint64_t address) {
-  bool needs_patch = GetCompilerOptions().GetIncludePatchInformation();
-  Uint32ToLiteralMap* map = needs_patch ? &boot_image_address_patches_ : &uint32_literals_;
-  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), map);
+  return DeduplicateUint32Literal(dchecked_integral_cast<uint32_t>(address), &uint32_literals_);
 }
 
 void CodeGeneratorMIPS64::EmitPcRelativeAddressPlaceholderHigh(PcRelativePatchInfo* info,
@@ -1165,23 +1662,32 @@
                                         uint32_t dex_pc,
                                         SlowPathCode* slow_path) {
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
-  __ LoadFromOffset(kLoadDoubleword,
-                    T9,
-                    TR,
-                    GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
-  __ Jalr(T9);
-  __ Nop();
+  GenerateInvokeRuntime(GetThreadOffset<kMips64PointerSize>(entrypoint).Int32Value());
   if (EntrypointRequiresStackMap(entrypoint)) {
     RecordPcInfo(instruction, dex_pc, slow_path);
   }
 }
 
+void CodeGeneratorMIPS64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                                              HInstruction* instruction,
+                                                              SlowPathCode* slow_path) {
+  ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
+  GenerateInvokeRuntime(entry_point_offset);
+}
+
+void CodeGeneratorMIPS64::GenerateInvokeRuntime(int32_t entry_point_offset) {
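+  // Per the standard MIPS PIC calling convention, indirect calls go through
+  // T9 so that a position-independent callee can recompute its GP from it.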
+  __ LoadFromOffset(kLoadDoubleword, T9, TR, entry_point_offset);
+  __ Jalr(T9);
+  __ Nop();
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateClassInitializationCheck(SlowPathCodeMIPS64* slow_path,
                                                                       GpuRegister class_reg) {
   __ LoadFromOffset(kLoadWord, TMP, class_reg, mirror::Class::StatusOffset().Int32Value());
   __ LoadConst32(AT, mirror::Class::kStatusInitialized);
   __ Bltc(TMP, AT, slow_path->GetEntryLabel());
-  // TODO: barrier needed?
+  // Even if the initialized flag is set, we need to ensure consistent memory ordering.
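+  // Sync(0) below emits SYNC with stype 0, a full completion barrier.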
+  __ Sync(0);
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -1472,73 +1978,99 @@
 }
 
 void LocationsBuilderMIPS64::VisitArrayGet(HArrayGet* instruction) {
+  Primitive::Type type = instruction->GetType();
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (type == Primitive::kPrimNot);
   LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier
+                                                       ? LocationSummary::kCallOnSlowPath
+                                                       : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+  if (Primitive::IsFloatingPointType(type)) {
     locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_array_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
   }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier.
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
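+// The returned lambda is handed to the assembler's load/store helpers, which
+// invoke it right after emitting the first memory access, so that the
+// faulting PC of an implicit null check maps back to `instruction`.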
+static auto GetImplicitNullChecker(HInstruction* instruction, CodeGeneratorMIPS64* codegen) {
+  auto null_checker = [codegen, instruction]() {
+    codegen->MaybeRecordImplicitNullCheck(instruction);
+  };
+  return null_checker;
 }
 
 void InstructionCodeGeneratorMIPS64::VisitArrayGet(HArrayGet* instruction) {
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
   Location index = locations->InAt(1);
   uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
 
   Primitive::Type type = instruction->GetType();
   const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
                                         instruction->IsStringCharAt();
   switch (type) {
     case Primitive::kPrimBoolean: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset);
+        __ LoadFromOffset(kLoadUnsignedByte, out, obj, offset, null_checker);
       } else {
         __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
-        __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadUnsignedByte, out, TMP, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimByte: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ LoadFromOffset(kLoadSignedByte, out, obj, offset);
+        __ LoadFromOffset(kLoadSignedByte, out, obj, offset, null_checker);
       } else {
         __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
-        __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadSignedByte, out, TMP, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimShort: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset);
+        __ LoadFromOffset(kLoadSignedHalfword, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadSignedHalfword, out, TMP, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimChar: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (maybe_compressed_char_at) {
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
-        __ LoadFromOffset(kLoadWord, TMP, obj, count_offset);
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ LoadFromOffset(kLoadWord, TMP, obj, count_offset, null_checker);
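+        // Extract the compression flag from bit 0 of the count field.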
         __ Dext(TMP, TMP, 0, 1);
         static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                       "Expecting 0=compressed, 1=uncompressed");
@@ -1563,7 +2095,8 @@
           __ LoadFromOffset(kLoadUnsignedHalfword,
                             out,
                             obj,
-                            data_offset + (const_index << TIMES_2));
+                            data_offset + (const_index << TIMES_2),
+                            null_checker);
         }
       } else {
         GpuRegister index_reg = index.AsRegister<GpuRegister>();
@@ -1581,67 +2114,111 @@
         } else {
           __ Dsll(TMP, index_reg, TIMES_2);
           __ Daddu(TMP, obj, TMP);
-          __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset);
+          __ LoadFromOffset(kLoadUnsignedHalfword, out, TMP, data_offset, null_checker);
         }
       }
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
+    case Primitive::kPrimInt: {
       DCHECK_EQ(sizeof(mirror::HeapReference<mirror::Object>), sizeof(int32_t));
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       LoadOperandType load_type = (type == Primitive::kPrimNot) ? kLoadUnsignedWord : kLoadWord;
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFromOffset(load_type, out, obj, offset);
+        __ LoadFromOffset(load_type, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFromOffset(load_type, out, TMP, data_offset);
+        __ LoadFromOffset(load_type, out, TMP, data_offset, null_checker);
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier call.
+        codegen_->GenerateArrayLoadWithBakerReadBarrier(instruction,
+                                                        out_loc,
+                                                        obj,
+                                                        data_offset,
+                                                        index,
+                                                        temp,
+                                                        /* needs_null_check */ true);
+      } else {
+        GpuRegister out = out_loc.AsRegister<GpuRegister>();
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          __ LoadFromOffset(kLoadUnsignedWord, out, obj, offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          __ Sll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
+          __ Addu(TMP, obj, TMP);
+          __ LoadFromOffset(kLoadUnsignedWord, out, TMP, data_offset, null_checker);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction,
+                                                 out_loc,
+                                                 out_loc,
+                                                 obj_loc,
+                                                 data_offset,
+                                                 index);
+        }
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+      GpuRegister out = out_loc.AsRegister<GpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadFromOffset(kLoadDoubleword, out, obj, offset);
+        __ LoadFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset);
+        __ LoadFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
-      FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ LoadFpuFromOffset(kLoadWord, out, obj, offset);
+        __ LoadFpuFromOffset(kLoadWord, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset);
+        __ LoadFpuFromOffset(kLoadWord, out, TMP, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
-      FpuRegister out = locations->Out().AsFpuRegister<FpuRegister>();
+      FpuRegister out = out_loc.AsFpuRegister<FpuRegister>();
       if (index.IsConstant()) {
         size_t offset =
             (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset);
+        __ LoadFpuFromOffset(kLoadDoubleword, out, obj, offset, null_checker);
       } else {
         __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
         __ Daddu(TMP, obj, TMP);
-        __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset);
+        __ LoadFpuFromOffset(kLoadDoubleword, out, TMP, data_offset, null_checker);
       }
       break;
     }
@@ -1650,9 +2227,6 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
-  if (!maybe_compressed_char_at) {
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
 }
 
 void LocationsBuilderMIPS64::VisitArrayLength(HArrayLength* instruction) {
@@ -1674,24 +2248,48 @@
   }
 }
 
+Location LocationsBuilderMIPS64::RegisterOrZeroConstant(HInstruction* instruction) {
+  return (instruction->IsConstant() && instruction->AsConstant()->IsZeroBitPattern())
+      ? Location::ConstantLocation(instruction->AsConstant())
+      : Location::RequiresRegister();
+}
+
+Location LocationsBuilderMIPS64::FpuRegisterOrConstantForStore(HInstruction* instruction) {
+  // 0.0 can be stored directly (from the ZERO register) without loading it into an FPU register.
+  // A non-zero float or double constant can also be stored without going through the FPU, but
+  // this is only preferable when the constant has a single use.
+  if (instruction->IsConstant() &&
+      (instruction->AsConstant()->IsZeroBitPattern() ||
+       instruction->GetUses().HasExactlyOneElement())) {
+    return Location::ConstantLocation(instruction->AsConstant());
+  }
+  // Otherwise require an FPU register for the constant.
+  return Location::RequiresFpuRegister();
+}
+
 void LocationsBuilderMIPS64::VisitArraySet(HArraySet* instruction) {
-  bool needs_runtime_call = instruction->NeedsTypeCheck();
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
       instruction,
-      needs_runtime_call ? LocationSummary::kCallOnMainOnly : LocationSummary::kNoCall);
-  if (needs_runtime_call) {
-    InvokeRuntimeCallingConvention calling_convention;
-    locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-    locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-    locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
+      may_need_runtime_call_for_type_check ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
+    locations->SetInAt(2, FpuRegisterOrConstantForStore(instruction->InputAt(2)));
   } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
-    if (Primitive::IsFloatingPointType(instruction->InputAt(2)->GetType())) {
-      locations->SetInAt(2, Location::RequiresFpuRegister());
-    } else {
-      locations->SetInAt(2, Location::RequiresRegister());
-    }
+    locations->SetInAt(2, RegisterOrZeroConstant(instruction->InputAt(2)));
+  }
+  if (needs_write_barrier) {
+    // Temporary register for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
   }
 }
 
@@ -1699,23 +2297,29 @@
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
   Location index = locations->InAt(1);
+  Location value_location = locations->InAt(2);
   Primitive::Type value_type = instruction->GetComponentType();
-  bool needs_runtime_call = locations->WillCall();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
   bool needs_write_barrier =
       CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
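+  // With a constant index the element offset is folded into data_offset and
+  // the access addresses `obj` directly; otherwise TMP holds the element base.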
+  GpuRegister base_reg = index.IsConstant() ? obj : TMP;
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
-      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
-        __ StoreToOffset(kStoreByte, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1;
       } else {
-        __ Daddu(TMP, obj, index.AsRegister<GpuRegister>());
-        __ StoreToOffset(kStoreByte, value, TMP, data_offset);
+        __ Daddu(base_reg, obj, index.AsRegister<GpuRegister>());
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreByte, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreByte, value, base_reg, data_offset, null_checker);
       }
       break;
     }
@@ -1723,90 +2327,208 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
-      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2) + data_offset;
-        __ StoreToOffset(kStoreHalfword, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_2;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_2);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreToOffset(kStoreHalfword, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_2);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreHalfword, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreHalfword, value, base_reg, data_offset, null_checker);
       }
       break;
     }
 
-    case Primitive::kPrimInt:
-    case Primitive::kPrimNot: {
-      if (!needs_runtime_call) {
-        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
-        GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
-        if (index.IsConstant()) {
-          size_t offset =
-              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-          __ StoreToOffset(kStoreWord, value, obj, offset);
-        } else {
-          DCHECK(index.IsRegister()) << index;
-          __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
-          __ Daddu(TMP, obj, TMP);
-          __ StoreToOffset(kStoreWord, value, TMP, data_offset);
-        }
-        codegen_->MaybeRecordImplicitNullCheck(instruction);
-        if (needs_write_barrier) {
-          DCHECK_EQ(value_type, Primitive::kPrimNot);
-          codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
-        }
+    case Primitive::kPrimInt: {
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        DCHECK_EQ(value_type, Primitive::kPrimNot);
-        codegen_->InvokeRuntime(kQuickAputObject, instruction, instruction->GetDexPc());
-        CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      if (value_location.IsConstant()) {
+        // Just setting null.
+        uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+        if (index.IsConstant()) {
+          data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+        } else {
+          __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+          __ Daddu(base_reg, obj, base_reg);
+        }
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        DCHECK_EQ(value, 0);
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      GpuRegister value = value_location.AsRegister<GpuRegister>();
+      GpuRegister temp1 = locations->GetTemp(0).AsRegister<GpuRegister>();
+      GpuRegister temp2 = TMP;  // Doesn't need to survive slow path.
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      Mips64Label done;
+      SlowPathCodeMIPS64* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathMIPS64(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          Mips64Label non_zero;
+          __ Bnezc(value, &non_zero);
+          uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+          if (index.IsConstant()) {
+            data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+          } else {
+            __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+            __ Daddu(base_reg, obj, base_reg);
+          }
+          __ StoreToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
+          __ Bc(&done);
+          __ Bind(&non_zero);
+        }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
+
+        // /* HeapReference<Class> */ temp1 = obj->klass_
+        __ LoadFromOffset(kLoadUnsignedWord, temp1, obj, class_offset, null_checker);
+        __ MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        __ LoadFromOffset(kLoadUnsignedWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, there is no need to unpoison `temp1`
+        // or `temp2`, as we are comparing two poisoned references.
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          Mips64Label do_put;
+          __ Beqc(temp1, temp2, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          __ MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          __ LoadFromOffset(kLoadUnsignedWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ Bnezc(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ Bnec(temp1, temp2, slow_path->GetEntryLabel());
+        }
+      }
+
+      GpuRegister source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        __ Move(temp1, value);
+        __ PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      uint32_t data_offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
+      if (index.IsConstant()) {
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
+      } else {
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
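+      // No null_checker on this store: when a type check was emitted above,
+      // the class load already recorded the implicit null check; otherwise
+      // it is recorded explicitly below.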
+      __ StoreToOffset(kStoreWord, source, base_reg, data_offset);
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(obj, value, instruction->GetValueCanBeNull());
+
+      if (done.IsLinked()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
       }
       break;
     }
 
     case Primitive::kPrimLong: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
-      GpuRegister value = locations->InAt(2).AsRegister<GpuRegister>();
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreToOffset(kStoreDoubleword, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreToOffset(kStoreDoubleword, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        GpuRegister value = value_location.AsRegister<GpuRegister>();
+        __ StoreToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimFloat: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
-      FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
-      DCHECK(locations->InAt(2).IsFpuRegister());
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
-        __ StoreFpuToOffset(kStoreWord, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_4);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreFpuToOffset(kStoreWord, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_4);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int32_t value = CodeGenerator::GetInt32ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreWord, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        FpuRegister value = value_location.AsFpuRegister<FpuRegister>();
+        __ StoreFpuToOffset(kStoreWord, value, base_reg, data_offset, null_checker);
       }
       break;
     }
 
     case Primitive::kPrimDouble: {
       uint32_t data_offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
-      FpuRegister value = locations->InAt(2).AsFpuRegister<FpuRegister>();
-      DCHECK(locations->InAt(2).IsFpuRegister());
       if (index.IsConstant()) {
-        size_t offset =
-            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
-        __ StoreFpuToOffset(kStoreDoubleword, value, obj, offset);
+        data_offset += index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8;
       } else {
-        __ Dsll(TMP, index.AsRegister<GpuRegister>(), TIMES_8);
-        __ Daddu(TMP, obj, TMP);
-        __ StoreFpuToOffset(kStoreDoubleword, value, TMP, data_offset);
+        __ Dsll(base_reg, index.AsRegister<GpuRegister>(), TIMES_8);
+        __ Daddu(base_reg, obj, base_reg);
+      }
+      if (value_location.IsConstant()) {
+        int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+        __ StoreConstToOffset(kStoreDoubleword, value, base_reg, data_offset, TMP, null_checker);
+      } else {
+        FpuRegister value = value_location.AsFpuRegister<FpuRegister>();
+        __ StoreFpuToOffset(kStoreDoubleword, value, base_reg, data_offset, null_checker);
       }
       break;
     }
@@ -1815,11 +2537,6 @@
       LOG(FATAL) << "Unreachable type " << instruction->GetType();
       UNREACHABLE();
   }
-
-  // Ints and objects are handled in the switch.
-  if (value_type != Primitive::kPrimInt && value_type != Primitive::kPrimNot) {
-    codegen_->MaybeRecordImplicitNullCheck(instruction);
-  }
 }
 
 void LocationsBuilderMIPS64::VisitBoundsCheck(HBoundsCheck* instruction) {
@@ -1847,31 +2564,234 @@
   __ Bgeuc(index, length, slow_path->GetEntryLabel());
 }
 
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Extra temp is used for read barrier.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
+}
+
 void LocationsBuilderMIPS64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
-      instruction,
-      LocationSummary::kCallOnSlowPath);
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier)
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathMIPS64 uses this register too.
-  locations->AddTemp(Location::RequiresRegister());
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister obj_cls = locations->GetTemp(0).AsRegister<GpuRegister>();
+  Location temp_loc = locations->GetTemp(0);
+  GpuRegister temp = temp_loc.AsRegister<GpuRegister>();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+  Mips64Label done;
 
+  // Always false when read barriers are in use: the read barriers avoided
+  // below (for performance and code size) can produce false negatives, so the
+  // slow path must be able to return for those non-fatal cases.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeMIPS64* slow_path =
-      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
+      new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                           is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(slow_path);
 
-  // TODO: avoid this check if we know obj is not null.
-  __ Beqzc(obj, slow_path->GetExitLabel());
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadUnsignedWord, obj_cls, obj, mirror::Object::ClassOffset().Int32Value());
-  __ Bnec(obj_cls, cls, slow_path->GetEntryLabel());
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Beqzc(obj, &done);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ Bnec(temp, cls, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      Mips64Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Beqzc(temp, slow_path->GetEntryLabel());
+      // Otherwise, compare the classes.
+      __ Bnec(temp, cls, &loop);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Walk over the class hierarchy to find a match.
+      Mips64Label loop;
+      __ Bind(&loop);
+      __ Beqc(temp, cls, &done);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception. Otherwise, jump to the beginning of the loop.
+      __ Bnezc(temp, &loop);
+      __ Bc(slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Do an exact check.
+      __ Beqc(temp, cls, &done);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Beqzc(temp, slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array; further check that this
+      // component type is not a primitive type.
+      __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Bnezc(temp, slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+      // We always go into the type check slow path for the unresolved check case.
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require assigning
+      // fixed registers to the inputs of this HCheckCast instruction
+      // (following the runtime calling convention), which might be
+      // cluttered by the potential first read barrier emission at the
+      // beginning of this method.
+      __ Bc(slow_path->GetEntryLabel());
+      break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve the performance of the fast path; this
+      // cannot produce false positives.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Lw(TMP, temp, array_length_offset);
+      // Loop through the iftable and check if any class matches.
+      Mips64Label loop;
+      __ Bind(&loop);
+      __ Beqzc(TMP, slow_path->GetEntryLabel());
+      __ Lwu(AT, temp, object_array_data_offset);
+      __ MaybeUnpoisonHeapReference(AT);
+      // Go to next interface.
+      __ Daddiu(temp, temp, 2 * kHeapReferenceSize);
+      __ Addiu(TMP, TMP, -2);
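+      // Each iftable entry spans two references (the interface class and its
+      // method array), hence the stride of 2.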
+      // Compare the classes and continue the loop if they do not match.
+      __ Bnec(AT, cls, &loop);
+      break;
+    }
+  }
+
+  __ Bind(&done);
   __ Bind(slow_path->GetExitLabel());
 }
 
@@ -3069,14 +3989,31 @@
 }
 
 void LocationsBuilderMIPS64::HandleFieldGet(HInstruction* instruction,
-                                            const FieldInfo& field_info ATTRIBUTE_UNUSED) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+                                            const FieldInfo& field_info) {
+  Primitive::Type field_type = field_info.GetFieldType();
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_type == Primitive::kPrimNot);
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      object_field_get_with_read_barrier
+          ? LocationSummary::kCallOnSlowPath
+          : LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->GetType())) {
     locations->SetOut(Location::RequiresFpuRegister());
   } else {
-    locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+    // The output overlaps in the case of an object field get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // object's location, as we need it to emit the read barrier.
+    locations->SetOut(Location::RequiresRegister(),
+                      object_field_get_with_read_barrier
+                          ? Location::kOutputOverlap
+                          : Location::kNoOutputOverlap);
+  }
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
 }
 
@@ -3084,8 +4021,14 @@
                                                     const FieldInfo& field_info) {
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
+  Location dst_loc = locations->Out();
   LoadOperandType load_type = kLoadUnsignedByte;
+  bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
   switch (type) {
     case Primitive::kPrimBoolean:
       load_type = kLoadUnsignedByte;
@@ -3115,17 +4058,47 @@
       UNREACHABLE();
   }
   if (!Primitive::IsFloatingPointType(type)) {
-    DCHECK(locations->Out().IsRegister());
-    GpuRegister dst = locations->Out().AsRegister<GpuRegister>();
-    __ LoadFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    DCHECK(dst_loc.IsRegister());
+    GpuRegister dst = dst_loc.AsRegister<GpuRegister>();
+    if (type == Primitive::kPrimNot) {
+      // /* HeapReference<Object> */ dst = *(obj + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        Location temp_loc = locations->GetTemp(0);
+        // Note that a potential implicit null check is handled in this
+        // CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier call.
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                        dst_loc,
+                                                        obj,
+                                                        offset,
+                                                        temp_loc,
+                                                        /* needs_null_check */ true);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+      } else {
+        __ LoadFromOffset(kLoadUnsignedWord, dst, obj, offset, null_checker);
+        if (is_volatile) {
+          GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, dst_loc, dst_loc, obj_loc, offset);
+      }
+    } else {
+      __ LoadFromOffset(load_type, dst, obj, offset, null_checker);
+    }
   } else {
-    DCHECK(locations->Out().IsFpuRegister());
-    FpuRegister dst = locations->Out().AsFpuRegister<FpuRegister>();
-    __ LoadFpuFromOffset(load_type, dst, obj, field_info.GetFieldOffset().Uint32Value());
+    DCHECK(dst_loc.IsFpuRegister());
+    FpuRegister dst = dst_loc.AsFpuRegister<FpuRegister>();
+    __ LoadFpuFromOffset(load_type, dst, obj, offset, null_checker);
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-  // TODO: memory barrier?
+  // Memory barriers for volatile reference loads are handled above, in the
+  // kPrimNot branch.
+  if (is_volatile && (type != Primitive::kPrimNot)) {
+    GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+  }
 }
 
 void LocationsBuilderMIPS64::HandleFieldSet(HInstruction* instruction,
@@ -3134,9 +4107,9 @@
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
   if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
-    locations->SetInAt(1, Location::RequiresFpuRegister());
+    locations->SetInAt(1, FpuRegisterOrConstantForStore(instruction->InputAt(1)));
   } else {
-    locations->SetInAt(1, Location::RequiresRegister());
+    locations->SetInAt(1, RegisterOrZeroConstant(instruction->InputAt(1)));
   }
 }
 
@@ -3146,7 +4119,13 @@
   Primitive::Type type = field_info.GetFieldType();
   LocationSummary* locations = instruction->GetLocations();
   GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location value_location = locations->InAt(1);
   StoreOperandType store_type = kStoreByte;
+  bool is_volatile = field_info.IsVolatile();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier = CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1));
+  auto null_checker = GetImplicitNullChecker(instruction, codegen_);
+
   switch (type) {
     case Primitive::kPrimBoolean:
     case Primitive::kPrimByte:
@@ -3169,23 +4148,44 @@
       LOG(FATAL) << "Unreachable type " << type;
       UNREACHABLE();
   }
-  if (!Primitive::IsFloatingPointType(type)) {
-    DCHECK(locations->InAt(1).IsRegister());
-    GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
-    __ StoreToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
-  } else {
-    DCHECK(locations->InAt(1).IsFpuRegister());
-    FpuRegister src = locations->InAt(1).AsFpuRegister<FpuRegister>();
-    __ StoreFpuToOffset(store_type, src, obj, field_info.GetFieldOffset().Uint32Value());
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
   }
 
-  codegen_->MaybeRecordImplicitNullCheck(instruction);
-  // TODO: memory barriers?
-  if (CodeGenerator::StoreNeedsWriteBarrier(type, instruction->InputAt(1))) {
-    DCHECK(locations->InAt(1).IsRegister());
-    GpuRegister src = locations->InAt(1).AsRegister<GpuRegister>();
+  if (value_location.IsConstant()) {
+    int64_t value = CodeGenerator::GetInt64ValueOf(value_location.GetConstant());
+    __ StoreConstToOffset(store_type, value, obj, offset, TMP, null_checker);
+  } else {
+    if (!Primitive::IsFloatingPointType(type)) {
+      DCHECK(value_location.IsRegister());
+      GpuRegister src = value_location.AsRegister<GpuRegister>();
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(type, Primitive::kPrimNot);
+        __ PoisonHeapReference(TMP, src);
+        __ StoreToOffset(store_type, TMP, obj, offset, null_checker);
+      } else {
+        __ StoreToOffset(store_type, src, obj, offset, null_checker);
+      }
+    } else {
+      DCHECK(value_location.IsFpuRegister());
+      FpuRegister src = value_location.AsFpuRegister<FpuRegister>();
+      __ StoreFpuToOffset(store_type, src, obj, offset, null_checker);
+    }
+  }
+
+  if (needs_write_barrier) {
+    DCHECK(value_location.IsRegister());
+    GpuRegister src = value_location.AsRegister<GpuRegister>();
     codegen_->MarkGCCard(obj, src, value_can_be_null);
   }
+
+  if (is_volatile) {
+    GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+  }
 }
 
 void LocationsBuilderMIPS64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
@@ -3204,14 +4204,134 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  GpuRegister out_reg = out.AsRegister<GpuRegister>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+    if (kUseBakerReadBarrier) {
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(out + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      out_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // Save the value of `out` into `maybe_temp` before overwriting it
+      // in the following move operation, as we will need it for the
+      // read barrier below.
+      __ Move(maybe_temp.AsRegister<GpuRegister>(), out_reg);
+      // /* HeapReference<Object> */ out = *(out + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, out_reg, out_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorMIPS64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
+  GpuRegister out_reg = out.AsRegister<GpuRegister>();
+  GpuRegister obj_reg = obj.AsRegister<GpuRegister>();
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
+      // Load with fast path based Baker's read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(instruction,
+                                                      out,
+                                                      obj_reg,
+                                                      offset,
+                                                      maybe_temp,
+                                                      /* needs_null_check */ false);
+    } else {
+      // Load with slow path based read barrier.
+      // /* HeapReference<Object> */ out = *(obj + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
+    }
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, out_reg, obj_reg, offset);
+    __ MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
 void InstructionCodeGeneratorMIPS64::GenerateGcRootFieldLoad(
-    HInstruction* instruction ATTRIBUTE_UNUSED,
+    HInstruction* instruction,
     Location root,
     GpuRegister obj,
-    uint32_t offset) {
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   GpuRegister root_reg = root.AsRegister<GpuRegister>();
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
+  if (read_barrier_option == kWithReadBarrier) {
+    DCHECK(kEmitCompilerReadBarrier);
+    if (kUseBakerReadBarrier) {
+      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
+      // Baker's read barrier are used:
+      //
+      //   root = obj.field;
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
+      //   }
+
+      // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+      __ LoadFromOffset(kLoadUnsignedWord, root_reg, obj, offset);
+      static_assert(
+          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
+          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
+          "have different sizes.");
+      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
+                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
+                    "have different sizes.");
+
+      // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(T9);
+      SlowPathCodeMIPS64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
+      codegen_->AddSlowPath(slow_path);
+
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kMips64PointerSize>(root.reg() - 1);
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadDoubleword, temp.AsRegister<GpuRegister>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking; checking it
+      // directly saves one load compared to checking GetIsGcMarking.
+      __ Bnezc(temp.AsRegister<GpuRegister>(), slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+    } else {
+      // GC root loaded through a slow path for read barriers other
+      // than Baker's.
+      // /* GcRoot<mirror::Object>* */ root = obj + offset
+      __ Daddiu64(root_reg, obj, static_cast<int32_t>(offset));
+      // /* mirror::Object* */ root = root->Read()
+      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
+    }
   } else {
     // Plain GC root load with no read barrier.
     // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -3221,48 +4341,418 @@
   }
 }
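
In C++ terms, the Baker fast path emitted above behaves like the following sketch; `mark_entrypoint` stands in for the per-register pReadBarrierMarkRegXX entrypoint loaded from the thread (illustrative only, not ART's actual signature):

struct Object;  // opaque for the purpose of this sketch

Object* LoadGcRootSketch(Object** root_addr, Object* (*mark_entrypoint)(Object*)) {
  Object* root = *root_addr;         // root = *(obj + offset)
  if (mark_entrypoint != nullptr) {  // non-null only while the GC is marking
    root = mark_entrypoint(root);    // slow path marks (and may forward) the root
  }
  return root;
}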
 
+void CodeGeneratorMIPS64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                GpuRegister obj,
+                                                                uint32_t offset,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // /* HeapReference<Object> */ ref = *(obj + offset)
+  Location no_index = Location::NoLocation();
+  ScaleFactor no_scale_factor = TIMES_1;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            offset,
+                                            no_index,
+                                            no_scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
+
+void CodeGeneratorMIPS64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                Location ref,
+                                                                GpuRegister obj,
+                                                                uint32_t data_offset,
+                                                                Location index,
+                                                                Location temp,
+                                                                bool needs_null_check) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  static_assert(
+      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+  // /* HeapReference<Object> */ ref =
+  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+  ScaleFactor scale_factor = TIMES_4;
+  GenerateReferenceLoadWithBakerReadBarrier(instruction,
+                                            ref,
+                                            obj,
+                                            data_offset,
+                                            index,
+                                            scale_factor,
+                                            temp,
+                                            needs_null_check);
+}
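
For reference, the address computed for a register index by the Dsll/Daddu sequence in GenerateReferenceLoadWithBakerReadBarrier below reduces to the arithmetic in this sketch; it assumes 4-byte compressed references (matching the static_assert above), and `data_offset` is taken as a plain parameter:

#include <cstdint>

// Sketch: address of the index-th reference element with scale TIMES_4.
uint64_t RefElementAddressSketch(uint64_t obj, uint32_t data_offset, uint32_t index) {
  return obj + data_offset + (static_cast<uint64_t>(index) << 2);
}
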
+
+void CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                                    Location ref,
+                                                                    GpuRegister obj,
+                                                                    uint32_t offset,
+                                                                    Location index,
+                                                                    ScaleFactor scale_factor,
+                                                                    Location temp,
+                                                                    bool needs_null_check,
+                                                                    bool always_update_field) {
+  DCHECK(kEmitCompilerReadBarrier);
+  DCHECK(kUseBakerReadBarrier);
+
+  // In slow path based read barriers, the read barrier call is
+  // inserted after the original load. However, in fast path based
+  // Baker's read barriers, we need to perform the load of
+  // mirror::Object::monitor_ *before* the original reference load.
+  // This load-load ordering is required by the read barrier.
+  // The fast path/slow path (for Baker's algorithm) should look like:
+  //
+  //   uint32_t rb_state = LockWord(obj->monitor_).ReadBarrierState();
+  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+  //   HeapReference<Object> ref = *src;  // Original reference load.
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+  //   if (is_gray) {
+  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
+  //   }
+  //
+  // Note: the original implementation in ReadBarrier::Barrier is
+  // slightly more complex as it performs additional checks that we do
+  // not do here for performance reasons.
+
+  GpuRegister ref_reg = ref.AsRegister<GpuRegister>();
+  GpuRegister temp_reg = temp.AsRegister<GpuRegister>();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  // /* int32_t */ monitor = obj->monitor_
+  __ LoadFromOffset(kLoadWord, temp_reg, obj, monitor_offset);
+  if (needs_null_check) {
+    MaybeRecordImplicitNullCheck(instruction);
+  }
+  // /* LockWord */ lock_word = LockWord(monitor)
+  static_assert(sizeof(LockWord) == sizeof(int32_t),
+                "art::LockWord and int32_t have different sizes.");
+
+  __ Sync(0);  // Barrier to prevent load-load reordering.
+
+  // The actual reference load.
+  if (index.IsValid()) {
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
+    // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
+    if (index.IsConstant()) {
+      size_t computed_offset =
+          (index.GetConstant()->AsIntConstant()->GetValue() << scale_factor) + offset;
+      __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, computed_offset);
+    } else {
+      GpuRegister index_reg = index.AsRegister<GpuRegister>();
+      __ Dsll(TMP, index_reg, scale_factor);
+      __ Daddu(TMP, obj, TMP);
+      __ LoadFromOffset(kLoadUnsignedWord, ref_reg, TMP, offset);
+    }
+  } else {
+    // /* HeapReference<Object> */ ref = *(obj + offset)
+    __ LoadFromOffset(kLoadUnsignedWord, ref_reg, obj, offset);
+  }
+
+  // Object* ref = ref_addr->AsMirrorPtr()
+  __ MaybeUnpoisonHeapReference(ref_reg);
+
+  // Slow path marking the object `ref` when it is gray.
+  SlowPathCodeMIPS64* slow_path;
+  if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathMIPS64 only supports addresses
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register. Thus `offset` is expected to be zero
+    // and `scale_factor` to be TIMES_1 in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    slow_path = new (GetGraph()->GetArena())
+        ReadBarrierMarkAndUpdateFieldSlowPathMIPS64(instruction,
+                                                    ref,
+                                                    obj,
+                                                    /* field_offset */ index,
+                                                    temp_reg);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathMIPS64(instruction, ref);
+  }
+  AddSlowPath(slow_path);
+
+  // if (rb_state == ReadBarrier::GrayState())
+  //   ref = ReadBarrier::Mark(ref);
+  // Given the numeric representation, it's enough to check the low bit of the
+  // rb_state. We do that by shifting the bit into the sign bit (31) and
+  // performing a branch on less than zero.
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+  static_assert(LockWord::kReadBarrierStateSize == 1, "Expecting 1-bit read barrier state size");
+  __ Sll(temp_reg, temp_reg, 31 - LockWord::kReadBarrierStateShift);
+  __ Bltzc(temp_reg, slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
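
The Sll/Bltzc pair above encodes the gray test without a compare instruction. A scalar C++ sketch of the same computation:

#include <cstdint>

// Shift the 1-bit rb_state into bit 31, then test the sign bit.
bool IsGraySketch(uint32_t lock_word, int rb_state_shift) {
  int32_t shifted = static_cast<int32_t>(lock_word << (31 - rb_state_shift));
  return shifted < 0;  // negative <=> rb_state bit was 1 (gray)
}
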
+
+void CodeGeneratorMIPS64::GenerateReadBarrierSlow(HInstruction* instruction,
+                                                  Location out,
+                                                  Location ref,
+                                                  Location obj,
+                                                  uint32_t offset,
+                                                  Location index) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the reference load.
+  //
+  // If heap poisoning is enabled, the unpoisoning of the loaded
+  // reference will be carried out by the runtime within the slow
+  // path.
+  //
+  // Note that `ref` currently does not get unpoisoned (when heap
+  // poisoning is enabled), which is alright as the `ref` argument is
+  // not used by the artReadBarrierSlow entry point.
+  //
+  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
+  SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena())
+      ReadBarrierForHeapReferenceSlowPathMIPS64(instruction, out, ref, obj, offset, index);
+  AddSlowPath(slow_path);
+
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void CodeGeneratorMIPS64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                                       Location out,
+                                                       Location ref,
+                                                       Location obj,
+                                                       uint32_t offset,
+                                                       Location index) {
+  if (kEmitCompilerReadBarrier) {
+    // Baker's read barriers shall be handled by the fast path
+    // (CodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier).
+    DCHECK(!kUseBakerReadBarrier);
+    // If heap poisoning is enabled, unpoisoning will be taken care of
+    // by the runtime within the slow path.
+    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
+  } else if (kPoisonHeapReferences) {
+    __ UnpoisonHeapReference(out.AsRegister<GpuRegister>());
+  }
+}
+
+void CodeGeneratorMIPS64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
+                                                         Location out,
+                                                         Location root) {
+  DCHECK(kEmitCompilerReadBarrier);
+
+  // Insert a slow path based read barrier *after* the GC root load.
+  //
+  // Note that GC roots are not affected by heap poisoning, so we do
+  // not need to do anything special for this here.
+  SlowPathCodeMIPS64* slow_path =
+      new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathMIPS64(instruction, out, root);
+  AddSlowPath(slow_path);
+
+  __ Bc(slow_path->GetEntryLabel());
+  __ Bind(slow_path->GetExitLabel());
+}
+
 void LocationsBuilderMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
-  LocationSummary::CallKind call_kind =
-      instruction->IsExactCheck() ? LocationSummary::kNoCall : LocationSummary::kCallOnSlowPath;
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
   // The output does overlap inputs.
   // Note that TypeCheckSlowPathMIPS64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorMIPS64::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
   LocationSummary* locations = instruction->GetLocations();
-  GpuRegister obj = locations->InAt(0).AsRegister<GpuRegister>();
+  Location obj_loc = locations->InAt(0);
+  GpuRegister obj = obj_loc.AsRegister<GpuRegister>();
   GpuRegister cls = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
-
+  Location out_loc = locations->Out();
+  GpuRegister out = out_loc.AsRegister<GpuRegister>();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
   Mips64Label done;
+  SlowPathCodeMIPS64* slow_path = nullptr;
 
   // Return 0 if `obj` is null.
-  // TODO: Avoid this check if we know `obj` is not null.
-  __ Move(out, ZERO);
-  __ Beqzc(obj, &done);
+  // Avoid this check if we know `obj` is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Move(out, ZERO);
+    __ Beqzc(obj, &done);
+  }
 
-  // Compare the class of `obj` with `cls`.
-  __ LoadFromOffset(kLoadUnsignedWord, out, obj, mirror::Object::ClassOffset().Int32Value());
-  if (instruction->IsExactCheck()) {
-    // Classes must be equal for the instanceof to succeed.
-    __ Xor(out, out, cls);
-    __ Sltiu(out, out, 1);
-  } else {
-    // If the classes are not equal, we go into a slow path.
-    DCHECK(locations->OnlyCallsOnSlowPath());
-    SlowPathCodeMIPS64* slow_path =
-        new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction);
-    codegen_->AddSlowPath(slow_path);
-    __ Bnec(out, cls, slow_path->GetEntryLabel());
-    __ LoadConst32(out, 1);
-    __ Bind(slow_path->GetExitLabel());
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Classes must be equal for the instanceof to succeed.
+      __ Xor(out, out, cls);
+      __ Sltiu(out, out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      Mips64Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqzc(out, &done);
+      __ Bnec(out, cls, &loop);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Walk over the class hierarchy to find a match.
+      Mips64Label loop, success;
+      __ Bind(&loop);
+      __ Beqc(out, cls, &success);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      __ Bnezc(out, &loop);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Bc(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
+      // Do an exact check.
+      Mips64Label success;
+      __ Beqc(out, cls, &success);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Beqzc(out, &done);
+      __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Sltiu(out, out, 1);
+      __ Bc(&done);
+      __ Bind(&success);
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                                       /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ Bnec(out, cls, slow_path->GetEntryLabel());
+      __ LoadConst32(out, 1);
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on the slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require to assign fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathMIPS64(instruction,
+                                                                       /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ Bc(slow_path->GetEntryLabel());
+      break;
+    }
   }
 
   __ Bind(&done);
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
 }
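
As a map of the cases above, here is an illustrative-only C++ model of the fast-path checks; the struct fields are made up, the real loads go through the read barrier helpers, and `obj` is assumed non-null (the MustDoNullCheck guard handles null):

struct ClassSketch {
  ClassSketch* super_class;
  ClassSketch* component_type;
  bool is_primitive;
};
struct ObjectSketch { ClassSketch* klass; };

// kExactCheck: Xor + Sltiu yields 1 iff the classes are equal.
bool ExactCheck(ObjectSketch* obj, ClassSketch* cls) {
  return obj->klass == cls;
}

// kAbstractClassCheck: start from the super class, since an abstract `cls`
// can never equal obj->klass itself.
bool AbstractClassCheck(ObjectSketch* obj, ClassSketch* cls) {
  for (ClassSketch* k = obj->klass->super_class; k != nullptr; k = k->super_class) {
    if (k == cls) return true;
  }
  return false;
}

// kClassHierarchyCheck: walk the chain, including obj->klass itself.
bool ClassHierarchyCheck(ObjectSketch* obj, ClassSketch* cls) {
  for (ClassSketch* k = obj->klass; k != nullptr; k = k->super_class) {
    if (k == cls) return true;
  }
  return false;
}

// kArrayObjectCheck: exact match, or a non-primitive (reference) array.
bool ArrayObjectCheck(ObjectSketch* obj, ClassSketch* cls) {
  if (obj->klass == cls) return true;
  ClassSketch* component = obj->klass->component_type;
  return component != nullptr && !component->is_primitive;
}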
 
 void LocationsBuilderMIPS64::VisitIntConstant(HIntConstant* constant) {
@@ -3325,6 +4815,14 @@
     __ LoadFromOffset(kLoadUnsignedWord, temp, receiver.AsRegister<GpuRegister>(), class_offset);
   }
   codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // it may not do so in the future).
+  __ MaybeUnpoisonHeapReference(temp);
   __ LoadFromOffset(kLoadDoubleword, temp, temp,
       mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
   uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -3381,9 +4879,6 @@
 
 HLoadString::LoadKind CodeGeneratorMIPS64::GetSupportedLoadStringKind(
     HLoadString::LoadKind desired_string_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   bool fallback_load = false;
   switch (desired_string_load_kind) {
     case HLoadString::LoadKind::kBootImageLinkTimeAddress:
@@ -3411,9 +4906,6 @@
 
 HLoadClass::LoadKind CodeGeneratorMIPS64::GetSupportedLoadClassKind(
     HLoadClass::LoadKind desired_class_load_kind) {
-  if (kEmitCompilerReadBarrier) {
-    UNIMPLEMENTED(FATAL) << "for read barrier";
-  }
   bool fallback_load = false;
   switch (desired_class_load_kind) {
     case HLoadClass::LoadKind::kInvalid:
@@ -3567,6 +5059,14 @@
   // temp = object->GetClass();
   __ LoadFromOffset(kLoadUnsignedWord, temp, receiver, class_offset);
   MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However, this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (though
+  // it may not do so in the future).
+  __ MaybeUnpoisonHeapReference(temp);
   // temp = temp->GetMethodAt(method_offset);
   __ LoadFromOffset(kLoadDoubleword, temp, temp, method_offset);
   // T9 = temp->GetEntryPoint();
@@ -3598,7 +5098,8 @@
   }
   DCHECK(!cls->NeedsAccessCheck());
 
-  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
@@ -3627,6 +5128,9 @@
       current_method_reg = locations->InAt(0).AsRegister<GpuRegister>();
   }
 
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (load_kind) {
     case HLoadClass::LoadKind::kReferrersClass:
@@ -3636,10 +5140,12 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               current_method_reg,
-                              ArtMethod::DeclaringClassOffset().Int32Value());
+                              ArtMethod::DeclaringClassOffset().Int32Value(),
+                              read_barrier_option);
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out,
                      kLoadUnsignedWord,
                      codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
@@ -3647,6 +5153,7 @@
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
       DCHECK(codegen_->GetCompilerOptions().IsBootImage());
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
@@ -3654,7 +5161,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!kEmitCompilerReadBarrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
@@ -3666,8 +5173,8 @@
     case HLoadClass::LoadKind::kBssEntry: {
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewTypeBssEntryPatch(cls->GetDexFile(), cls->GetTypeIndex());
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+      GenerateGcRootFieldLoad(cls, out_loc, out, /* placeholder */ 0x5678, read_barrier_option);
       generate_null_check = true;
       break;
     }
@@ -3677,7 +5184,7 @@
                      codegen_->DeduplicateJitClassLiteral(cls->GetDexFile(),
                                                           cls->GetTypeIndex(),
                                                           cls->GetClass()));
-      GenerateGcRootFieldLoad(cls, out_loc, out, 0);
+      GenerateGcRootFieldLoad(cls, out_loc, out, 0, read_barrier_option);
       break;
     case HLoadClass::LoadKind::kDexCacheViaMethod:
     case HLoadClass::LoadKind::kInvalid:
@@ -3773,8 +5280,12 @@
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
       CodeGeneratorMIPS64::PcRelativePatchInfo* info =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
-      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, AT);
-      __ Lwu(out, AT, /* placeholder */ 0x5678);
+      codegen_->EmitPcRelativeAddressPlaceholderHigh(info, out);
+      GenerateGcRootFieldLoad(load,
+                              out_loc,
+                              out,
+                              /* placeholder */ 0x5678,
+                              kCompilerReadBarrierOption);
       SlowPathCodeMIPS64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathMIPS64(load);
       codegen_->AddSlowPath(slow_path);
       __ Beqzc(out, slow_path->GetEntryLabel());
@@ -3787,7 +5298,7 @@
                      codegen_->DeduplicateJitStringLiteral(load->GetDexFile(),
                                                            load->GetStringIndex(),
                                                            load->GetString()));
-      GenerateGcRootFieldLoad(load, out_loc, out, 0);
+      GenerateGcRootFieldLoad(load, out_loc, out, 0, kCompilerReadBarrierOption);
       return;
     default:
       break;
@@ -3944,6 +5455,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewArray(HNewArray* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   codegen_->InvokeRuntime(kQuickAllocArrayResolved, instruction, instruction->GetDexPc());
   CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
 }
@@ -3961,6 +5474,8 @@
 }
 
 void InstructionCodeGeneratorMIPS64::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes care
+  // of poisoning the reference.
   if (instruction->IsStringAlloc()) {
     // String is allocated through StringFactory. Call NewEmptyString entry point.
     GpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<GpuRegister>();
@@ -4722,12 +6237,34 @@
   }
 }
 
-void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void LocationsBuilderMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet*) {
-  UNIMPLEMENTED(FATAL) << "ClassTableGet is unimplemented on mips64";
+void InstructionCodeGeneratorMIPS64::VisitClassTableGet(HClassTableGet* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
+    uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+        instruction->GetIndex(), kMips64PointerSize).SizeValue();
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->InAt(0).AsRegister<GpuRegister>(),
+                      method_offset);
+  } else {
+    uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+        instruction->GetIndex(), kMips64PointerSize));
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->InAt(0).AsRegister<GpuRegister>(),
+                      mirror::Class::ImtPtrOffset(kMips64PointerSize).Uint32Value());
+    __ LoadFromOffset(kLoadDoubleword,
+                      locations->Out().AsRegister<GpuRegister>(),
+                      locations->Out().AsRegister<GpuRegister>(),
+                      method_offset);
+  }
 }
 
 }  // namespace mips64
diff --git a/compiler/optimizing/code_generator_mips64.h b/compiler/optimizing/code_generator_mips64.h
index 26cc7dc..fd1a174 100644
--- a/compiler/optimizing/code_generator_mips64.h
+++ b/compiler/optimizing/code_generator_mips64.h
@@ -189,6 +189,8 @@
   void HandleShift(HBinaryOperation* operation);
   void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+  Location RegisterOrZeroConstant(HInstruction* instruction);
+  Location FpuRegisterOrConstantForStore(HInstruction* instruction);
 
   InvokeDexCallingConventionVisitorMIPS64 parameter_visitor_;
 
@@ -235,6 +237,38 @@
                       const FieldInfo& field_info,
                       bool value_can_be_null);
   void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
+
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
@@ -243,7 +277,9 @@
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                GpuRegister obj,
-                               uint32_t offset);
+                               uint32_t offset,
+                               ReadBarrierOption read_barrier_option);
+
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              Mips64Label* true_target,
@@ -314,6 +350,91 @@
   void EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) OVERRIDE;
   void EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) OVERRIDE;
 
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             GpuRegister obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference array load when Baker's read barriers are used.
+  void GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             GpuRegister obj,
+                                             uint32_t data_offset,
+                                             Location index,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 GpuRegister obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
+  // Generate a read barrier for a GC root within `instruction` using
+  // a slow path.
+  //
+  // A read barrier for an object reference GC root is implemented as
+  // a call to the artReadBarrierForRootSlow runtime entry point,
+  // which is passed the value in location `root`:
+  //
+  //   mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierForRootSlow.
+  void GenerateReadBarrierForRootSlow(HInstruction* instruction, Location out, Location root);
+
   void MarkGCCard(GpuRegister object, GpuRegister value, bool value_can_be_null);
 
   // Register allocation.
@@ -364,6 +485,14 @@
                      uint32_t dex_pc,
                      SlowPathCode* slow_path = nullptr) OVERRIDE;
 
+  // Generate code to invoke a runtime entry point, but do not record
+  // PC-related information in a stack map.
+  void InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
+                                           HInstruction* instruction,
+                                           SlowPathCode* slow_path);
+
+  void GenerateInvokeRuntime(int32_t entry_point_offset);
+
   ParallelMoveResolver* GetMoveResolver() OVERRIDE { return &move_resolver_; }
 
   bool NeedsTwoRegisters(Primitive::Type type ATTRIBUTE_UNUSED) const OVERRIDE { return false; }
@@ -492,8 +621,6 @@
   ArenaDeque<PcRelativePatchInfo> pc_relative_type_patches_;
   // PC-relative type patch info for kBssEntry.
   ArenaDeque<PcRelativePatchInfo> type_bss_entry_patches_;
-  // Deduplication map for patchable boot image addresses.
-  Uint32ToLiteralMap boot_image_address_patches_;
   // Patches for string root accesses in JIT compiled code.
   StringToLiteralMap jit_string_patches_;
   // Patches for class root accesses in JIT compiled code.
diff --git a/compiler/optimizing/code_generator_vector_arm.cc b/compiler/optimizing/code_generator_vector_arm.cc
new file mode 100644
index 0000000..e7f7b30
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm.h"
+
+namespace art {
+namespace arm {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<ArmAssembler*>(GetAssembler())->  // NOLINT
+
+void LocationsBuilderARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecCnv(HVecCnv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecNeg(HVecNeg* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAbs(HVecAbs* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecNot(HVecNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAdd(HVecAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecSub(HVecSub* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecMul(HVecMul* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecDiv(HVecDiv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAnd(HVecAnd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecAndNot(HVecAndNot* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecAndNot(HVecAndNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecOr(HVecOr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecXor(HVecXor* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecShl(HVecShl* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecShr(HVecShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM::VisitVecUShr(HVecUShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_vector_arm64.cc b/compiler/optimizing/code_generator_vector_arm64.cc
new file mode 100644
index 0000000..f4874fe
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm64.cc
@@ -0,0 +1,672 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm64.h"
+#include "mirror/array-inl.h"
+
+using namespace vixl::aarch64;  // NOLINT(build/namespaces)
+
+namespace art {
+namespace arm64 {
+
+using helpers::DRegisterFrom;
+using helpers::HeapOperand;
+using helpers::InputRegisterAt;
+using helpers::Int64ConstantFrom;
+using helpers::XRegisterFrom;
+
+#define __ GetVIXLAssembler()->
+
+void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    case Primitive::kPrimFloat:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Dup(dst.V8B(), InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Dup(dst.V4H(), InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Dup(dst.V2S(), InputRegisterAt(instruction, 0));
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Dup(dst.V2S(), DRegisterFrom(locations->InAt(0)).V2S(), 0);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
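
The Dup forms above broadcast one scalar into every lane of the 64-bit D register. A scalar model of the V2S case (two 32-bit lanes), for illustration:

#include <cstdint>

// dst[lane] = src for every lane; V8B/V4H are analogous with 8 or 4 lanes.
void DupV2SSketch(int32_t dst[2], int32_t src) {
  for (int lane = 0; lane < 2; ++lane) {
    dst[lane] = src;
  }
}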
+
+void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(),
+                        instruction->IsVecNot() ? Location::kOutputOverlap
+                                                : Location::kNoOutputOverlap);
+      break;
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister src = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  Primitive::Type from = instruction->GetInputType();
+  Primitive::Type to = instruction->GetResultType();
+  if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+    DCHECK_EQ(2u, instruction->GetVectorLength());
+    __ Scvtf(dst.V2S(), src.V2S());
+  } else {
+    LOG(FATAL) << "Unsupported SIMD type";
+  }
+}
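
Scvtf on V2S converts each signed 32-bit lane to float independently; a scalar sketch:

#include <cstdint>

// Per-lane semantics of "Scvtf dst.V2S, src.V2S".
void ScvtfV2SSketch(float dst[2], const int32_t src[2]) {
  for (int lane = 0; lane < 2; ++lane) {
    dst[lane] = static_cast<float>(src[lane]);
  }
}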
+
+void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister src = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Neg(dst.V8B(), src.V8B());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Neg(dst.V4H(), src.V4H());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Neg(dst.V2S(), src.V2S());
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Fneg(dst.V2S(), src.V2S());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister src = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Abs(dst.V8B(), src.V8B());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Abs(dst.V4H(), src.V4H());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Abs(dst.V2S(), src.V2S());
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Fabs(dst.V2S(), src.V2S());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+  }
+}
+
+void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister src = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:  // special case boolean-not
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Movi(dst.V8B(), 1);
+      __ Eor(dst.V8B(), dst.V8B(), src.V8B());
+      break;
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      __ Not(dst.V8B(), src.V8B());  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
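+// NEON arithmetic is three-operand, so the output may safely share a register
+// with either input; hence kNoOutputOverlap in the helper above.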
+void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Add(dst.V8B(), lhs.V8B(), rhs.V8B());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Add(dst.V4H(), lhs.V4H(), rhs.V4H());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Add(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Fadd(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Sub(dst.V8B(), lhs.V8B(), rhs.V8B());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Sub(dst.V4H(), lhs.V4H(), rhs.V4H());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Sub(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Fsub(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Mul(dst.V8B(), lhs.V8B(), rhs.V8B());
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Mul(dst.V4H(), lhs.V4H(), rhs.V4H());
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Mul(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Fmul(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Fdiv(dst.V2S(), lhs.V2S(), rhs.V2S());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      __ And(dst.V8B(), lhs.V8B(), rhs.V8B());  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
+  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
+}
+
+void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      __ Orr(dst.V8B(), lhs.V8B(), rhs.V8B());  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister rhs = DRegisterFrom(locations->InAt(1));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      __ Eor(dst.V8B(), lhs.V8B(), rhs.V8B());  // lanes do not matter
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
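+// NEON SHL/SSHR/USHR encode the shift distance as an immediate, which is why
+// the helper above pins the second input to a constant location.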
+void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Shl(dst.V8B(), lhs.V8B(), value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Shl(dst.V4H(), lhs.V4H(), value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Shl(dst.V2S(), lhs.V2S(), value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Sshr(dst.V8B(), lhs.V8B(), value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Sshr(dst.V4H(), lhs.V4H(), value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Sshr(dst.V2S(), lhs.V2S(), value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  FPRegister lhs = DRegisterFrom(locations->InAt(0));
+  FPRegister dst = DRegisterFrom(locations->Out());
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Ushr(dst.V8B(), lhs.V8B(), value);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Ushr(dst.V4H(), lhs.V4H(), value);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Ushr(dst.V2S(), lhs.V2S(), value);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      if (is_load) {
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up registers and address for vector memory operations.
+MemOperand InstructionCodeGeneratorARM64::CreateVecMemRegisters(
+    HVecMemoryOperation* instruction,
+    Location* reg_loc,
+    bool is_load) {
+  LocationSummary* locations = instruction->GetLocations();
+  Register base = InputRegisterAt(instruction, 0);
+  Location index = locations->InAt(1);
+  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+
+  Primitive::Type packed_type = instruction->GetPackedType();
+  uint32_t offset = mirror::Array::DataOffset(Primitive::ComponentSize(packed_type)).Uint32Value();
+  size_t shift = Primitive::ComponentSizeShift(packed_type);
+
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  Register temp = temps.AcquireSameSizeAs(base);
+  if (index.IsConstant()) {
+    offset += Int64ConstantFrom(index) << shift;
+    __ Add(temp, base, offset);
+  } else {
+    if (instruction->InputAt(0)->IsIntermediateAddress()) {
+      temp = base;
+    } else {
+      __ Add(temp, base, offset);
+    }
+    __ Add(temp.X(), temp.X(), Operand(XRegisterFrom(index), LSL, shift));
+  }
+  return HeapOperand(temp);
+}
+
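+// The effective address is base + Array::DataOffset + (index << shift),
+// computed into a scratch register; a constant index is folded into a single
+// add of the combined offset.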
+void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
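+// LD1/ST1 transfer one whole 64-bit D register; V8B, V4H and V2S all move the
+// same 8 bytes, the arrangement only reflects the packed lane type.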
+void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
+  Location reg_loc = Location::NoLocation();
+  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+  FPRegister reg = DRegisterFrom(reg_loc);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ Ld1(reg.V8B(), mem);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ Ld1(reg.V4H(), mem);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ Ld1(reg.V2S(), mem);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
+  Location reg_loc = Location::NoLocation();
+  MemOperand mem = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+  FPRegister reg = DRegisterFrom(reg_loc);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ St1(reg.V8B(), mem);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ St1(reg.V4H(), mem);
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ St1(reg.V2S(), mem);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+#undef __
+
+}  // namespace arm64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_vector_arm_vixl.cc b/compiler/optimizing/code_generator_vector_arm_vixl.cc
new file mode 100644
index 0000000..74fa584
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_arm_vixl.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_arm_vixl.h"
+
+namespace art {
+namespace arm {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
+
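+// SIMD is not yet implemented for the 32-bit ARM back end: the code
+// generators below abort, and the location builders only validate the packed
+// type.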
+void LocationsBuilderARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
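+      // No register constraints are set yet; the DCHECK below merely keeps
+      // `locations` used while SIMD remains unimplemented for this target.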
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecCnv(HVecCnv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNeg(HVecNeg* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAbs(HVecAbs* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecNot(HVecNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAdd(HVecAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecSub(HVecSub* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecMul(HVecMul* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecDiv(HVecDiv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAnd(HVecAnd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecAndNot(HVecAndNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecOr(HVecOr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecXor(HVecXor* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecShl(HVecShl* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecShr(HVecShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecUShr(HVecUShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderARMVIXL::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_vector_mips.cc b/compiler/optimizing/code_generator_vector_mips.cc
new file mode 100644
index 0000000..6969abd
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_mips.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips.h"
+
+namespace art {
+namespace mips {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<MipsAssembler*>(GetAssembler())->  // NOLINT
+
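+// As for 32-bit ARM, SIMD is not yet implemented for MIPS; every code
+// generator below is a LOG(FATAL) stub.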
+void LocationsBuilderMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderMIPS::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecCnv(HVecCnv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecNeg(HVecNeg* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAbs(HVecAbs* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecNot(HVecNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderMIPS::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAdd(HVecAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecSub(HVecSub* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecMul(HVecMul* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecDiv(HVecDiv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAnd(HVecAnd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecAndNot(HVecAndNot* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecAndNot(HVecAndNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecOr(HVecOr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecXor(HVecXor* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderMIPS::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecShl(HVecShl* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecShr(HVecShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecUShr(HVecUShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+}  // namespace mips
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_vector_mips64.cc b/compiler/optimizing/code_generator_vector_mips64.cc
new file mode 100644
index 0000000..87118ce
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_mips64.cc
@@ -0,0 +1,243 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_mips64.h"
+
+namespace art {
+namespace mips64 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<Mips64Assembler*>(GetAssembler())->  // NOLINT
+
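+// Same stub pattern as MIPS: SIMD code generation is not yet implemented for
+// MIPS64.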
+void LocationsBuilderMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderMIPS64::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecCnv(HVecCnv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecNeg(HVecNeg* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAbs(HVecAbs* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecNot(HVecNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderMIPS64::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAdd(HVecAdd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecSub(HVecSub* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecMul(HVecMul* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecDiv(HVecDiv* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAnd(HVecAnd* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecAndNot(HVecAndNot* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecOr(HVecOr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecXor(HVecXor* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK(locations);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderMIPS64::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecShl(HVecShl* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecShr(HVecShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecUShr(HVecUShr* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecLoad(HVecLoad* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderMIPS64::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorMIPS64::VisitVecStore(HVecStore* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+#undef __
+
+}  // namespace mips64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_vector_x86.cc b/compiler/optimizing/code_generator_vector_x86.cc
new file mode 100644
index 0000000..8dabb4d
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_x86.cc
@@ -0,0 +1,807 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace x86 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86Assembler*>(GetAssembler())->  // NOLINT
+
+void LocationsBuilderX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimLong:
+      // Long needs extra temporary to load the register pair.
+      locations->AddTemp(Location::RequiresFpuRegister());
+      FALLTHROUGH_INTENDED;
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
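+// Scalar broadcast without AVX: MOVD puts the value into lane 0, PUNPCKLBW/
+// PUNPCKLWD double the width of the replicated unit until it fills a dword,
+// and PSHUFD with immediate 0 splats that dword across the register. Float
+// and double broadcast in place with SHUFPS/SHUFPD, hence SameAsFirstInput
+// in the location builder above.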
+void InstructionCodeGeneratorX86::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<Register>());
+      __ punpcklbw(reg, reg);
+      __ punpcklwd(reg, reg);
+      __ pshufd(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<Register>());
+      __ punpcklwd(reg, reg);
+      __ pshufd(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<Register>());
+      __ pshufd(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimLong: {
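+      // On x86-32 a long lives in a register pair: PUNPCKLDQ combines the two
+      // 32-bit halves, then PUNPCKLQDQ duplicates the 64-bit result into both
+      // quadword lanes.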
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegisterPairLow<Register>());
+      __ movd(tmp, locations->InAt(0).AsRegisterPairHigh<Register>());
+      __ punpckldq(reg, tmp);
+      __ punpcklqdq(reg, reg);
+      break;
+    }
+    case Primitive::kPrimFloat:
+      DCHECK(locations->InAt(0).Equals(locations->Out()));
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ shufps(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK(locations->InAt(0).Equals(locations->Out()));
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ shufpd(reg, reg, Immediate(0));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecCnv(HVecCnv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  Primitive::Type from = instruction->GetInputType();
+  Primitive::Type to = instruction->GetResultType();
+  if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+    DCHECK_EQ(4u, instruction->GetVectorLength());
+    __ cvtdq2ps(dst, src);
+  } else {
+    LOG(FATAL) << "Unsupported SIMD type";
+  }
+}
+
+void LocationsBuilderX86::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
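+// SSE has no packed integer negation instruction, so the destination is
+// zeroed first and negation is computed as 0 - src; the FP cases use the
+// matching SUBPS/SUBPD on a zeroed register.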
+void InstructionCodeGeneratorX86::VisitVecNeg(HVecNeg* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubb(dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubd(dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubq(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ xorps(dst, dst);
+      __ subps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ xorpd(dst, dst);
+      __ subpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  if (instruction->GetPackedType() == Primitive::kPrimInt) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
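+// Branch-free absolute value. For ints, PCMPGTD against zero yields an
+// all-ones mask for negative lanes, and (x ^ mask) - mask conditionally
+// negates each lane. For floats and doubles, an all-ones register shifted
+// right by one bit gives the mask that clears the sign bit.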
+void InstructionCodeGeneratorX86::VisitVecAbs(HVecAbs* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimInt: {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      __ movaps(dst, src);
+      __ pxor(tmp, tmp);
+      __ pcmpgtd(tmp, dst);
+      __ pxor(dst, tmp);
+      __ psubd(dst, tmp);
+      break;
+    }
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ psrld(dst, Immediate(1));
+      __ andps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ psrlq(dst, Immediate(1));
+      __ andpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Boolean-not requires a temporary to construct the 16 x 0x01 constant.
+  if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
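+// Bitwise not is implemented as x ^ all-ones (PCMPEQB of a register with
+// itself yields all ones). The boolean case instead XORs with a vector of
+// sixteen 0x01 bytes, built as 0 - (-1) per byte, so values stay in {0, 1}.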
+void InstructionCodeGeneratorX86::VisitVecNot(HVecNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean: {  // special case boolean-not
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      __ pxor(dst, dst);
+      __ pcmpeqb(tmp, tmp);  // all ones
+      __ psubb(dst, tmp);  // dst = 16 x 0x01
+      __ pxor(dst, src);
+      break;
+    }
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pcmpeqb(dst, dst);  // all ones
+      __ pxor(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ xorps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ xorpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
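+// Non-VEX SSE arithmetic is destructive (dst is also the first source), so
+// the helper above ties the output to the first input and each visitor
+// DCHECKs that invariant.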
+void LocationsBuilderX86::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAdd(HVecAdd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ paddb(dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ paddw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ paddd(dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ paddq(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ addps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ addpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecSub(HVecSub* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ psubb(dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psubw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ psubd(dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ psubq(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ subps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ subpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
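+// SSE has no packed byte multiply, so kPrimByte is rejected by the fatal
+// default; note that PMULLD for packed 32-bit lanes is an SSE4.1 instruction.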
+void InstructionCodeGeneratorX86::VisitVecMul(HVecMul* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pmullw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pmulld(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ mulps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ mulpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecDiv(HVecDiv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ divps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ divpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAnd(HVecAnd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pand(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ andps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ andpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecAndNot(HVecAndNot* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecAndNot(HVecAndNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pandn(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ andnps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ andnpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecOr(HVecOr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ por(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ orps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ orpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecXor(HVecXor* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pxor(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ xorps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ xorpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecShl(HVecShl* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psllw(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pslld(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ psllq(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecShr(HVecShr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psraw(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ psrad(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86::VisitVecUShr(HVecUShr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psrlw(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ psrld(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ psrlq(dst, Immediate(static_cast<uint8_t>(value)));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
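
The three shift visitors above map Shl, Shr, and UShr onto the packed logical-left, arithmetic-right, and logical-right shifts respectively; Shr deliberately has no kPrimLong case because SSE2 provides no 64-bit arithmetic shift (psraq only arrives with AVX-512). The shift distance is always an immediate, which is why CreateVecShiftLocations pins input 1 to a constant location. A scalar model of the per-lane semantics, assuming two's-complement arithmetic shifts (a sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static int32_t ShlLane(int32_t x, int d) {   // pslld
      return static_cast<int32_t>(static_cast<uint32_t>(x) << d);
    }
    static int32_t ShrLane(int32_t x, int d) {   // psrad: replicates the sign bit
      return x >> d;
    }
    static int32_t UShrLane(int32_t x, int d) {  // psrld: zero-fills from the left
      return static_cast<int32_t>(static_cast<uint32_t>(x) >> d);
    }

    int main() {
      assert(ShrLane(-8, 1) == -4);
      assert(UShrLane(-8, 1) == 0x7FFFFFFC);
      return 0;
    }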
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      if (is_load) {
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up registers and address for vector memory operations.
+static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
+                                     Location* reg_loc,
+                                     bool is_load) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location base = locations->InAt(0);
+  Location index = locations->InAt(1);
+  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+  ScaleFactor scale = TIMES_1;
+  switch (size) {
+    case 2: scale = TIMES_2; break;
+    case 4: scale = TIMES_4; break;
+    case 8: scale = TIMES_8; break;
+    default: break;
+  }
+  return CodeGeneratorX86::ArrayAddress(base.AsRegister<Register>(), index, scale, offset);
+}
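
CreateVecMemRegisters folds the array data offset into the displacement and the element size into the addressing-mode scale, so the hardware resolves the element address in a single operand. A standalone scalar equivalent, where data_offset stands in for mirror::Array::DataOffset(size).Uint32Value() and the header size used below is purely illustrative:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Effective address of element `index`: the payload starts `data_offset`
    // bytes past the array pointer, and TIMES_2/4/8 encode the multiply by
    // the element size for sizes 2, 4, and 8.
    static uintptr_t ElementAddress(uintptr_t base, size_t index, size_t size,
                                    uint32_t data_offset) {
      return base + data_offset + index * size;
    }

    int main() {
      assert(ElementAddress(0u, 3, 4, /* hypothetical header */ 12) == 24u);
      return 0;
    }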
+
+void LocationsBuilderX86::VisitVecLoad(HVecLoad* instruction) {
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorX86::VisitVecLoad(HVecLoad* instruction) {
+  Location reg_loc = Location::NoLocation();
+  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
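
The aligned move forms (movdqa, movaps, movapd) fault if the address is not 16-byte aligned, while the unaligned forms accept any address, so the load and store visitors consult the statically known alignment to choose the fast form. The underlying test is just the low four address bits (a sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static bool IsAlignedAt16(uintptr_t address) {
      return (address & 15u) == 0;  // 16-byte aligned <=> low four bits clear
    }

    int main() {
      assert(IsAlignedAt16(0x1000u));
      assert(!IsAlignedAt16(0x1008u));
      return 0;
    }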
+
+void LocationsBuilderX86::VisitVecStore(HVecStore* instruction) {
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorX86::VisitVecStore(HVecStore* instruction) {
+  Location reg_loc = Location::NoLocation();
+  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+#undef __
+
+}  // namespace x86
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_vector_x86_64.cc b/compiler/optimizing/code_generator_vector_x86_64.cc
new file mode 100644
index 0000000..e956088
--- /dev/null
+++ b/compiler/optimizing/code_generator_vector_x86_64.cc
@@ -0,0 +1,800 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_generator_x86_64.h"
+#include "mirror/array-inl.h"
+
+namespace art {
+namespace x86_64 {
+
+// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
+#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT
+
+void LocationsBuilderX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister reg = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+      __ punpcklbw(reg, reg);
+      __ punpcklwd(reg, reg);
+      __ pshufd(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+      __ punpcklwd(reg, reg);
+      __ pshufd(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());
+      __ pshufd(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ movd(reg, locations->InAt(0).AsRegister<CpuRegister>());  // moves all 64 bits
+      __ punpcklqdq(reg, reg);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK(locations->InAt(0).Equals(locations->Out()));
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ shufps(reg, reg, Immediate(0));
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK(locations->InAt(0).Equals(locations->Out()));
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ shufpd(reg, reg, Immediate(0));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
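
The integral replicate cases form a doubling ladder: movd places the scalar in lane 0, each punpckl step interleaves the register with itself to double the number of copies, and the final pshufd(0) broadcasts the low 32-bit lane everywhere. A scalar model of the byte case over one 64-bit half (a sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static uint64_t SplatByte(uint8_t b) {
      uint16_t x2 = static_cast<uint16_t>(b * 0x0101u);          // punpcklbw: 2 copies
      uint32_t x4 = static_cast<uint32_t>(x2) * 0x00010001u;     // punpcklwd: 4 copies
      return static_cast<uint64_t>(x4) * 0x0000000100000001ull;  // pshufd(0): 8 copies
    }

    int main() {
      assert(SplatByte(0xAB) == 0xABABABABABABABABull);
      return 0;
    }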
+
+void LocationsBuilderX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSetScalars(HVecSetScalars* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void LocationsBuilderX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSumReduce(HVecSumReduce* instruction) {
+  LOG(FATAL) << "No SIMD for " << instruction->GetId();
+}
+
+// Helper to set up locations for vector unary operations.
+static void CreateVecUnOpLocations(ArenaAllocator* arena, HVecUnaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecCnv(HVecCnv* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecCnv(HVecCnv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  Primitive::Type from = instruction->GetInputType();
+  Primitive::Type to = instruction->GetResultType();
+  if (from == Primitive::kPrimInt && to == Primitive::kPrimFloat) {
+    DCHECK_EQ(4u, instruction->GetVectorLength());
+    __ cvtdq2ps(dst, src);
+  } else {
+    LOG(FATAL) << "Unsupported SIMD type";
+  }
+}
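
Only the int-to-float case is vectorized: cvtdq2ps converts four signed 32-bit lanes to four single-precision floats in one instruction, rounding per the current MXCSR mode (round-to-nearest by default). Per lane this is simply the standard conversion:

    #include <cassert>
    #include <cstdint>

    static float CvtLane(int32_t x) {
      return static_cast<float>(x);  // cvtdq2ps, applied lane-wise
    }

    int main() {
      assert(CvtLane(-7) == -7.0f);
      return 0;
    }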
+
+void LocationsBuilderX86_64::VisitVecNeg(HVecNeg* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNeg(HVecNeg* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubb(dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubd(dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pxor(dst, dst);
+      __ psubq(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ xorps(dst, dst);
+      __ subps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ xorpd(dst, dst);
+      __ subpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
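
SSE2 has no packed integer negate, so VisitVecNeg materializes zero with pxor(dst, dst) and then subtracts the source; the float and double cases use the same zero-minus-x pattern with xorps/subps and xorpd/subpd. Per integer lane (a sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static int32_t NegLane(int32_t x) {
      return 0 - x;  // pxor(dst, dst) followed by psubd(dst, src)
    }

    int main() {
      assert(NegLane(5) == -5);
      return 0;
    }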
+
+void LocationsBuilderX86_64::VisitVecAbs(HVecAbs* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  if (instruction->GetPackedType() == Primitive::kPrimInt) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAbs(HVecAbs* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimInt: {
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      __ movaps(dst, src);
+      __ pxor(tmp, tmp);
+      __ pcmpgtd(tmp, dst);
+      __ pxor(dst, tmp);
+      __ psubd(dst, tmp);
+      break;
+    }
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ psrld(dst, Immediate(1));
+      __ andps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ psrlq(dst, Immediate(1));
+      __ andpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
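
VisitVecAbs uses two classic idioms. Integers (no packed abs until SSSE3's pabsd) take the sign-mask route: pcmpgtd against zero yields an all-ones lane for each negative value, and (x ^ mask) - mask flips and corrects those lanes. Floats clear the sign bit with a mask built as all-ones shifted right by one. A scalar model, assuming two's-complement arithmetic shift; like the hardware sequence, AbsInt(INT32_MIN) wraps back to INT32_MIN:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static int32_t AbsInt(int32_t x) {
      int32_t mask = x >> 31;    // pcmpgtd(0, x): all ones iff x is negative
      return (x ^ mask) - mask;  // pxor + psubd
    }

    static float AbsFloat(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof(bits));
      bits &= 0x7FFFFFFFu;       // (all ones >> 1): everything but the sign bit
      std::memcpy(&f, &bits, sizeof(f));
      return f;                  // andps with that mask
    }

    int main() {
      assert(AbsInt(-5) == 5);
      assert(AbsFloat(-2.5f) == 2.5f);
      return 0;
    }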
+
+void LocationsBuilderX86_64::VisitVecNot(HVecNot* instruction) {
+  CreateVecUnOpLocations(GetGraph()->GetArena(), instruction);
+  // Boolean-not requires a temporary to construct the 16 x one.
+  if (instruction->GetPackedType() == Primitive::kPrimBoolean) {
+    instruction->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecNot(HVecNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  XmmRegister src = locations->InAt(0).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean: {  // special case boolean-not
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      XmmRegister tmp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
+      __ pxor(dst, dst);
+      __ pcmpeqb(tmp, tmp);  // all ones
+      __ psubb(dst, tmp);  // 16 x one
+      __ pxor(dst, src);
+      break;
+    }
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pcmpeqb(dst, dst);  // all ones
+      __ pxor(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ xorps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ pcmpeqb(dst, dst);  // all ones
+      __ xorpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
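
Booleans are packed as 0/1 bytes, so a plain bitwise not would yield 0xFF/0xFE rather than 1/0. The special case therefore builds a vector of 0x01 bytes, computing 0 - (-1) in each lane via pxor, pcmpeqb, and psubb, and XORs the input against it. Per lane (a sketch, not part of the patch):

    #include <cassert>
    #include <cstdint>

    static uint8_t BoolNotLane(uint8_t b) {  // b is 0 or 1
      uint8_t all_ones = 0xFF;                           // pcmpeqb(tmp, tmp)
      uint8_t one = static_cast<uint8_t>(0 - all_ones);  // psubb: 0 - (-1) == 1
      return b ^ one;                                    // pxor(dst, src)
    }

    int main() {
      assert(BoolNotLane(0) == 1);
      assert(BoolNotLane(1) == 0);
      return 0;
    }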
+
+// Helper to set up locations for vector binary operations.
+static void CreateVecBinOpLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::RequiresFpuRegister());
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecAdd(HVecAdd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAdd(HVecAdd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ paddb(dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ paddw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ paddd(dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ paddq(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ addps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ addpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecSub(HVecSub* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecSub(HVecSub* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimByte:
+      DCHECK_EQ(16u, instruction->GetVectorLength());
+      __ psubb(dst, src);
+      break;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psubw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ psubd(dst, src);
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ psubq(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ subps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ subpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecMul(HVecMul* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecMul(HVecMul* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ pmullw(dst, src);
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pmulld(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ mulps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ mulpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecDiv(HVecDiv* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecDiv(HVecDiv* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ divps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ divpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecAnd(HVecAnd* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAnd(HVecAnd* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pand(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ andps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ andpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecAndNot(HVecAndNot* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecAndNot(HVecAndNot* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pandn(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ andnps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ andnpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecOr(HVecOr* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecOr(HVecOr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ por(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ orps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ orpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecXor(HVecXor* instruction) {
+  CreateVecBinOpLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecXor(HVecXor* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  XmmRegister src = locations->InAt(1).AsFpuRegister<XmmRegister>();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      __ pxor(dst, src);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ xorps(dst, src);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ xorpd(dst, src);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector shift operations.
+static void CreateVecShiftLocations(ArenaAllocator* arena, HVecBinaryOperation* instruction) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
+      locations->SetOut(Location::SameAsFirstInput());
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecShl(HVecShl* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecShl(HVecShl* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psllw(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ pslld(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ psllq(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecShr(HVecShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecShr(HVecShr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psraw(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ psrad(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecUShr(HVecUShr* instruction) {
+  CreateVecShiftLocations(GetGraph()->GetArena(), instruction);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecUShr(HVecUShr* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  DCHECK(locations->InAt(0).Equals(locations->Out()));
+  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
+  XmmRegister dst = locations->Out().AsFpuRegister<XmmRegister>();
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      DCHECK_EQ(8u, instruction->GetVectorLength());
+      __ psrlw(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    case Primitive::kPrimInt:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      __ psrld(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    case Primitive::kPrimLong:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      __ psrlq(dst, Immediate(static_cast<int8_t>(value)));
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up locations for vector memory operations.
+static void CreateVecMemLocations(ArenaAllocator* arena,
+                                  HVecMemoryOperation* instruction,
+                                  bool is_load) {
+  LocationSummary* locations = new (arena) LocationSummary(instruction);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+      if (is_load) {
+        locations->SetOut(Location::RequiresFpuRegister());
+      } else {
+        locations->SetInAt(2, Location::RequiresFpuRegister());
+      }
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+// Helper to set up registers and address for vector memory operations.
+static Address CreateVecMemRegisters(HVecMemoryOperation* instruction,
+                                     Location* reg_loc,
+                                     bool is_load) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location base = locations->InAt(0);
+  Location index = locations->InAt(1);
+  *reg_loc = is_load ? locations->Out() : locations->InAt(2);
+  size_t size = Primitive::ComponentSize(instruction->GetPackedType());
+  uint32_t offset = mirror::Array::DataOffset(size).Uint32Value();
+  ScaleFactor scale = TIMES_1;
+  switch (size) {
+    case 2: scale = TIMES_2; break;
+    case 4: scale = TIMES_4; break;
+    case 8: scale = TIMES_8; break;
+    default: break;
+  }
+  return CodeGeneratorX86_64::ArrayAddress(base.AsRegister<CpuRegister>(), index, scale, offset);
+}
+
+void LocationsBuilderX86_64::VisitVecLoad(HVecLoad* instruction) {
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ true);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecLoad(HVecLoad* instruction) {
+  Location reg_loc = Location::NoLocation();
+  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ true);
+  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      is_aligned16 ? __ movdqa(reg, address) : __ movdqu(reg, address);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      is_aligned16 ? __ movaps(reg, address) : __ movups(reg, address);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      is_aligned16 ? __ movapd(reg, address) : __ movupd(reg, address);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderX86_64::VisitVecStore(HVecStore* instruction) {
+  CreateVecMemLocations(GetGraph()->GetArena(), instruction, /*is_load*/ false);
+}
+
+void InstructionCodeGeneratorX86_64::VisitVecStore(HVecStore* instruction) {
+  Location reg_loc = Location::NoLocation();
+  Address address = CreateVecMemRegisters(instruction, &reg_loc, /*is_load*/ false);
+  XmmRegister reg = reg_loc.AsFpuRegister<XmmRegister>();
+  bool is_aligned16 = instruction->GetAlignment().IsAlignedAt(16);
+  switch (instruction->GetPackedType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong:
+      DCHECK_LE(2u, instruction->GetVectorLength());
+      DCHECK_LE(instruction->GetVectorLength(), 16u);
+      is_aligned16 ? __ movdqa(address, reg) : __ movdqu(address, reg);
+      break;
+    case Primitive::kPrimFloat:
+      DCHECK_EQ(4u, instruction->GetVectorLength());
+      is_aligned16 ? __ movaps(address, reg) : __ movups(address, reg);
+      break;
+    case Primitive::kPrimDouble:
+      DCHECK_EQ(2u, instruction->GetVectorLength());
+      is_aligned16 ? __ movapd(address, reg) : __ movupd(address, reg);
+      break;
+    default:
+      LOG(FATAL) << "Unsupported SIMD type";
+      UNREACHABLE();
+  }
+}
+
+#undef __
+
+}  // namespace x86_64
+}  // namespace art
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index b779aed..08a752f 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -183,10 +183,13 @@
       : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
     x86_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -720,7 +723,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -963,12 +966,20 @@
 }
 
 size_t CodeGeneratorX86::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+  if (GetGraph()->HasSIMD()) {
+    __ movups(Address(ESP, stack_index), XmmRegister(reg_id));
+  } else {
+    __ movsd(Address(ESP, stack_index), XmmRegister(reg_id));
+  }
   return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorX86::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+  if (GetGraph()->HasSIMD()) {
+    __ movups(XmmRegister(reg_id), Address(ESP, stack_index));
+  } else {
+    __ movsd(XmmRegister(reg_id), Address(ESP, stack_index));
+  }
   return GetFloatingPointSpillSlotSize();
 }
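
These save/restore hunks widen floating-point spills from 8 to 16 bytes whenever the graph contains SIMD: movsd transfers only the low 64 bits of an XMM register, so spilling a live 128-bit value with it would bring the upper lanes back as garbage after a runtime call. A standalone sketch of the failure mode the movups path avoids (values are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    struct Xmm { uint64_t lo, hi; };  // a 128-bit register, as two halves

    // Models a movsd-based spill and fill: only the low half survives.
    static Xmm SpillRestoreLow64(Xmm reg) {
      uint64_t slot;                         // an 8-byte spill slot
      std::memcpy(&slot, &reg.lo, 8);        // save: low 64 bits only
      Xmm clobbered = {0xDEADu, 0xBEEFu};    // the callee scribbles on the register
      std::memcpy(&clobbered.lo, &slot, 8);  // restore: low 64 bits only
      return clobbered;
    }

    int main() {
      Xmm v = {1u, 2u};
      Xmm r = SpillRestoreLow64(v);
      assert(r.lo == 1u && r.hi != 2u);  // upper lane lost: hence movups + 16-byte slots
      return 0;
    }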
 
@@ -1015,7 +1026,6 @@
       assembler_(graph->GetArena()),
       isa_features_(isa_features),
       pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-      simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -4603,13 +4613,6 @@
       temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86PointerSize).Int32Value()));
 }
 
-void CodeGeneratorX86::RecordSimplePatch() {
-  if (GetCompilerOptions().GetIncludePatchInformation()) {
-    simple_patches_.emplace_back();
-    __ Bind(&simple_patches_.back());
-  }
-}
-
 void CodeGeneratorX86::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
   HX86ComputeBaseMethodAddress* address = nullptr;
@@ -4682,17 +4685,12 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      simple_patches_.size() +
       string_patches_.size() +
       boot_image_type_patches_.size() +
       type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  for (const Label& label : simple_patches_) {
-    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
@@ -5712,7 +5710,11 @@
 void LocationsBuilderX86::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In the suspend check slow path there are usually no caller-save registers
+  // at all. If SIMD instructions are present, however, we force spilling all
+  // live SIMD registers in full width (since the runtime saves/restores only the lower part).
+  locations->SetCustomSlowPathCallerSaves(
+      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorX86::VisitSuspendCheck(HSuspendCheck* instruction) {
@@ -5815,9 +5817,11 @@
       __ movd(destination.AsRegisterPairHigh<Register>(), src_reg);
     } else if (destination.IsStackSlot()) {
       __ movss(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
-    } else {
-      DCHECK(destination.IsDoubleStackSlot());
+    } else if (destination.IsDoubleStackSlot()) {
       __ movsd(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
+    } else {
+      DCHECK(destination.IsSIMDStackSlot());
+      __ movups(Address(ESP, destination.GetStackIndex()), source.AsFpuRegister<XmmRegister>());
     }
   } else if (source.IsStackSlot()) {
     if (destination.IsRegister()) {
@@ -5839,6 +5843,9 @@
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       MoveMemoryToMemory64(destination.GetStackIndex(), source.GetStackIndex());
     }
+  } else if (source.IsSIMDStackSlot()) {
+    DCHECK(destination.IsFpuRegister());
+    __ movups(destination.AsFpuRegister<XmmRegister>(), Address(ESP, source.GetStackIndex()));
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -6154,7 +6161,6 @@
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
       __ movl(out, Immediate(address));
-      codegen_->RecordSimplePatch();
       break;
     }
     case HLoadClass::LoadKind::kBssEntry: {
@@ -6311,7 +6317,6 @@
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
       __ movl(out, Immediate(address));
-      codegen_->RecordSimplePatch();
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBssEntry: {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 5360dc9..ca3a9ea 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -348,8 +348,9 @@
   }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
-    // 8 bytes == 2 words for each spill.
-    return 2 * kX86WordSize;
+    return GetGraph()->HasSIMD()
+        ? 4 * kX86WordSize   // 16 bytes == 4 words for each spill
+        : 2 * kX86WordSize;  //  8 bytes == 2 words for each spill
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -412,7 +413,6 @@
   // Generate a call to a virtual method.
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
-  void RecordSimplePatch();
   void RecordBootStringPatch(HLoadString* load_string);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
@@ -633,8 +633,6 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<X86PcRelativePatchInfo> pc_relative_dex_cache_patches_;
-  // Patch locations for patchoat where the linker doesn't do any other work.
-  ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC/non-PIC).
   ArenaDeque<X86PcRelativePatchInfo> string_patches_;
   // Type patch locations for boot image; type depends on configuration (boot image PIC/non-PIC).
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 179bf6d..ff6e099 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -140,10 +140,13 @@
       : SlowPathCode(instruction), successor_(successor) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
     __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
     x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
     if (successor_ == nullptr) {
       __ jmp(GetReturnLabel());
     } else {
@@ -741,7 +744,7 @@
            instruction_->IsArrayGet() ||
            instruction_->IsInstanceOf() ||
            instruction_->IsCheckCast() ||
-           (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
+           (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
 
@@ -983,7 +986,7 @@
       callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
-      __ movq(temp.AsRegister<CpuRegister>(), Immediate(invoke->GetMethodAddress()));
+      Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
       break;
     case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
       __ movq(temp.AsRegister<CpuRegister>(),
@@ -1070,13 +1073,6 @@
       kX86_64PointerSize).SizeValue()));
 }
 
-void CodeGeneratorX86_64::RecordSimplePatch() {
-  if (GetCompilerOptions().GetIncludePatchInformation()) {
-    simple_patches_.emplace_back();
-    __ Bind(&simple_patches_.back());
-  }
-}
-
 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
   DCHECK(GetCompilerOptions().IsBootImage());
   string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
@@ -1126,17 +1122,12 @@
   DCHECK(linker_patches->empty());
   size_t size =
       pc_relative_dex_cache_patches_.size() +
-      simple_patches_.size() +
       string_patches_.size() +
       boot_image_type_patches_.size() +
       type_bss_entry_patches_.size();
   linker_patches->reserve(size);
   EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
                                                                linker_patches);
-  for (const Label& label : simple_patches_) {
-    uint32_t literal_offset = label.Position() - kLabelPositionToLiteralOffsetAdjustment;
-    linker_patches->push_back(LinkerPatch::RecordPosition(literal_offset));
-  }
   if (!GetCompilerOptions().IsBootImage()) {
     DCHECK(boot_image_type_patches_.empty());
     EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
@@ -1170,13 +1161,21 @@
 }
 
 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
-  return kX86_64WordSize;
+  if (GetGraph()->HasSIMD()) {
+    __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  } else {
+    __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
+  }
+  return GetFloatingPointSpillSlotSize();
 }
 
 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
-  __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
-  return kX86_64WordSize;
+  if (GetGraph()->HasSIMD()) {
+    __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  } else {
+    __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
+  }
+  return GetFloatingPointSpillSlotSize();
 }
 
 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -1227,7 +1226,6 @@
         isa_features_(isa_features),
         constant_area_start_(0),
         pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
-        simple_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
         type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
@@ -3662,7 +3660,7 @@
 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   Primitive::Type type = instruction->GetResultType();
-  DCHECK(type == Primitive::kPrimInt || Primitive::kPrimLong);
+  DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
 
   bool is_div = instruction->IsDiv();
   LocationSummary* locations = instruction->GetLocations();
@@ -5165,7 +5163,11 @@
 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
-  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  // In the suspend check slow path, usually there are no caller-save registers at all.
+  // If SIMD instructions are present, however, we force spilling all live SIMD
+  // registers in full width (since the runtime only saves/restores the lower part).
+  locations->SetCustomSlowPathCallerSaves(
+      GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
 }
 
 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
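
Because the TestSuspend runtime entrypoint only preserves the low 64 bits of each
XMM register, graphs that contain SIMD mark every FP register as a custom
slow-path caller save; the SaveLiveRegisters/RestoreLiveRegisters calls added to
the slow path above then spill and restore them in full width. A minimal sketch
of that decision, with hypothetical names (not part of this patch):

    // Hypothetical mirror of the caller-save choice in VisitSuspendCheck above.
    RegisterSet SuspendCheckCallerSaves(bool graph_has_simd) {
      // The runtime only saves/restores the lower half of XMM registers, so
      // with SIMD all FP registers must be spilled by the generated slow path.
      return graph_has_simd ? RegisterSet::AllFpu() : RegisterSet::Empty();
    }
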
@@ -5254,6 +5256,10 @@
       __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
       __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
     }
+  } else if (source.IsSIMDStackSlot()) {
+    DCHECK(destination.IsFpuRegister());
+    __ movups(destination.AsFpuRegister<XmmRegister>(),
+              Address(CpuRegister(RSP), source.GetStackIndex()));
   } else if (source.IsConstant()) {
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
@@ -5304,10 +5310,13 @@
     } else if (destination.IsStackSlot()) {
       __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
-    } else {
-      DCHECK(destination.IsDoubleStackSlot()) << destination;
+    } else if (destination.IsDoubleStackSlot()) {
       __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
                source.AsFpuRegister<XmmRegister>());
+    } else {
+       DCHECK(destination.IsSIMDStackSlot());
+      __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
+                source.AsFpuRegister<XmmRegister>());
     }
   }
 }
@@ -5544,8 +5553,7 @@
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
       DCHECK_NE(address, 0u);
-      __ movl(out, Immediate(address));  // Zero-extended.
-      codegen_->RecordSimplePatch();
+      __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
       break;
     }
     case HLoadClass::LoadKind::kBssEntry: {
@@ -5680,8 +5688,7 @@
       uint32_t address = dchecked_integral_cast<uint32_t>(
           reinterpret_cast<uintptr_t>(load->GetString().Get()));
       DCHECK_NE(address, 0u);
-      __ movl(out, Immediate(address));  // Zero-extended.
-      codegen_->RecordSimplePatch();
+      __ movl(out, Immediate(static_cast<int32_t>(address)));  // Zero-extended.
       return;  // No dex cache slow path.
     }
     case HLoadString::LoadKind::kBssEntry: {
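
The `// Zero-extended.` remark relies on standard x86-64 semantics: writing a
32-bit register clears the upper 32 bits of the full register, so a boot-image
address that fits in 32 bits can be materialized with a movl rather than a
longer movq encoding. A standalone sketch of the equivalence (illustrative only):

    #include <cstdint>

    // Mimics `movl reg, imm32`: the destination's upper 32 bits become zero.
    uint64_t LoadZeroExtended(uint32_t address) {
      return static_cast<uint64_t>(address);  // zero-extension, not sign-extension
    }

    // The static_cast<int32_t> above only reinterprets the bit pattern for the
    // Immediate() constructor; the emitted movl still zero-extends at runtime.
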
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 3a83731..c8336da 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -326,7 +326,9 @@
   }
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE {
-    return kX86_64WordSize;
+    return GetGraph()->HasSIMD()
+        ? 2 * kX86_64WordSize   // 16 bytes == 2 x86_64 words for each spill
+        : 1 * kX86_64WordSize;  //  8 bytes == 1 x86_64 word for each spill
   }
 
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
@@ -406,7 +408,6 @@
   void GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke, Location temp) OVERRIDE;
   void GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) OVERRIDE;
 
-  void RecordSimplePatch();
   void RecordBootStringPatch(HLoadString* load_string);
   void RecordBootTypePatch(HLoadClass* load_class);
   Label* NewTypeBssEntryPatch(HLoadClass* load_class);
@@ -602,8 +603,6 @@
 
   // PC-relative DexCache access info.
   ArenaDeque<PatchInfo<Label>> pc_relative_dex_cache_patches_;
-  // Patch locations for patchoat where the linker doesn't do any other work.
-  ArenaDeque<Label> simple_patches_;
   // String patch locations; type depends on configuration (app .bss or boot image PIC).
   ArenaDeque<PatchInfo<Label>> string_patches_;
   // Type patch locations for boot image (always PIC).
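
With SIMD in the graph, each FP spill slot must hold a full 128-bit XMM value
(stored with movups) instead of only the low 64 bits (movsd), which is why the
slot size doubles. A minimal standalone sketch of the sizing rule, with
hypothetical names:

    #include <cstddef>

    constexpr size_t kWordSize = 8;  // one x86-64 word, in bytes

    // Hypothetical mirror of GetFloatingPointSpillSlotSize() above.
    size_t FpSpillSlotSize(bool graph_has_simd) {
      return graph_has_simd
          ? 2 * kWordSize   // 16 bytes: full XMM register
          : 1 * kWordSize;  //  8 bytes: scalar double
    }
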
diff --git a/compiler/optimizing/code_sinking.cc b/compiler/optimizing/code_sinking.cc
new file mode 100644
index 0000000..dc3d378
--- /dev/null
+++ b/compiler/optimizing/code_sinking.cc
@@ -0,0 +1,403 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "code_sinking.h"
+
+#include "common_dominator.h"
+#include "nodes.h"
+
+namespace art {
+
+void CodeSinking::Run() {
+  HBasicBlock* exit = graph_->GetExitBlock();
+  if (exit == nullptr) {
+    // Infinite loop, just bail.
+    return;
+  }
+  // TODO(ngeoffray): we do not profile branches yet, so use throw instructions
+  // as an indicator of an uncommon branch.
+  for (HBasicBlock* exit_predecessor : exit->GetPredecessors()) {
+    if (exit_predecessor->GetLastInstruction()->IsThrow()) {
+      SinkCodeToUncommonBranch(exit_predecessor);
+    }
+  }
+}
+
+static bool IsInterestingInstruction(HInstruction* instruction) {
+  // Instructions from the entry graph (for example constants) are never interesting to move.
+  if (instruction->GetBlock() == instruction->GetBlock()->GetGraph()->GetEntryBlock()) {
+    return false;
+  }
+  // We want to move moveable instructions that cannot throw, as well as
+  // heap stores and allocations.
+
+  // Volatile stores cannot be moved.
+  if (instruction->IsInstanceFieldSet()) {
+    if (instruction->AsInstanceFieldSet()->IsVolatile()) {
+      return false;
+    }
+  }
+
+  // Check allocations first, as they can throw, but it is safe to move them.
+  if (instruction->IsNewInstance() || instruction->IsNewArray()) {
+    return true;
+  }
+
+  // All other instructions that can throw cannot be moved.
+  if (instruction->CanThrow()) {
+    return false;
+  }
+
+  // We can only store on local allocations. Other heap references can
+  // be escaping. Note that allocations can escape too, but we only move
+// allocations if their users can move too, or are in the list of
+  // post dominated blocks.
+  if (instruction->IsInstanceFieldSet()) {
+    if (!instruction->InputAt(0)->IsNewInstance()) {
+      return false;
+    }
+  }
+
+  if (instruction->IsArraySet()) {
+    if (!instruction->InputAt(0)->IsNewArray()) {
+      return false;
+    }
+  }
+
+  // Heap accesses cannot go past instructions that have memory side effects, which
+  // we are not tracking here. Note that the load/store elimination optimization
+  // runs before this optimization, and should have removed interesting ones.
+  // In theory, we could handle loads of local allocations, but this is currently
+  // hard to test, as LSE removes them.
+  if (instruction->IsStaticFieldGet() ||
+      instruction->IsInstanceFieldGet() ||
+      instruction->IsArrayGet()) {
+    return false;
+  }
+
+  if (instruction->IsInstanceFieldSet() ||
+      instruction->IsArraySet() ||
+      instruction->CanBeMoved()) {
+    return true;
+  }
+  return false;
+}
+
+static void AddInstruction(HInstruction* instruction,
+                           const ArenaBitVector& processed_instructions,
+                           const ArenaBitVector& discard_blocks,
+                           ArenaVector<HInstruction*>* worklist) {
+  // Add to the work list if the instruction is not in the list of blocks
+  // to discard, hasn't already been processed, and is of interest.
+  if (!discard_blocks.IsBitSet(instruction->GetBlock()->GetBlockId()) &&
+      !processed_instructions.IsBitSet(instruction->GetId()) &&
+      IsInterestingInstruction(instruction)) {
+    worklist->push_back(instruction);
+  }
+}
+
+static void AddInputs(HInstruction* instruction,
+                      const ArenaBitVector& processed_instructions,
+                      const ArenaBitVector& discard_blocks,
+                      ArenaVector<HInstruction*>* worklist) {
+  for (HInstruction* input : instruction->GetInputs()) {
+    AddInstruction(input, processed_instructions, discard_blocks, worklist);
+  }
+}
+
+static void AddInputs(HBasicBlock* block,
+                      const ArenaBitVector& processed_instructions,
+                      const ArenaBitVector& discard_blocks,
+                      ArenaVector<HInstruction*>* worklist) {
+  for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
+    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+  }
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    AddInputs(it.Current(), processed_instructions, discard_blocks, worklist);
+  }
+}
+
+static bool ShouldFilterUse(HInstruction* instruction,
+                            HInstruction* user,
+                            const ArenaBitVector& post_dominated) {
+  if (instruction->IsNewInstance()) {
+    return user->IsInstanceFieldSet() &&
+        (user->InputAt(0) == instruction) &&
+        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+  } else if (instruction->IsNewArray()) {
+    return user->IsArraySet() &&
+        (user->InputAt(0) == instruction) &&
+        !post_dominated.IsBitSet(user->GetBlock()->GetBlockId());
+  }
+  return false;
+}
+
+
+// Find the ideal position for moving `instruction`. If `filter` is true,
+// we filter out stores into `instruction`, which are processed
+// first in step (3) of the sinking algorithm.
+// This method is tailored to the sinking algorithm, unlike
+// the generic HInstruction::MoveBeforeFirstUserAndOutOfLoops.
+static HInstruction* FindIdealPosition(HInstruction* instruction,
+                                       const ArenaBitVector& post_dominated,
+                                       bool filter = false) {
+  DCHECK(!instruction->IsPhi());  // Makes no sense for Phi.
+
+  // Find the target block.
+  CommonDominator finder(/* start_block */ nullptr);
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (!(filter && ShouldFilterUse(instruction, user, post_dominated))) {
+      finder.Update(user->IsPhi()
+          ? user->GetBlock()->GetPredecessors()[use.GetIndex()]
+          : user->GetBlock());
+    }
+  }
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    DCHECK(!use.GetUser()->GetHolder()->IsPhi());
+    DCHECK(!filter || !ShouldFilterUse(instruction, use.GetUser()->GetHolder(), post_dominated));
+    finder.Update(use.GetUser()->GetHolder()->GetBlock());
+  }
+  HBasicBlock* target_block = finder.Get();
+  if (target_block == nullptr) {
+    // No user we can go next to? Likely an LSE or DCE limitation.
+    return nullptr;
+  }
+
+  // Move to the first dominator not in a loop, if we can.
+  while (target_block->IsInLoop()) {
+    if (!post_dominated.IsBitSet(target_block->GetDominator()->GetBlockId())) {
+      break;
+    }
+    target_block = target_block->GetDominator();
+    DCHECK(target_block != nullptr);
+  }
+
+  // Find insertion position. No need to filter anymore, as we have found a
+  // target block.
+  HInstruction* insert_pos = nullptr;
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    if (use.GetUser()->GetBlock() == target_block &&
+        (insert_pos == nullptr || use.GetUser()->StrictlyDominates(insert_pos))) {
+      insert_pos = use.GetUser();
+    }
+  }
+  for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+    HInstruction* user = use.GetUser()->GetHolder();
+    if (user->GetBlock() == target_block &&
+        (insert_pos == nullptr || user->StrictlyDominates(insert_pos))) {
+      insert_pos = user;
+    }
+  }
+  if (insert_pos == nullptr) {
+    // No user in `target_block`, insert before the control flow instruction.
+    insert_pos = target_block->GetLastInstruction();
+    DCHECK(insert_pos->IsControlFlow());
+    // Avoid splitting HCondition from HIf to prevent unnecessary materialization.
+    if (insert_pos->IsIf()) {
+      HInstruction* if_input = insert_pos->AsIf()->InputAt(0);
+      if (if_input == insert_pos->GetPrevious()) {
+        insert_pos = if_input;
+      }
+    }
+  }
+  DCHECK(!insert_pos->IsPhi());
+  return insert_pos;
+}
+
+
+void CodeSinking::SinkCodeToUncommonBranch(HBasicBlock* end_block) {
+  // Local allocator so that the data structures created below are discarded
+  // at the end of this optimization.
+  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+
+  size_t number_of_instructions = graph_->GetCurrentInstructionId();
+  ArenaVector<HInstruction*> worklist(allocator.Adapter(kArenaAllocMisc));
+  ArenaBitVector processed_instructions(&allocator, number_of_instructions, /* expandable */ false);
+  ArenaBitVector post_dominated(&allocator, graph_->GetBlocks().size(), /* expandable */ false);
+  ArenaBitVector instructions_that_can_move(
+      &allocator, number_of_instructions, /* expandable */ false);
+  ArenaVector<HInstruction*> move_in_order(allocator.Adapter(kArenaAllocMisc));
+
+  // Step (1): Visit post order to get a subset of blocks post dominated by `end_block`.
+  // TODO(ngeoffray): Getting the full set of post-dominated blocks should be done
+  // by computing the post dominator tree, but that could be too time consuming. Also,
+  // we should start the analysis from blocks dominated by an uncommon branch, but we
+  // don't profile branches yet.
+  bool found_block = false;
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
+    if (block == end_block) {
+      found_block = true;
+      post_dominated.SetBit(block->GetBlockId());
+    } else if (found_block) {
+      bool is_post_dominated = true;
+      if (block->GetSuccessors().empty()) {
+        // We currently bail for loops.
+        is_post_dominated = false;
+      } else {
+        for (HBasicBlock* successor : block->GetSuccessors()) {
+          if (!post_dominated.IsBitSet(successor->GetBlockId())) {
+            is_post_dominated = false;
+            break;
+          }
+        }
+      }
+      if (is_post_dominated) {
+        post_dominated.SetBit(block->GetBlockId());
+      }
+    }
+  }
+
+  // Now that we have found a subset of post-dominated blocks, add to the worklist all inputs
+  // of instructions in these blocks that are not themselves in these blocks.
+  // Also find the common dominator of the found post dominated blocks, to help
+  // filter out un-movable uses in step (2).
+  CommonDominator finder(end_block);
+  for (size_t i = 0, e = graph_->GetBlocks().size(); i < e; ++i) {
+    if (post_dominated.IsBitSet(i)) {
+      finder.Update(graph_->GetBlocks()[i]);
+      AddInputs(graph_->GetBlocks()[i], processed_instructions, post_dominated, &worklist);
+    }
+  }
+  HBasicBlock* common_dominator = finder.Get();
+
+  // Step (2): iterate over the worklist to find sinking candidates.
+  while (!worklist.empty()) {
+    HInstruction* instruction = worklist.back();
+    if (processed_instructions.IsBitSet(instruction->GetId())) {
+      // The instruction has already been processed, continue. This happens
+      // when the instruction is the input/user of multiple instructions.
+      worklist.pop_back();
+      continue;
+    }
+    bool all_users_in_post_dominated_blocks = true;
+    bool can_move = true;
+    // Check users of the instruction.
+    for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+      HInstruction* user = use.GetUser();
+      if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId()) &&
+          !instructions_that_can_move.IsBitSet(user->GetId())) {
+        all_users_in_post_dominated_blocks = false;
+        // If we've already processed this user, or the user cannot be moved, or
+        // does not dominate the post dominated blocks, bail.
+        // TODO(ngeoffray): The domination check is an approximation. We should
+        // instead check if the dominated blocks post dominate the user's block,
+        // but we do not have post dominance information here.
+        if (processed_instructions.IsBitSet(user->GetId()) ||
+            !IsInterestingInstruction(user) ||
+            !user->GetBlock()->Dominates(common_dominator)) {
+          can_move = false;
+          break;
+        }
+      }
+    }
+
+    // Check environment users of the instruction. Some of these users require
+    // the instruction not to move.
+    if (all_users_in_post_dominated_blocks) {
+      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+        HEnvironment* environment = use.GetUser();
+        HInstruction* user = environment->GetHolder();
+        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+          if (graph_->IsDebuggable() ||
+              user->IsDeoptimize() ||
+              user->CanThrowIntoCatchBlock() ||
+              (user->IsSuspendCheck() && graph_->IsCompilingOsr())) {
+            can_move = false;
+            break;
+          }
+        }
+      }
+    }
+    if (!can_move) {
+      // Instruction cannot be moved, mark it as processed and remove it from the work
+      // list.
+      processed_instructions.SetBit(instruction->GetId());
+      worklist.pop_back();
+    } else if (all_users_in_post_dominated_blocks) {
+      // Instruction is a candidate for being sunk. Mark it as such, remove it from the
+      // work list, and add its inputs to the work list.
+      instructions_that_can_move.SetBit(instruction->GetId());
+      move_in_order.push_back(instruction);
+      processed_instructions.SetBit(instruction->GetId());
+      worklist.pop_back();
+      AddInputs(instruction, processed_instructions, post_dominated, &worklist);
+      // Drop the environment uses not in the list of post-dominated blocks. This is
+      // to help step (3) of this optimization, when we start moving instructions
+      // closer to their use.
+      for (const HUseListNode<HEnvironment*>& use : instruction->GetEnvUses()) {
+        HEnvironment* environment = use.GetUser();
+        HInstruction* user = environment->GetHolder();
+        if (!post_dominated.IsBitSet(user->GetBlock()->GetBlockId())) {
+          environment->RemoveAsUserOfInput(use.GetIndex());
+          environment->SetRawEnvAt(use.GetIndex(), nullptr);
+        }
+      }
+    } else {
+      // The information we have on the users was not enough to decide whether the
+      // instruction could be moved.
+      // Add the users to the work list, and keep the instruction in the work list
+      // to process it again once all users have been processed.
+      for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+        AddInstruction(use.GetUser(), processed_instructions, post_dominated, &worklist);
+      }
+    }
+  }
+
+  // Make sure we process instructions in dominated order. This is required for heap
+  // stores.
+  std::sort(move_in_order.begin(), move_in_order.end(), [](HInstruction* a, HInstruction* b) {
+    return b->StrictlyDominates(a);
+  });
+
+  // Step (3): Try to move sinking candidates.
+  for (HInstruction* instruction : move_in_order) {
+    HInstruction* position = nullptr;
+    if (instruction->IsArraySet() || instruction->IsInstanceFieldSet()) {
+      if (!instructions_that_can_move.IsBitSet(instruction->InputAt(0)->GetId())) {
+        // A store can trivially move, but it can safely do so only if the heap
+        // location it stores to can also move.
+        // TODO(ngeoffray): Handle allocation/store cycles by pruning these instructions
+        // from the set and all their inputs.
+        continue;
+      }
+      // Find the position of the instruction we're storing into, filtering out this
+      // store and all other stores to that instruction.
+      position = FindIdealPosition(instruction->InputAt(0), post_dominated, /* filter */ true);
+
+      // The position needs to be dominated by the store, in order for the store to move there.
+      if (position == nullptr || !instruction->GetBlock()->Dominates(position->GetBlock())) {
+        continue;
+      }
+    } else {
+      // Find the ideal position within the post dominated blocks.
+      position = FindIdealPosition(instruction, post_dominated);
+      if (position == nullptr) {
+        continue;
+      }
+    }
+    // Bail if we could not find a position in the post dominated blocks (for example,
+    // if there are multiple users whose common dominator is not in the list of
+    // post dominated blocks).
+    if (!post_dominated.IsBitSet(position->GetBlock()->GetBlockId())) {
+      continue;
+    }
+    MaybeRecordStat(MethodCompilationStat::kInstructionSunk);
+    instruction->MoveBefore(position, /* ensure_safety */ false);
+  }
+}
+
+}  // namespace art
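
To illustrate what the pass buys: an allocation (plus its initializing stores)
whose only use sits on a throwing, uncommon path can be moved into that path, so
the common path no longer pays for it. A hand-written C++ analogue of the
transformation (illustrative only; the pass rewrites HIR, not source code):

    struct Error { int code; };

    // Before sinking: the allocation and store run even when ok is true.
    void BeforeSinking(bool ok) {
      Error* e = new Error();  // only ever used on the throwing branch
      e->code = 42;
      if (!ok) {
        throw e;
      }
      // e is unused here; in managed code the GC would reclaim it.
    }

    // After sinking: the uncommon branch pays for its own allocation.
    void AfterSinking(bool ok) {
      if (!ok) {
        Error* e = new Error();
        e->code = 42;
        throw e;
      }
    }
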
diff --git a/compiler/optimizing/code_sinking.h b/compiler/optimizing/code_sinking.h
new file mode 100644
index 0000000..59cda52
--- /dev/null
+++ b/compiler/optimizing/code_sinking.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+#define ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
+
+#include "nodes.h"
+#include "optimization.h"
+
+namespace art {
+
+/**
+ * Optimization pass to move instructions into uncommon branches,
+ * when it is safe to do so.
+ */
+class CodeSinking : public HOptimization {
+ public:
+  CodeSinking(HGraph* graph, OptimizingCompilerStats* stats)
+      : HOptimization(graph, kCodeSinkingPassName, stats) {}
+
+  void Run() OVERRIDE;
+
+  static constexpr const char* kCodeSinkingPassName = "code_sinking";
+
+ private:
+  // Try to move code only used by `end_block` and all its post-dominated / dominated
+  // blocks, to these blocks.
+  void SinkCodeToUncommonBranch(HBasicBlock* end_block);
+
+  DISALLOW_COPY_AND_ASSIGN(CodeSinking);
+};
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_CODE_SINKING_H_
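
Like any HOptimization, the pass is constructed with a graph and a stats
collector and driven through Run(). A hedged usage sketch (the surrounding
driver context is assumed, not shown in this patch):

    // Assumed context: `graph` and `stats` are owned by the compiler driver.
    void RunCodeSinking(HGraph* graph, OptimizingCompilerStats* stats) {
      CodeSinking sinking(graph, stats);
      sinking.Run();  // bails out early for graphs without an exit block
    }
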
diff --git a/compiler/optimizing/codegen_test_utils.h b/compiler/optimizing/codegen_test_utils.h
index cd95404..31cd204 100644
--- a/compiler/optimizing/codegen_test_utils.h
+++ b/compiler/optimizing/codegen_test_utils.h
@@ -74,7 +74,6 @@
   }
 
  private:
-  CodegenTargetConfig() {}
   InstructionSet isa_;
   CreateCodegenFn create_codegen_;
 };
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index e184745..01304ac 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -66,6 +66,11 @@
   return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
 }
 
+inline vixl::aarch32::SRegister HighSRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegisterPair()) << location;
+  return vixl::aarch32::SRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::SRegister>());
+}
+
 inline vixl::aarch32::Register RegisterFrom(Location location) {
   DCHECK(location.IsRegister()) << location;
   return vixl::aarch32::Register(location.reg());
diff --git a/compiler/optimizing/common_dominator.h b/compiler/optimizing/common_dominator.h
index b459d24..9f012cf 100644
--- a/compiler/optimizing/common_dominator.h
+++ b/compiler/optimizing/common_dominator.h
@@ -36,12 +36,16 @@
   // Create a finder starting with a given block.
   explicit CommonDominator(HBasicBlock* block)
       : dominator_(block), chain_length_(ChainLength(block)) {
-    DCHECK(block != nullptr);
   }
 
   // Update the common dominator with another block.
   void Update(HBasicBlock* block) {
     DCHECK(block != nullptr);
+    if (dominator_ == nullptr) {
+      dominator_ = block;
+      chain_length_ = ChainLength(block);
+      return;
+    }
     HBasicBlock* block2 = dominator_;
     DCHECK(block2 != nullptr);
     if (block == block2) {
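
Dropping the non-null DCHECK lets a finder start empty and be seeded by its
first Update() call, which FindIdealPosition() in code_sinking.cc relies on.
A small usage sketch under those assumptions (hypothetical helper name):

    #include <vector>

    // Hypothetical helper: common dominator of the blocks using an instruction.
    HBasicBlock* CommonUserDominator(const std::vector<HBasicBlock*>& users) {
      CommonDominator finder(/* start_block */ nullptr);  // now legal
      for (HBasicBlock* block : users) {
        finder.Update(block);  // the first call seeds the dominator
      }
      return finder.Get();  // nullptr when `users` was empty
    }
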
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 2bf5c53..cc3c143 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -322,9 +322,11 @@
       codegen_.DumpCoreRegister(stream, location.high());
     } else if (location.IsUnallocated()) {
       stream << "unallocated";
-    } else {
-      DCHECK(location.IsDoubleStackSlot());
+    } else if (location.IsDoubleStackSlot()) {
       stream << "2x" << location.GetStackIndex() << "(sp)";
+    } else {
+      DCHECK(location.IsSIMDStackSlot());
+      stream << "4x" << location.GetStackIndex() << "(sp)";
     }
   }
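
The width prefixes count 32-bit stack words: a double slot spans two, a SIMD
slot four (16 bytes). A tiny sketch of the same dump rule over a hypothetical
enum (not the visualizer's real types):

    #include <ostream>

    enum class SlotKind { kSingle, kDouble, kSimd };

    void DumpSlot(std::ostream& os, SlotKind kind, int stack_index) {
      // Prefix = number of 32-bit words the slot occupies.
      const char* width = (kind == SlotKind::kSimd) ? "4x"
                        : (kind == SlotKind::kDouble) ? "2x" : "";
      os << width << stack_index << "(sp)";
    }
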
 
@@ -503,6 +505,10 @@
     StartAttributeStream("kind") << (try_boundary->IsEntry() ? "entry" : "exit");
   }
 
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE {
+    StartAttributeStream("kind") << deoptimize->GetKind();
+  }
+
 #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
   void VisitMultiplyAccumulate(HMultiplyAccumulate* instruction) OVERRIDE {
     StartAttributeStream("kind") << instruction->GetOpKind();
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 82ee93d..9516ccb 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -29,7 +29,21 @@
  */
 class InductionVarAnalysisTest : public CommonCompilerTest {
  public:
-  InductionVarAnalysisTest() : pool_(), allocator_(&pool_) {
+  InductionVarAnalysisTest()
+      : pool_(),
+        allocator_(&pool_),
+        iva_(nullptr),
+        entry_(nullptr),
+        return_(nullptr),
+        exit_(nullptr),
+        parameter_(nullptr),
+        constant0_(nullptr),
+        constant1_(nullptr),
+        constant2_(nullptr),
+        constant7_(nullptr),
+        constant100_(nullptr),
+        constantm1_(nullptr),
+        float_constant0_(nullptr) {
     graph_ = CreateGraph(&allocator_);
   }
 
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index 5539413..d6513c8 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -57,21 +57,27 @@
   return false;
 }
 
-/** Returns b^e for b,e >= 1. Sets overflow if arithmetic wrap-around occurred. */
+/** Computes a * b for a,b > 0 (at least until the first overflow happens). */
+static int64_t SafeMul(int64_t a, int64_t b, /*out*/ bool* overflow) {
+  if (a > 0 && b > 0 && a > (std::numeric_limits<int64_t>::max() / b)) {
+    *overflow = true;
+  }
+  return a * b;
+}
+
+/** Returns b^e for b,e > 0. Sets overflow if arithmetic wrap-around occurred. */
 static int64_t IntPow(int64_t b, int64_t e, /*out*/ bool* overflow) {
-  DCHECK_GE(b, 1);
-  DCHECK_GE(e, 1);
+  DCHECK_LT(0, b);
+  DCHECK_LT(0, e);
   int64_t pow = 1;
   while (e) {
     if (e & 1) {
-      int64_t oldpow = pow;
-      pow *= b;
-      if (pow < oldpow) {
-        *overflow = true;
-      }
+      pow = SafeMul(pow, b, overflow);
     }
     e >>= 1;
-    b *= b;
+    if (e) {
+      b = SafeMul(b, b, overflow);
+    }
   }
   return pow;
 }
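
The rewrite hoists the overflow test into SafeMul, which for positive operands
checks a > INT64_MAX / b before multiplying, and it skips the final needless
squaring of b. A self-contained sketch of the same logic (standalone names,
mirroring the patch):

    #include <cstdint>
    #include <limits>

    int64_t SafeMulSketch(int64_t a, int64_t b, bool* overflow) {
      if (a > 0 && b > 0 && a > std::numeric_limits<int64_t>::max() / b) {
        *overflow = true;  // sticky flag; callers inspect it afterwards
      }
      return a * b;
    }

    // Square-and-multiply: for e = 10 (binary 1010) this multiplies pow by
    // b^2 and b^8, using three squarings and two multiplications in total.
    int64_t IntPowSketch(int64_t b, int64_t e, bool* overflow) {
      int64_t pow = 1;
      while (e) {
        if (e & 1) pow = SafeMulSketch(pow, b, overflow);
        e >>= 1;
        if (e) b = SafeMulSketch(b, b, overflow);  // skip the last squaring
      }
      return pow;
    }
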
@@ -377,6 +383,54 @@
   return false;
 }
 
+bool InductionVarRange::IsUnitStride(HInstruction* instruction,
+                                     /*out*/ HInstruction** offset) const {
+  HLoopInformation* loop = nullptr;
+  HInductionVarAnalysis::InductionInfo* info = nullptr;
+  HInductionVarAnalysis::InductionInfo* trip = nullptr;
+  if (HasInductionInfo(instruction, instruction, &loop, &info, &trip)) {
+    if (info->induction_class == HInductionVarAnalysis::kLinear &&
+        info->op_b->operation == HInductionVarAnalysis::kFetch &&
+        !HInductionVarAnalysis::IsNarrowingLinear(info)) {
+      int64_t stride_value = 0;
+      if (IsConstant(info->op_a, kExact, &stride_value) && stride_value == 1) {
+        int64_t off_value = 0;
+        if (IsConstant(info->op_b, kExact, &off_value) && off_value == 0) {
+          *offset = nullptr;
+        } else {
+          *offset = info->op_b->fetch;
+        }
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+HInstruction* InductionVarRange::GenerateTripCount(HLoopInformation* loop,
+                                                   HGraph* graph,
+                                                   HBasicBlock* block) {
+  HInductionVarAnalysis::InductionInfo* trip =
+      induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+  if (trip != nullptr && !IsUnsafeTripCount(trip)) {
+    HInstruction* taken_test = nullptr;
+    HInstruction* trip_expr = nullptr;
+    if (IsBodyTripCount(trip)) {
+      if (!GenerateCode(trip->op_b, nullptr, graph, block, &taken_test, false, false)) {
+        return nullptr;
+      }
+    }
+    if (GenerateCode(trip->op_a, nullptr, graph, block, &trip_expr, false, false)) {
+      if (taken_test != nullptr) {
+        HInstruction* zero = graph->GetConstant(trip->type, 0);
+        trip_expr = Insert(block,
+                           new (graph->GetArena()) HSelect(taken_test, trip_expr, zero, kNoDexPc));
+      }
+      return trip_expr;
+    }
+  }
+  return nullptr;
+}
+
 //
 // Private class methods.
 //
@@ -1157,12 +1211,15 @@
     HInstruction* opb = nullptr;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
-        // Invariants (note that even though is_min does not impact code generation for
-        // invariants, some effort is made to keep this parameter consistent).
+        // Invariants (note that since invariants only have other invariants as
+        // sub expressions, viz. no induction, there is no need to adjust is_min).
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
-          case HInductionVarAnalysis::kRem:  // no proper is_min for second arg
-          case HInductionVarAnalysis::kXor:  // no proper is_min for second arg
+          case HInductionVarAnalysis::kSub:
+          case HInductionVarAnalysis::kMul:
+          case HInductionVarAnalysis::kDiv:
+          case HInductionVarAnalysis::kRem:
+          case HInductionVarAnalysis::kXor:
           case HInductionVarAnalysis::kLT:
           case HInductionVarAnalysis::kLE:
           case HInductionVarAnalysis::kGT:
@@ -1174,6 +1231,12 @@
                 switch (info->operation) {
                   case HInductionVarAnalysis::kAdd:
                     operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+                  case HInductionVarAnalysis::kSub:
+                    operation = new (graph->GetArena()) HSub(type, opa, opb); break;
+                  case HInductionVarAnalysis::kMul:
+                    operation = new (graph->GetArena()) HMul(type, opa, opb, kNoDexPc); break;
+                  case HInductionVarAnalysis::kDiv:
+                    operation = new (graph->GetArena()) HDiv(type, opa, opb, kNoDexPc); break;
                   case HInductionVarAnalysis::kRem:
                     operation = new (graph->GetArena()) HRem(type, opa, opb, kNoDexPc); break;
                   case HInductionVarAnalysis::kXor:
@@ -1194,16 +1257,7 @@
               return true;
             }
             break;
-          case HInductionVarAnalysis::kSub:  // second reversed!
-            if (GenerateCode(info->op_a, trip, graph, block, &opa, in_body, is_min) &&
-                GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
-              if (graph != nullptr) {
-                *result = Insert(block, new (graph->GetArena()) HSub(type, opa, opb));
-              }
-              return true;
-            }
-            break;
-          case HInductionVarAnalysis::kNeg:  // reversed!
+          case HInductionVarAnalysis::kNeg:
             if (GenerateCode(info->op_b, trip, graph, block, &opb, in_body, !is_min)) {
               if (graph != nullptr) {
                 *result = Insert(block, new (graph->GetArena()) HNeg(type, opb));
@@ -1240,9 +1294,9 @@
               }
             }
             break;
-          default:
-            break;
-        }
+          case HInductionVarAnalysis::kNop:
+            LOG(FATAL) << "unexpected invariant nop";
+        }  // switch invariant operation
         break;
       case HInductionVarAnalysis::kLinear: {
         // Linear induction a * i + b, for normalized 0 <= i < TC. For ranges, this should
@@ -1293,7 +1347,7 @@
         }
         break;
       }
-    }
+    }  // switch induction class
   }
   return false;
 }
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 6c424b7..0858d73 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -24,7 +24,8 @@
 /**
  * This class implements range analysis on expressions within loops. It takes the results
  * of induction variable analysis in the constructor and provides a public API to obtain
- * a conservative lower and upper bound value on each instruction in the HIR.
+ * a conservative lower and upper bound value or last value on each instruction in the HIR.
+ * The public API also provides a few general-purpose utility methods related to induction.
  *
  * The range analysis is done with a combination of symbolic and partial integral evaluation
  * of expressions. The analysis avoids complications with wrap-around arithmetic on the integral
@@ -154,6 +155,19 @@
    */
   bool IsFinite(HLoopInformation* loop, /*out*/ int64_t* tc) const;
 
+  /**
+   * Checks if instruction is a unit stride induction inside the closest enveloping loop.
+   * Returns invariant offset on success.
+   */
+  bool IsUnitStride(HInstruction* instruction, /*out*/ HInstruction** offset) const;
+
+  /**
+   * Generates the trip count expression for the given loop. Code is generated in given block
+   * and graph. The expression is guarded by a taken test if needed. Returns the trip count
+   * expression on success or null otherwise.
+   */
+  HInstruction* GenerateTripCount(HLoopInformation* loop, HGraph* graph, HBasicBlock* block);
+
  private:
   /*
    * Enum used in IsConstant() request.
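
"Unit stride" means the classic counted form i = offset, offset + 1, ..., and a
generated trip count is wrapped in a select whenever the loop may not be entered
at all. A sketch of the intended client pattern (the hypothetical function and
its arguments stand in for a real caller's context):

    // Assumed context: `range` is an InductionVarRange over a completed
    // analysis; `phi`, `loop`, `graph`, and `preheader` come from the caller.
    void UseLoopUtilities(InductionVarRange& range, HInstruction* phi,
                          HLoopInformation* loop, HGraph* graph,
                          HBasicBlock* preheader) {
      // For a loop shaped like: for (int i = 0; i < n; i++) { use(a[i]); }
      HInstruction* offset = nullptr;
      if (range.IsUnitStride(phi, &offset)) {
        // offset == nullptr encodes a zero offset (the loop runs i = 0, 1, ...).
        HInstruction* tc = range.GenerateTripCount(loop, graph, preheader);
        if (tc != nullptr) {
          // When a taken-test is required, tc is HSelect(taken_test, trip_expr, 0).
        }
      }
    }
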
diff --git a/compiler/optimizing/induction_var_range_test.cc b/compiler/optimizing/induction_var_range_test.cc
index d81817f..fcdf8eb 100644
--- a/compiler/optimizing/induction_var_range_test.cc
+++ b/compiler/optimizing/induction_var_range_test.cc
@@ -48,6 +48,11 @@
     EXPECT_EQ(v1.is_known, v2.is_known);
   }
 
+  void ExpectInt(int32_t value, HInstruction* i) {
+    ASSERT_TRUE(i->IsIntConstant());
+    EXPECT_EQ(value, i->AsIntConstant()->GetValue());
+  }
+
   //
   // Construction methods.
   //
@@ -757,10 +762,20 @@
   // Last value (unsimplified).
   HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
   ASSERT_TRUE(last->IsAdd());
-  ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, last->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1000, last->InputAt(0));
+  ExpectInt(0, last->InputAt(1));
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(1000, tc);
+  HInstruction* offset = nullptr;
+  EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+  EXPECT_TRUE(offset == nullptr);
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  ExpectInt(1000, tce);
 }
 
 TEST_F(InductionVarRangeTest, ConstantTripCountDown) {
@@ -799,15 +814,27 @@
   // Last value (unsimplified).
   HInstruction* last = range_.GenerateLastValue(phi, graph_, loop_preheader_);
   ASSERT_TRUE(last->IsSub());
-  ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, last->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, last->InputAt(0));
   ASSERT_TRUE(last->InputAt(1)->IsNeg());
   last = last->InputAt(1)->InputAt(0);
   ASSERT_TRUE(last->IsSub());
-  ASSERT_TRUE(last->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(0, last->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(last->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1000, last->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(0, last->InputAt(0));
+  ExpectInt(1000, last->InputAt(1));
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(1000, tc);
+  HInstruction* offset = nullptr;
+  EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  ASSERT_TRUE(tce->IsNeg());
+  last = tce->InputAt(0);
+  EXPECT_TRUE(last->IsSub());
+  ExpectInt(0, last->InputAt(0));
+  ExpectInt(1000, last->InputAt(1));
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountUp) {
@@ -851,27 +878,22 @@
   // Verify lower is 0+0.
   ASSERT_TRUE(lower != nullptr);
   ASSERT_TRUE(lower->IsAdd());
-  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(0, lower->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, lower->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(0, lower->InputAt(0));
+  ExpectInt(0, lower->InputAt(1));
 
   // Verify upper is (V-1)+0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsAdd());
   ASSERT_TRUE(upper->InputAt(0)->IsSub());
   EXPECT_TRUE(upper->InputAt(0)->InputAt(0)->IsParameterValue());
-  ASSERT_TRUE(upper->InputAt(0)->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1, upper->InputAt(0)->InputAt(1)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1, upper->InputAt(0)->InputAt(1));
+  ExpectInt(0, upper->InputAt(1));
 
   // Verify taken-test is 0<V.
   HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsLessThan());
-  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(0, taken->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(0, taken->InputAt(0));
   EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 
   // Replacement.
@@ -880,6 +902,21 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(1), v1);
   ExpectEqual(Value(y_, 1, 0), v2);
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(0, tc);  // unknown
+  HInstruction* offset = nullptr;
+  EXPECT_TRUE(range_.IsUnitStride(phi, &offset));
+  EXPECT_TRUE(offset == nullptr);
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  EXPECT_TRUE(tce->IsSelect());  // guarded by taken-test
+  ExpectInt(0, tce->InputAt(0));
+  EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
+  EXPECT_TRUE(tce->InputAt(2)->IsLessThan());
 }
 
 TEST_F(InductionVarRangeTest, SymbolicTripCountDown) {
@@ -923,32 +960,26 @@
   // Verify lower is 1000-((1000-V)-1).
   ASSERT_TRUE(lower != nullptr);
   ASSERT_TRUE(lower->IsSub());
-  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, lower->InputAt(0));
   lower = lower->InputAt(1);
   ASSERT_TRUE(lower->IsSub());
-  ASSERT_TRUE(lower->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(1, lower->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1, lower->InputAt(1));
   lower = lower->InputAt(0);
   ASSERT_TRUE(lower->IsSub());
-  ASSERT_TRUE(lower->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, lower->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, lower->InputAt(0));
   EXPECT_TRUE(lower->InputAt(1)->IsParameterValue());
 
   // Verify upper is 1000-0.
   ASSERT_TRUE(upper != nullptr);
   ASSERT_TRUE(upper->IsSub());
-  ASSERT_TRUE(upper->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, upper->InputAt(0)->AsIntConstant()->GetValue());
-  ASSERT_TRUE(upper->InputAt(1)->IsIntConstant());
-  EXPECT_EQ(0, upper->InputAt(1)->AsIntConstant()->GetValue());
+  ExpectInt(1000, upper->InputAt(0));
+  ExpectInt(0, upper->InputAt(1));
 
   // Verify taken-test is 1000>V.
   HInstruction* taken = range_.GenerateTakenTest(increment_, graph_, loop_preheader_);
   ASSERT_TRUE(taken != nullptr);
   ASSERT_TRUE(taken->IsGreaterThan());
-  ASSERT_TRUE(taken->InputAt(0)->IsIntConstant());
-  EXPECT_EQ(1000, taken->InputAt(0)->AsIntConstant()->GetValue());
+  ExpectInt(1000, taken->InputAt(0));
   EXPECT_TRUE(taken->InputAt(1)->IsParameterValue());
 
   // Replacement.
@@ -957,6 +988,23 @@
   EXPECT_FALSE(needs_finite_test);
   ExpectEqual(Value(y_, 1, 0), v1);
   ExpectEqual(Value(999), v2);
+
+  // Loop logic.
+  int64_t tc = 0;
+  EXPECT_TRUE(range_.IsFinite(loop_header_->GetLoopInformation(), &tc));
+  EXPECT_EQ(0, tc);  // unknown
+  HInstruction* offset = nullptr;
+  EXPECT_FALSE(range_.IsUnitStride(phi, &offset));
+  HInstruction* tce = range_.GenerateTripCount(
+      loop_header_->GetLoopInformation(), graph_, loop_preheader_);
+  ASSERT_TRUE(tce != nullptr);
+  EXPECT_TRUE(tce->IsSelect());  // guarded by taken-test
+  ExpectInt(0, tce->InputAt(0));
+  EXPECT_TRUE(tce->InputAt(1)->IsSub());
+  EXPECT_TRUE(tce->InputAt(2)->IsGreaterThan());
+  tce = tce->InputAt(1);
+  ExpectInt(1000, tce->InputAt(0));
+  EXPECT_TRUE(tce->InputAt(1)->IsParameterValue());
 }
 
 }  // namespace art
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 3e34090..298ae5c 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -46,29 +46,100 @@
 
 namespace art {
 
-static constexpr size_t kMaximumNumberOfHInstructions = 32;
+// Instruction limit to control memory use.
+static constexpr size_t kMaximumNumberOfTotalInstructions = 1024;
+
+// Maximum number of instructions for considering a method small,
+// which we will always try to inline if the other non-instruction limits
+// are not reached.
+static constexpr size_t kMaximumNumberOfInstructionsForSmallMethod = 3;
 
 // Limit the number of dex registers that we accumulate while inlining
 // to avoid creating large amount of nested environments.
 static constexpr size_t kMaximumNumberOfCumulatedDexRegisters = 64;
 
-// Avoid inlining within a huge method due to memory pressure.
-static constexpr size_t kMaximumCodeUnitSize = 4096;
+// Limit recursive call inlining, which do not benefit from too
+// much inlining compared to code locality.
+static constexpr size_t kMaximumNumberOfRecursiveCalls = 4;
+
+// Controls the use of inline caches in AOT mode.
+static constexpr bool kUseAOTInlineCaches = true;
+
+// We check for line numbers to make sure the DepthString implementation
+// aligns the output nicely.
+#define LOG_INTERNAL(msg) \
+  static_assert(__LINE__ > 10, "Unhandled line number"); \
+  static_assert(__LINE__ < 10000, "Unhandled line number"); \
+  VLOG(compiler) << DepthString(__LINE__) << msg
+
+#define LOG_TRY() LOG_INTERNAL("Try inlining call: ")
+#define LOG_NOTE() LOG_INTERNAL("Note: ")
+#define LOG_SUCCESS() LOG_INTERNAL("Success: ")
+#define LOG_FAIL(stat) MaybeRecordStat(stat); LOG_INTERNAL("Fail: ")
+#define LOG_FAIL_NO_STAT() LOG_INTERNAL("Fail: ")
+
+std::string HInliner::DepthString(int line) const {
+  std::string value;
+  // Indent according to the inlining depth.
+  size_t count = depth_;
+  // Line numbers get printed in the log, so add one space if the line number is
+  // below 1000, and two if below 100. Lines up to 10 hold the copyright header.
+  if (!kIsTargetBuild) {
+    if (line < 100) {
+      value += " ";
+    }
+    if (line < 1000) {
+      value += " ";
+    }
+    // Safeguard if this file reaches more than 10000 lines.
+    DCHECK_LT(line, 10000);
+  }
+  for (size_t i = 0; i < count; ++i) {
+    value += "  ";
+  }
+  return value;
+}
+
+static size_t CountNumberOfInstructions(HGraph* graph) {
+  size_t number_of_instructions = 0;
+  for (HBasicBlock* block : graph->GetReversePostOrderSkipEntryBlock()) {
+    for (HInstructionIterator instr_it(block->GetInstructions());
+         !instr_it.Done();
+         instr_it.Advance()) {
+      ++number_of_instructions;
+    }
+  }
+  return number_of_instructions;
+}
+
+void HInliner::UpdateInliningBudget() {
+  if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
+    // Always try to inline small methods.
+    inlining_budget_ = kMaximumNumberOfInstructionsForSmallMethod;
+  } else {
+    inlining_budget_ = std::max(
+        kMaximumNumberOfInstructionsForSmallMethod,
+        kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
+  }
+}
 
 void HInliner::Run() {
-  const CompilerOptions& compiler_options = compiler_driver_->GetCompilerOptions();
-  if ((compiler_options.GetInlineDepthLimit() == 0)
-      || (compiler_options.GetInlineMaxCodeUnits() == 0)) {
-    return;
-  }
-  if (caller_compilation_unit_.GetCodeItem()->insns_size_in_code_units_ > kMaximumCodeUnitSize) {
-    return;
-  }
   if (graph_->IsDebuggable()) {
     // For simplicity, we currently never inline when the graph is debuggable. This avoids
     // doing some logic in the runtime to discover if a method could have been inlined.
     return;
   }
+
+  // Initialize the number of instructions for the method being compiled. Recursive calls
+  // to HInliner::Run have already updated the instruction count.
+  if (outermost_graph_ == graph_) {
+    total_number_of_instructions_ = CountNumberOfInstructions(graph_);
+  }
+
+  UpdateInliningBudget();
+  DCHECK_NE(total_number_of_instructions_, 0u);
+  DCHECK_NE(inlining_budget_, 0u);
+
   // Keep a copy of all blocks when starting the visit.
   ArenaVector<HBasicBlock*> blocks = graph_->GetReversePostOrder();
   DCHECK(!blocks.empty());
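
The budget shrinks as the accumulated instruction count grows, but is clamped at
the small-method threshold so tiny methods stay inlinable even in huge graphs.
A standalone sketch with worked values (hypothetical names):

    #include <algorithm>
    #include <cstddef>

    constexpr size_t kMaxTotal = 1024;  // kMaximumNumberOfTotalInstructions
    constexpr size_t kSmall = 3;        // ...InstructionsForSmallMethod

    size_t InliningBudget(size_t total_instructions) {
      if (total_instructions >= kMaxTotal) {
        return kSmall;  // over budget: only tiny methods may still go in
      }
      return std::max(kSmall, kMaxTotal - total_instructions);
    }

    // InliningBudget(900)  == 124
    // InliningBudget(1023) == 3  (max(3, 1) clamps to the threshold)
    // InliningBudget(2000) == 3
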
@@ -249,20 +320,25 @@
   ProfilingInfo* const profiling_info_;
 };
 
-static bool IsMonomorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK_GE(InlineCache::kIndividualCacheSize, 2);
-  return classes->Get(0) != nullptr && classes->Get(1) == nullptr;
-}
-
-static bool IsMegamorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
-    if (classes->Get(i) == nullptr) {
-      return false;
+HInliner::InlineCacheType HInliner::GetInlineCacheType(
+    const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+  REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint8_t number_of_types = 0;
+  for (; number_of_types < InlineCache::kIndividualCacheSize; ++number_of_types) {
+    if (classes->Get(number_of_types) == nullptr) {
+      break;
     }
   }
-  return true;
+
+  if (number_of_types == 0) {
+    return kInlineCacheUninitialized;
+  } else if (number_of_types == 1) {
+    return kInlineCacheMonomorphic;
+  } else if (number_of_types == InlineCache::kIndividualCacheSize) {
+    return kInlineCacheMegamorphic;
+  } else {
+    return kInlineCachePolymorphic;
+  }
 }
 
 static mirror::Class* GetMonomorphicType(Handle<mirror::ObjectArray<mirror::Class>> classes)
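
The classification just counts leading non-null entries in the fixed-size cache:
zero is uninitialized, one monomorphic, a full cache megamorphic, anything in
between polymorphic. A minimal sketch over a plain array (hypothetical types;
the size stands in for InlineCache::kIndividualCacheSize):

    #include <cstdint>

    enum class CacheType { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };

    constexpr uint8_t kCacheSize = 5;  // assumed cache size for illustration

    CacheType Classify(void* const (&classes)[kCacheSize]) {
      uint8_t n = 0;
      while (n < kCacheSize && classes[n] != nullptr) {
        ++n;  // count leading non-null class entries
      }
      if (n == 0) return CacheType::kUninitialized;
      if (n == 1) return CacheType::kMonomorphic;
      if (n == kCacheSize) return CacheType::kMegamorphic;
      return CacheType::kPolymorphic;
    }
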
@@ -271,18 +347,6 @@
   return classes->Get(0);
 }
 
-static bool IsUninitialized(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  return classes->Get(0) == nullptr;
-}
-
-static bool IsPolymorphic(Handle<mirror::ObjectArray<mirror::Class>> classes)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK_GE(InlineCache::kIndividualCacheSize, 3);
-  return classes->Get(1) != nullptr &&
-      classes->Get(InlineCache::kIndividualCacheSize - 1) == nullptr;
-}
-
 ArtMethod* HInliner::TryCHADevirtualization(ArtMethod* resolved_method) {
   if (!resolved_method->HasSingleImplementation()) {
     return nullptr;
@@ -296,7 +360,24 @@
     return nullptr;
   }
   PointerSize pointer_size = caller_compilation_unit_.GetClassLinker()->GetImagePointerSize();
-  return resolved_method->GetSingleImplementation(pointer_size);
+  ArtMethod* single_impl = resolved_method->GetSingleImplementation(pointer_size);
+  if (single_impl == nullptr) {
+    return nullptr;
+  }
+  if (single_impl->IsProxyMethod()) {
+    // A proxy method is a generic invoker that's not worth
+    // devirtualizing/inlining. It also causes issues if the proxy method is
+    // in another dex file and we try to rewrite invoke-interface to
+    // invoke-virtual, because a proxy method doesn't have a real dex file.
+    return nullptr;
+  }
+  if (!single_impl->GetDeclaringClass()->IsResolved()) {
+    // There's a race with the class loading, which updates the CHA info
+    // before setting the class to resolved. So we just bail for this
+    // rare occurrence.
+    return nullptr;
+  }
+  return single_impl;
 }
 
 bool HInliner::TryInline(HInvoke* invoke_instruction) {
@@ -309,17 +390,18 @@
   ScopedObjectAccess soa(Thread::Current());
   uint32_t method_index = invoke_instruction->GetDexMethodIndex();
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index);
+  LOG_TRY() << caller_dex_file.PrettyMethod(method_index);
 
-  // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
-  ArtMethod* actual_method = nullptr;
   if (resolved_method == nullptr) {
     DCHECK(invoke_instruction->IsInvokeStaticOrDirect());
     DCHECK(invoke_instruction->AsInvokeStaticOrDirect()->IsStringInit());
-    VLOG(compiler) << "Not inlining a String.<init> method";
+    LOG_FAIL_NO_STAT() << "Not inlining a String.<init> method";
     return false;
-  } else if (invoke_instruction->IsInvokeStaticOrDirect()) {
+  }
+  ArtMethod* actual_method = nullptr;
+
+  if (invoke_instruction->IsInvokeStaticOrDirect()) {
     actual_method = resolved_method;
   } else {
     // Check if we can statically find the method.
@@ -332,6 +414,7 @@
     if (method != nullptr) {
       cha_devirtualize = true;
       actual_method = method;
+      LOG_NOTE() << "Try CHA-based inlining of " << actual_method->PrettyMethod();
     }
   }
 
@@ -353,67 +436,226 @@
     }
     return result;
   }
-
   DCHECK(!invoke_instruction->IsInvokeStaticOrDirect());
 
-  // Check if we can use an inline cache.
-  ArtMethod* caller = graph_->GetArtMethod();
-  if (Runtime::Current()->UseJitCompilation()) {
-    // Under JIT, we should always know the caller.
-    DCHECK(caller != nullptr);
-    ScopedProfilingInfoInlineUse spiis(caller, soa.Self());
-    ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
-    if (profiling_info != nullptr) {
-      StackHandleScope<1> hs(soa.Self());
-      ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
-      Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs.NewHandle(
-          mirror::ObjectArray<mirror::Class>::Alloc(
-              soa.Self(),
-              class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
-              InlineCache::kIndividualCacheSize));
-      if (inline_cache == nullptr) {
-        // We got an OOME. Just clear the exception, and don't inline.
-        DCHECK(soa.Self()->IsExceptionPending());
-        soa.Self()->ClearException();
-        VLOG(compiler) << "Out of memory in the compiler when trying to inline";
-        return false;
+  // Try using inline caches.
+  return TryInlineFromInlineCache(caller_dex_file, invoke_instruction, resolved_method);
+}
+
+static Handle<mirror::ObjectArray<mirror::Class>> AllocateInlineCacheHolder(
+    const DexCompilationUnit& compilation_unit,
+    StackHandleScope<1>* hs)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Thread* self = Thread::Current();
+  ClassLinker* class_linker = compilation_unit.GetClassLinker();
+  Handle<mirror::ObjectArray<mirror::Class>> inline_cache = hs->NewHandle(
+      mirror::ObjectArray<mirror::Class>::Alloc(
+          self,
+          class_linker->GetClassRoot(ClassLinker::kClassArrayClass),
+          InlineCache::kIndividualCacheSize));
+  if (inline_cache == nullptr) {
+    // We got an OOME. Just clear the exception, and don't inline.
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+    VLOG(compiler) << "Out of memory in the compiler when trying to inline";
+  }
+  return inline_cache;
+}
+
+bool HInliner::TryInlineFromInlineCache(const DexFile& caller_dex_file,
+                                        HInvoke* invoke_instruction,
+                                        ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (Runtime::Current()->IsAotCompiler() && !kUseAOTInlineCaches) {
+    return false;
+  }
+
+  StackHandleScope<1> hs(Thread::Current());
+  Handle<mirror::ObjectArray<mirror::Class>> inline_cache;
+  InlineCacheType inline_cache_type = Runtime::Current()->IsAotCompiler()
+      ? GetInlineCacheAOT(caller_dex_file, invoke_instruction, &hs, &inline_cache)
+      : GetInlineCacheJIT(invoke_instruction, &hs, &inline_cache);
+
+  switch (inline_cache_type) {
+    case kInlineCacheNoData: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " could not be statically determined";
+      return false;
+    }
+
+    case kInlineCacheUninitialized: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " is not hit and not inlined";
+      return false;
+    }
+
+    case kInlineCacheMonomorphic: {
+      MaybeRecordStat(kMonomorphicCall);
+      if (outermost_graph_->IsCompilingOsr()) {
+        // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
+        // interpreter and it may have seen different receiver types.
+        return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
       } else {
-        Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
-            *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
-            inline_cache);
-        if (IsUninitialized(inline_cache)) {
-          VLOG(compiler) << "Interface or virtual call to "
-                         << caller_dex_file.PrettyMethod(method_index)
-                         << " is not hit and not inlined";
-          return false;
-        } else if (IsMonomorphic(inline_cache)) {
-          MaybeRecordStat(kMonomorphicCall);
-          if (outermost_graph_->IsCompilingOsr()) {
-            // If we are compiling OSR, we pretend this call is polymorphic, as we may come from the
-            // interpreter and it may have seen different receiver types.
-            return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
-          } else {
-            return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
-          }
-        } else if (IsPolymorphic(inline_cache)) {
-          MaybeRecordStat(kPolymorphicCall);
-          return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
-        } else {
-          DCHECK(IsMegamorphic(inline_cache));
-          VLOG(compiler) << "Interface or virtual call to "
-                         << caller_dex_file.PrettyMethod(method_index)
-                         << " is megamorphic and not inlined";
-          MaybeRecordStat(kMegamorphicCall);
-          return false;
-        }
+        return TryInlineMonomorphicCall(invoke_instruction, resolved_method, inline_cache);
       }
     }
+
+    case kInlineCachePolymorphic: {
+      MaybeRecordStat(kPolymorphicCall);
+      return TryInlinePolymorphicCall(invoke_instruction, resolved_method, inline_cache);
+    }
+
+    case kInlineCacheMegamorphic: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " is megamorphic and not inlined";
+      MaybeRecordStat(kMegamorphicCall);
+      return false;
+    }
+
+    case kInlineCacheMissingTypes: {
+      LOG_FAIL_NO_STAT()
+          << "Interface or virtual call to "
+          << caller_dex_file.PrettyMethod(invoke_instruction->GetDexMethodIndex())
+          << " is missing types and not inlined";
+      return false;
+    }
+  }
+  UNREACHABLE();
+}
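
GetInlineCacheType() (declared in inliner.h below) drives the switch above, but its body is not part of this excerpt. A minimal self-contained sketch of the classification, assuming the cache is a fixed-size array whose leading non-null slots hold the observed receiver classes:

    #include <array>
    #include <cstddef>

    enum class CacheKind { kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic };

    // Count the leading non-null entries: 0 means the call site was never
    // hit, 1 is monomorphic, a full array is treated as megamorphic, and
    // anything in between is polymorphic.
    template <std::size_t N>
    CacheKind Classify(const std::array<const void*, N>& classes) {
      std::size_t count = 0;
      while (count < N && classes[count] != nullptr) {
        ++count;
      }
      if (count == 0) return CacheKind::kUninitialized;
      if (count == 1) return CacheKind::kMonomorphic;
      if (count == N) return CacheKind::kMegamorphic;
      return CacheKind::kPolymorphic;
    }
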
+
+HInliner::InlineCacheType HInliner::GetInlineCacheJIT(
+    HInvoke* invoke_instruction,
+    StackHandleScope<1>* hs,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(Runtime::Current()->UseJitCompilation());
+
+  ArtMethod* caller = graph_->GetArtMethod();
+  // Under JIT, we should always know the caller.
+  DCHECK(caller != nullptr);
+  ScopedProfilingInfoInlineUse spiis(caller, Thread::Current());
+  ProfilingInfo* profiling_info = spiis.GetProfilingInfo();
+
+  if (profiling_info == nullptr) {
+    return kInlineCacheNoData;
+  }
+
+  *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+  if (inline_cache->Get() == nullptr) {
+    // We can't extract any data if we failed to allocate.
+    return kInlineCacheNoData;
+  } else {
+    Runtime::Current()->GetJit()->GetCodeCache()->CopyInlineCacheInto(
+        *profiling_info->GetInlineCache(invoke_instruction->GetDexPc()),
+        *inline_cache);
+    return GetInlineCacheType(*inline_cache);
+  }
+}
+
+HInliner::InlineCacheType HInliner::GetInlineCacheAOT(
+    const DexFile& caller_dex_file,
+    HInvoke* invoke_instruction,
+    StackHandleScope<1>* hs,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  const ProfileCompilationInfo* pci = compiler_driver_->GetProfileCompilationInfo();
+  if (pci == nullptr) {
+    return kInlineCacheNoData;
+  }
+
+  ProfileCompilationInfo::OfflineProfileMethodInfo offline_profile;
+  bool found = pci->GetMethod(caller_dex_file.GetLocation(),
+                              caller_dex_file.GetLocationChecksum(),
+                              caller_compilation_unit_.GetDexMethodIndex(),
+                              &offline_profile);
+  if (!found) {
+    return kInlineCacheNoData;  // no profile information for this invocation.
+  }
+
+  *inline_cache = AllocateInlineCacheHolder(caller_compilation_unit_, hs);
+  if (inline_cache->Get() == nullptr) {
+    // We can't extract any data if we failed to allocate.
+    return kInlineCacheNoData;
+  } else {
+    return ExtractClassesFromOfflineProfile(invoke_instruction,
+                                            offline_profile,
+                                            *inline_cache);
+  }
+}
+
+HInliner::InlineCacheType HInliner::ExtractClassesFromOfflineProfile(
+    const HInvoke* invoke_instruction,
+    const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+    /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const auto it = offline_profile.inline_caches.find(invoke_instruction->GetDexPc());
+  if (it == offline_profile.inline_caches.end()) {
+    return kInlineCacheUninitialized;
+  }
+
+  const ProfileCompilationInfo::DexPcData& dex_pc_data = it->second;
+
+  if (dex_pc_data.is_missing_types) {
+    return kInlineCacheMissingTypes;
+  }
+  if (dex_pc_data.is_megamorphic) {
+    return kInlineCacheMegamorphic;
+  }
+
+  DCHECK_LE(dex_pc_data.classes.size(), InlineCache::kIndividualCacheSize);
+  Thread* self = Thread::Current();
+  // We need to resolve the class relative to the containing dex file.
+  // So first, build a mapping from the index of the dex file in the profile
+  // to its dex cache. This avoids repeating the lookup when walking over
+  // the inline cache types.
+  std::vector<ObjPtr<mirror::DexCache>> dex_profile_index_to_dex_cache(
+        offline_profile.dex_references.size());
+  for (size_t i = 0; i < offline_profile.dex_references.size(); i++) {
+    bool found = false;
+    for (const DexFile* dex_file : compiler_driver_->GetDexFilesForOatFile()) {
+      if (offline_profile.dex_references[i].MatchesDex(dex_file)) {
+        dex_profile_index_to_dex_cache[i] =
+            caller_compilation_unit_.GetClassLinker()->FindDexCache(self, *dex_file);
+        found = true;
+      }
+    }
+    if (!found) {
+      VLOG(compiler) << "Could not find profiled dex file: "
+          << offline_profile.dex_references[i].dex_location;
+      return kInlineCacheMissingTypes;
+    }
   }
 
-  VLOG(compiler) << "Interface or virtual call to "
-                 << caller_dex_file.PrettyMethod(method_index)
-                 << " could not be statically determined";
-  return false;
+  // Walk over the classes and resolve them. If we cannot find a type we return
+  // kInlineCacheMissingTypes.
+  int ic_index = 0;
+  for (const ProfileCompilationInfo::ClassReference& class_ref : dex_pc_data.classes) {
+    ObjPtr<mirror::DexCache> dex_cache =
+        dex_profile_index_to_dex_cache[class_ref.dex_profile_index];
+    DCHECK(dex_cache != nullptr);
+    ObjPtr<mirror::Class> clazz = ClassLinker::LookupResolvedType(
+          class_ref.type_index,
+          dex_cache,
+          caller_compilation_unit_.GetClassLoader().Get());
+    if (clazz != nullptr) {
+      inline_cache->Set(ic_index++, clazz);
+    } else {
+      VLOG(compiler) << "Could not resolve class from inline cache in AOT mode "
+          << caller_compilation_unit_.GetDexFile()->PrettyMethod(
+              invoke_instruction->GetDexMethodIndex()) << " : "
+          << caller_compilation_unit_
+              .GetDexFile()->StringByTypeIdx(class_ref.type_index);
+      return kInlineCacheMissingTypes;
+    }
+  }
+  return GetInlineCacheType(inline_cache);
 }
 
 HInstanceFieldGet* HInliner::BuildGetReceiverClass(ClassLinker* class_linker,
@@ -436,6 +678,32 @@
   return result;
 }
 
+static ArtMethod* ResolveMethodFromInlineCache(Handle<mirror::Class> klass,
+                                               ArtMethod* resolved_method,
+                                               HInstruction* invoke_instruction,
+                                               PointerSize pointer_size)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (Runtime::Current()->IsAotCompiler()) {
+    // We can get unrelated types when working with profiles (they may be
+    // corrupted, stale after a system update, or written to by anyone). So
+    // first check if the class actually implements the declaring class of
+    // the method that is being called in bytecode.
+    // Note: the lookup methods used below require the types to be assignable.
+    if (!resolved_method->GetDeclaringClass()->IsAssignableFrom(klass.Get())) {
+      return nullptr;
+    }
+  }
+
+  if (invoke_instruction->IsInvokeInterface()) {
+    resolved_method = klass->FindVirtualMethodForInterface(resolved_method, pointer_size);
+  } else {
+    DCHECK(invoke_instruction->IsInvokeVirtual());
+    resolved_method = klass->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+  }
+  DCHECK(resolved_method != nullptr);
+  return resolved_method;
+}
+
 bool HInliner::TryInlineMonomorphicCall(HInvoke* invoke_instruction,
                                         ArtMethod* resolved_method,
                                         Handle<mirror::ObjectArray<mirror::Class>> classes) {
@@ -445,27 +713,29 @@
   dex::TypeIndex class_index = FindClassIndexIn(
       GetMonomorphicType(classes), caller_compilation_unit_);
   if (!class_index.IsValid()) {
-    VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
-                   << " from inline cache is not inlined because its class is not"
-                   << " accessible to the caller";
+    LOG_FAIL(kNotInlinedDexCache)
+        << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+        << " from inline cache is not inlined because its class is not"
+        << " accessible to the caller";
     return false;
   }
 
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   PointerSize pointer_size = class_linker->GetImagePointerSize();
-  if (invoke_instruction->IsInvokeInterface()) {
-    resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForInterface(
-        resolved_method, pointer_size);
-  } else {
-    DCHECK(invoke_instruction->IsInvokeVirtual());
-    resolved_method = GetMonomorphicType(classes)->FindVirtualMethodForVirtual(
-        resolved_method, pointer_size);
+  Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
+  resolved_method = ResolveMethodFromInlineCache(
+      monomorphic_type, resolved_method, invoke_instruction, pointer_size);
+
+  LOG_NOTE() << "Try inline monomorphic call to " << resolved_method->PrettyMethod();
+  if (resolved_method == nullptr) {
+    // Bogus AOT profile, bail.
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    return false;
   }
-  DCHECK(resolved_method != nullptr);
+
   HInstruction* receiver = invoke_instruction->InputAt(0);
   HInstruction* cursor = invoke_instruction->GetPrevious();
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
-  Handle<mirror::Class> monomorphic_type = handles_->NewHandle(GetMonomorphicType(classes));
   if (!TryInlineAndReplace(invoke_instruction,
                            resolved_method,
                            ReferenceTypeInfo::Create(monomorphic_type, /* is_exact */ true),
@@ -504,7 +774,8 @@
       HShouldDeoptimizeFlag(graph_->GetArena(), dex_pc);
   HInstruction* compare = new (graph_->GetArena()) HNotEqual(
       deopt_flag, graph_->GetIntConstant(0, dex_pc));
-  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(compare, dex_pc);
+  HInstruction* deopt = new (graph_->GetArena()) HDeoptimize(
+      graph_->GetArena(), compare, HDeoptimize::Kind::kInline, dex_pc);
 
   if (cursor != nullptr) {
     bb_cursor->InsertInstructionAfter(deopt_flag, cursor);
@@ -549,21 +820,35 @@
                                                                is_referrer,
                                                                invoke_instruction->GetDexPc(),
                                                                /* needs_access_check */ false);
-  HLoadClass::LoadKind kind = HSharpening::SharpenClass(
+  HLoadClass::LoadKind kind = HSharpening::ComputeLoadClassKind(
       load_class, codegen_, compiler_driver_, caller_compilation_unit_);
   DCHECK(kind != HLoadClass::LoadKind::kInvalid)
       << "We should always be able to reference a class for inline caches";
   // Insert before setting the kind, as setting the kind affects the inputs.
   bb_cursor->InsertInstructionAfter(load_class, receiver_class);
   load_class->SetLoadKind(kind);
+  // In AOT mode, we will most likely load the class from BSS, which will involve a call
+  // to the runtime. In this case, the load instruction needs an environment, so copy
+  // it from the invoke instruction.
+  if (load_class->NeedsEnvironment()) {
+    DCHECK(Runtime::Current()->IsAotCompiler());
+    load_class->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+  }
 
   HNotEqual* compare = new (graph_->GetArena()) HNotEqual(load_class, receiver_class);
   bb_cursor->InsertInstructionAfter(compare, load_class);
   if (with_deoptimization) {
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-        compare, invoke_instruction->GetDexPc());
+        graph_->GetArena(),
+        compare,
+        receiver,
+        HDeoptimize::Kind::kInline,
+        invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
+    DCHECK_EQ(invoke_instruction->InputAt(0), receiver);
+    receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
+    deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
   }
   return compare;
 }
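
At runtime, the guard built above boils down to an exact-class check on the receiver followed by a deoptimization on mismatch. A self-contained analogue of this guarded devirtualization (all names hypothetical):

    #include <typeinfo>

    struct Base {
      virtual int f() const { return 1; }
      virtual ~Base() = default;
    };
    struct Expected : Base {  // The single class seen in the inline cache.
      int f() const override { return 2; }
    };

    // Compare the receiver's exact class with the profiled class: on a hit,
    // run the inlined body; on a miss, bail out to the generic path (the
    // compiler emits an HDeoptimize; here we simply dispatch virtually).
    int GuardedCall(const Base& receiver) {
      if (typeid(receiver) == typeid(Expected)) {
        return 2;  // Inlined body of Expected::f().
      }
      return receiver.f();  // Guard failed.
    }
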
@@ -590,11 +875,14 @@
     ArtMethod* method = nullptr;
 
     Handle<mirror::Class> handle = handles_->NewHandle(classes->Get(i));
-    if (invoke_instruction->IsInvokeInterface()) {
-      method = handle->FindVirtualMethodForInterface(resolved_method, pointer_size);
-    } else {
-      DCHECK(invoke_instruction->IsInvokeVirtual());
-      method = handle->FindVirtualMethodForVirtual(resolved_method, pointer_size);
+    method = ResolveMethodFromInlineCache(
+        handle, resolved_method, invoke_instruction, pointer_size);
+    if (method == nullptr) {
+      DCHECK(Runtime::Current()->IsAotCompiler());
+      // AOT profile is bogus. This loop expects to iterate over all entries,
+      // so just continue.
+      all_targets_inlined = false;
+      continue;
     }
 
     HInstruction* receiver = invoke_instruction->InputAt(0);
@@ -603,6 +891,7 @@
 
     dex::TypeIndex class_index = FindClassIndexIn(handle.Get(), caller_compilation_unit_);
     HInstruction* return_replacement = nullptr;
+    LOG_NOTE() << "Try inline polymorphic call to " << method->PrettyMethod();
     if (!class_index.IsValid() ||
         !TryBuildAndInline(invoke_instruction,
                            method,
@@ -612,8 +901,8 @@
     } else {
       one_target_inlined = true;
 
-      VLOG(compiler) << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
-                     << " has inlined " << ArtMethod::PrettyMethod(method);
+      LOG_SUCCESS() << "Polymorphic call to " << ArtMethod::PrettyMethod(resolved_method)
+                    << " has inlined " << ArtMethod::PrettyMethod(method);
 
       // If we have inlined all targets before, and this receiver is the last seen,
       // we deoptimize instead of keeping the original invoke instruction.
@@ -638,7 +927,7 @@
         }
         invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
         // Because the inline cache data can be populated concurrently, we force the end of the
-        // iteration. Otherhwise, we could see a new receiver type.
+        // iteration. Otherwise, we could see a new receiver type.
         break;
       } else {
         CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
@@ -647,9 +936,10 @@
   }
 
   if (!one_target_inlined) {
-    VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
-                   << " from inline cache is not inlined because none"
-                   << " of its targets could be inlined";
+    LOG_FAIL_NO_STAT()
+        << "Call to " << ArtMethod::PrettyMethod(resolved_method)
+        << " from inline cache is not inlined because none"
+        << " of its targets could be inlined";
     return false;
   }
 
@@ -746,11 +1036,10 @@
     ArtMethod* resolved_method,
     Handle<mirror::ObjectArray<mirror::Class>> classes) {
   // This optimization only works under JIT for now.
-  DCHECK(Runtime::Current()->UseJitCompilation());
-  if (graph_->GetInstructionSet() == kMips64) {
-    // TODO: Support HClassTableGet for mips64.
+  if (!Runtime::Current()->UseJitCompilation()) {
     return false;
   }
+
   ClassLinker* class_linker = caller_compilation_unit_.GetClassLinker();
   PointerSize pointer_size = class_linker->GetImagePointerSize();
 
@@ -784,9 +1073,6 @@
       actual_method = new_method;
     } else if (actual_method != new_method) {
       // Different methods, bailout.
-      VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
-                     << " from inline cache is not inlined because it resolves"
-                     << " to different methods";
       return false;
     }
   }
@@ -840,13 +1126,19 @@
     CreateDiamondPatternForPolymorphicInline(compare, return_replacement, invoke_instruction);
   } else {
     HDeoptimize* deoptimize = new (graph_->GetArena()) HDeoptimize(
-        compare, invoke_instruction->GetDexPc());
+        graph_->GetArena(),
+        compare,
+        receiver,
+        HDeoptimize::Kind::kInline,
+        invoke_instruction->GetDexPc());
     bb_cursor->InsertInstructionAfter(deoptimize, compare);
     deoptimize->CopyEnvironmentFrom(invoke_instruction->GetEnvironment());
     if (return_replacement != nullptr) {
       invoke_instruction->ReplaceWith(return_replacement);
     }
+    receiver->ReplaceUsesDominatedBy(deoptimize, deoptimize);
     invoke_instruction->GetBlock()->RemoveInstruction(invoke_instruction);
+    deoptimize->SetReferenceTypeInfo(receiver->GetReferenceTypeInfo());
   }
 
   // Run type propagation to get the guard typed.
@@ -859,6 +1151,7 @@
 
   MaybeRecordStat(kInlinedPolymorphicCall);
 
+  LOG_SUCCESS() << "Inlined same polymorphic target " << actual_method->PrettyMethod();
   return true;
 }
 
@@ -873,11 +1166,23 @@
   HBasicBlock* bb_cursor = invoke_instruction->GetBlock();
   if (!TryBuildAndInline(invoke_instruction, method, receiver_type, &return_replacement)) {
     if (invoke_instruction->IsInvokeInterface()) {
+      DCHECK(!method->IsProxyMethod());
       // Turn an invoke-interface into an invoke-virtual. An invoke-virtual is always
       // better than an invoke-interface because:
       // 1) In the best case, the interface call has one more indirection (to fetch the IMT).
       // 2) We will not go to the conflict trampoline with an invoke-virtual.
       // TODO: Consider sharpening once it is not dependent on the compiler driver.
+
+      if (method->IsDefault() && !method->IsCopied()) {
+        // Changing to invoke-virtual cannot be done on an original default method
+        // since it's not in any vtable. Devirtualization by exact type/inline-cache
+        // always uses a method in the iftable which is never an original default
+        // method.
+        // On the other hand, inlining an original default method by CHA is fine.
+        DCHECK(cha_devirtualize);
+        return false;
+      }
+
       const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
       uint32_t dex_method_index = FindMethodIndexIn(
           method, caller_dex_file, invoke_instruction->GetDexMethodIndex());
@@ -928,13 +1233,34 @@
   return true;
 }
 
+size_t HInliner::CountRecursiveCallsOf(ArtMethod* method) const {
+  const HInliner* current = this;
+  size_t count = 0;
+  do {
+    if (current->graph_->GetArtMethod() == method) {
+      ++count;
+    }
+    current = current->parent_;
+  } while (current != nullptr);
+  return count;
+}
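
Each nested HInliner keeps a pointer to the pass that requested it (the parent_ field added in inliner.h below), so the walk above can count how often `method` already occurs up the inlining chain and reject it once it exceeds kMaximumNumberOfRecursiveCalls. A self-contained sketch of the same walk, with stand-in types:

    #include <cstddef>

    struct InlinerFrame {
      const void* method;          // Stand-in for the ArtMethod being compiled.
      const InlinerFrame* parent;  // Requesting pass, or nullptr at the root.
    };

    // Mirror of CountRecursiveCallsOf(): count occurrences of `method` from
    // the current frame up to the outermost inliner.
    std::size_t CountRecursiveCalls(const InlinerFrame* current, const void* method) {
      std::size_t count = 0;
      for (; current != nullptr; current = current->parent) {
        if (current->method == method) {
          ++count;
        }
      }
      return count;
    }
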
+
 bool HInliner::TryBuildAndInline(HInvoke* invoke_instruction,
                                  ArtMethod* method,
                                  ReferenceTypeInfo receiver_type,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
-    VLOG(compiler) << "Method " << method->PrettyMethod()
-                   << " is not inlined because of unimplemented inline support for proxy methods.";
+    LOG_FAIL(kNotInlinedProxy)
+        << "Method " << method->PrettyMethod()
+        << " is not inlined because of unimplemented inline support for proxy methods.";
+    return false;
+  }
+
+  if (CountRecursiveCallsOf(method) > kMaximumNumberOfRecursiveCalls) {
+    LOG_FAIL(kNotInlinedRecursiveBudget)
+        << "Method "
+        << method->PrettyMethod()
+        << " is not inlined because it has reached its recursive call budget.";
     return false;
   }
 
@@ -943,15 +1269,16 @@
   if (!compiler_driver_->MayInline(method->GetDexFile(),
                                    outer_compilation_unit_.GetDexFile())) {
     if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
-      VLOG(compiler) << "Successfully replaced pattern of invoke "
-                     << method->PrettyMethod();
+      LOG_SUCCESS() << "Successfully replaced pattern of invoke "
+                    << method->PrettyMethod();
       MaybeRecordStat(kReplacedInvokeWithSimplePattern);
       return true;
     }
-    VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in "
-                   << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
-                   << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
-                   << method->GetDexFile()->GetLocation();
+    LOG_FAIL(kNotInlinedWont)
+        << "Won't inline " << method->PrettyMethod() << " in "
+        << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
+        << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
+        << method->GetDexFile()->GetLocation();
     return false;
   }
 
@@ -960,30 +1287,32 @@
   const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << method->PrettyMethod()
-                   << " is not inlined because it is native";
+    LOG_FAIL_NO_STAT()
+        << "Method " << method->PrettyMethod() << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << method->PrettyMethod()
-                   << " is too big to inline: "
-                   << code_item->insns_size_in_code_units_
-                   << " > "
-                   << inline_max_code_units;
+    LOG_FAIL(kNotInlinedCodeItem)
+        << "Method " << method->PrettyMethod()
+        << " is not inlined because its code item is too big: "
+        << code_item->insns_size_in_code_units_
+        << " > "
+        << inline_max_code_units;
     return false;
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << method->PrettyMethod()
-                   << " is not inlined because of try block";
+    LOG_FAIL(kNotInlinedTryCatch)
+        << "Method " << method->PrettyMethod() << " is not inlined because of a try block";
     return false;
   }
 
   if (!method->IsCompilable()) {
-    VLOG(compiler) << "Method " << method->PrettyMethod()
-                   << " has soft failures un-handled by the compiler, so it cannot be inlined";
+    LOG_FAIL(kNotInlinedNotVerified)
+        << "Method " << method->PrettyMethod()
+        << " has soft failures un-handled by the compiler, so it cannot be inlined";
   }
 
   if (!method->GetDeclaringClass()->IsVerified()) {
@@ -991,8 +1320,9 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      VLOG(compiler) << "Method " << method->PrettyMethod()
-                     << " couldn't be verified, so it cannot be inlined";
+      LOG_FAIL(kNotInlinedNotVerified)
+          << "Method " << method->PrettyMethod()
+          << " couldn't be verified, so it cannot be inlined";
       return false;
     }
   }
@@ -1001,9 +1331,9 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    VLOG(compiler) << "Method " << method->PrettyMethod()
-                   << " is not inlined because it is static and requires a clinit"
-                   << " check that cannot be emitted due to Dex cache limitations";
+    LOG_FAIL(kNotInlinedDexCache)
+        << "Method " << method->PrettyMethod()
+        << " is not inlined because it is static and requires a clinit"
+        << " check that cannot be emitted due to Dex cache limitations";
     return false;
   }
 
@@ -1012,7 +1342,7 @@
     return false;
   }
 
-  VLOG(compiler) << "Successfully inlined " << method->PrettyMethod();
+  LOG_SUCCESS() << method->PrettyMethod();
   MaybeRecordStat(kInlinedInvoke);
   return true;
 }
@@ -1064,9 +1394,8 @@
         // TODO: Needs null check.
         return false;
       }
-      Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
       HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
-      HInstanceFieldGet* iget = CreateInstanceFieldGet(dex_cache, data.field_idx, obj);
+      HInstanceFieldGet* iget = CreateInstanceFieldGet(data.field_idx, resolved_method, obj);
       DCHECK_EQ(iget->GetFieldOffset().Uint32Value(), data.field_offset);
       DCHECK_EQ(iget->IsVolatile() ? 1u : 0u, data.is_volatile);
       invoke_instruction->GetBlock()->InsertInstructionBefore(iget, invoke_instruction);
@@ -1079,10 +1408,9 @@
         // TODO: Needs null check.
         return false;
       }
-      Handle<mirror::DexCache> dex_cache(handles_->NewHandle(resolved_method->GetDexCache()));
       HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, data.object_arg);
       HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, data.src_arg);
-      HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, data.field_idx, obj, value);
+      HInstanceFieldSet* iput = CreateInstanceFieldSet(data.field_idx, resolved_method, obj, value);
       DCHECK_EQ(iput->GetFieldOffset().Uint32Value(), data.field_offset);
       DCHECK_EQ(iput->IsVolatile() ? 1u : 0u, data.is_volatile);
       invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
@@ -1116,24 +1444,19 @@
                                  [](uint16_t index) { return index != DexFile::kDexNoIndex16; }));
 
       // Create HInstanceFieldSet for each IPUT that stores non-zero data.
-      Handle<mirror::DexCache> dex_cache;
       HInstruction* obj = GetInvokeInputForArgVRegIndex(invoke_instruction, /* this */ 0u);
       bool needs_constructor_barrier = false;
       for (size_t i = 0; i != number_of_iputs; ++i) {
         HInstruction* value = GetInvokeInputForArgVRegIndex(invoke_instruction, iput_args[i]);
         if (!value->IsConstant() || !value->AsConstant()->IsZeroBitPattern()) {
-          if (dex_cache.GetReference() == nullptr) {
-            dex_cache = handles_->NewHandle(resolved_method->GetDexCache());
-          }
           uint16_t field_index = iput_field_indexes[i];
-          HInstanceFieldSet* iput = CreateInstanceFieldSet(dex_cache, field_index, obj, value);
+          bool is_final;
+          HInstanceFieldSet* iput =
+              CreateInstanceFieldSet(field_index, resolved_method, obj, value, &is_final);
           invoke_instruction->GetBlock()->InsertInstructionBefore(iput, invoke_instruction);
 
           // Check whether the field is final. If it is, we need to add a barrier.
-          PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-          ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
-          DCHECK(resolved_field != nullptr);
-          if (resolved_field->IsFinal()) {
+          if (is_final) {
             needs_constructor_barrier = true;
           }
         }
@@ -1152,12 +1475,13 @@
   return true;
 }
 
-HInstanceFieldGet* HInliner::CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
-                                                    uint32_t field_index,
+HInstanceFieldGet* HInliner::CreateInstanceFieldGet(uint32_t field_index,
+                                                    ArtMethod* referrer,
                                                     HInstruction* obj)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-  ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* resolved_field =
+      class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
   DCHECK(resolved_field != nullptr);
   HInstanceFieldGet* iget = new (graph_->GetArena()) HInstanceFieldGet(
       obj,
@@ -1167,12 +1491,13 @@
       resolved_field->IsVolatile(),
       field_index,
       resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
-      *dex_cache->GetDexFile(),
+      *referrer->GetDexFile(),
       // Read barrier generates a runtime call in slow path and we need a valid
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
   if (iget->GetType() == Primitive::kPrimNot) {
     // Use the same dex_cache that we used for field lookup as the hint_dex_cache.
+    Handle<mirror::DexCache> dex_cache = handles_->NewHandle(referrer->GetDexCache());
     ReferenceTypePropagation rtp(graph_,
                                  outer_compilation_unit_.GetClassLoader(),
                                  dex_cache,
@@ -1183,14 +1508,21 @@
   return iget;
 }
 
-HInstanceFieldSet* HInliner::CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
-                                                    uint32_t field_index,
+HInstanceFieldSet* HInliner::CreateInstanceFieldSet(uint32_t field_index,
+                                                    ArtMethod* referrer,
                                                     HInstruction* obj,
-                                                    HInstruction* value)
+                                                    HInstruction* value,
+                                                    bool* is_final)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  PointerSize pointer_size = InstructionSetPointerSize(codegen_->GetInstructionSet());
-  ArtField* resolved_field = dex_cache->GetResolvedField(field_index, pointer_size);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* resolved_field =
+      class_linker->LookupResolvedField(field_index, referrer, /* is_static */ false);
   DCHECK(resolved_field != nullptr);
+  if (is_final != nullptr) {
+    // This information is needed only for constructors.
+    DCHECK(referrer->IsConstructor());
+    *is_final = resolved_field->IsFinal();
+  }
   HInstanceFieldSet* iput = new (graph_->GetArena()) HInstanceFieldSet(
       obj,
       value,
@@ -1200,7 +1532,7 @@
       resolved_field->IsVolatile(),
       field_index,
       resolved_field->GetDeclaringClass()->GetDexClassDefIndex(),
-      *dex_cache->GetDexFile(),
+      *referrer->GetDexFile(),
       // Read barrier generates a runtime call in slow path and we need a valid
       // dex pc for the associated stack map. 0 is bogus but valid. Bug: 26854537.
       /* dex_pc */ 0);
@@ -1298,15 +1630,17 @@
                         handles_);
 
   if (builder.BuildGraph() != kAnalysisSuccess) {
-    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                   << " could not be built, so cannot be inlined";
+    LOG_FAIL(kNotInlinedCannotBuild)
+        << "Method " << callee_dex_file.PrettyMethod(method_index)
+        << " could not be built, so cannot be inlined";
     return false;
   }
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                   << " cannot be inlined because of the register allocator";
+    LOG_FAIL(kNotInlinedRegisterAllocator)
+        << "Method " << callee_dex_file.PrettyMethod(method_index)
+        << " cannot be inlined because of the register allocator";
     return false;
   }
 
@@ -1353,15 +1687,13 @@
                              /* is_first_run */ false).Run();
   }
 
-  size_t number_of_instructions_budget = kMaximumNumberOfHInstructions;
-  size_t number_of_inlined_instructions =
-      RunOptimizations(callee_graph, code_item, dex_compilation_unit);
-  number_of_instructions_budget += number_of_inlined_instructions;
+  RunOptimizations(callee_graph, code_item, dex_compilation_unit);
 
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                   << " could not be inlined because it has an infinite loop";
+    LOG_FAIL(kNotInlinedInfiniteLoop)
+        << "Method " << callee_dex_file.PrettyMethod(method_index)
+        << " could not be inlined because it has an infinite loop";
     return false;
   }
 
@@ -1370,15 +1702,24 @@
     if (predecessor->GetLastInstruction()->IsThrow()) {
       if (invoke_instruction->GetBlock()->IsTryBlock()) {
         // TODO(ngeoffray): Support adding HTryBoundary in Hgraph::InlineInto.
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " could not be inlined because one branch always throws and"
-                       << " caller is in a try/catch block";
+        LOG_FAIL(kNotInlinedTryCatch)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because one branch always throws and"
+            << " caller is in a try/catch block";
         return false;
       } else if (graph_->GetExitBlock() == nullptr) {
         // TODO(ngeoffray): Support adding HExit in the caller graph.
+        LOG_FAIL(kNotInlinedInfiniteLoop)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because one branch always throws and"
+            << " caller does not have an exit block";
+        return false;
+      } else if (graph_->HasIrreducibleLoops()) {
+        // TODO(ngeoffray): Support re-computing loop information for graphs with
+        // irreducible loops?
         VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " could not be inlined because one branch always throws and"
-                       << " caller does not have an exit block";
+                       << " caller has irreducible loops";
         return false;
       }
     } else {
@@ -1387,32 +1728,31 @@
   }
 
   if (!has_one_return) {
-    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                   << " could not be inlined because it always throws";
+    LOG_FAIL(kNotInlinedAlwaysThrows)
+        << "Method " << callee_dex_file.PrettyMethod(method_index)
+        << " could not be inlined because it always throws";
     return false;
   }
 
   size_t number_of_instructions = 0;
-
-  bool can_inline_environment =
-      total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
-
   // Skip the entry block, it does not contain instructions that prevent inlining.
   for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
     if (block->IsLoopHeader()) {
       if (block->GetLoopInformation()->IsIrreducible()) {
         // Don't inline methods with irreducible loops, they could prevent some
         // optimizations to run.
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " could not be inlined because it contains an irreducible loop";
+        LOG_FAIL(kNotInlinedIrreducibleLoop)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because it contains an irreducible loop";
         return false;
       }
       if (!block->GetLoopInformation()->HasExitEdge()) {
         // Don't inline methods with loops without exit, since they cause the
         // loop information to be computed incorrectly when updating after
         // inlining.
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " could not be inlined because it contains a loop with no exit";
+        LOG_FAIL(kNotInlinedLoopWithoutExit)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because it contains a loop with no exit";
         return false;
       }
     }
@@ -1420,34 +1760,39 @@
     for (HInstructionIterator instr_it(block->GetInstructions());
          !instr_it.Done();
          instr_it.Advance()) {
-      if (number_of_instructions++ == number_of_instructions_budget) {
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " is not inlined because its caller has reached"
-                       << " its instruction budget limit.";
+      if (++number_of_instructions >= inlining_budget_) {
+        LOG_FAIL(kNotInlinedInstructionBudget)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " is not inlined because the outer method has reached"
+            << " its instruction budget limit.";
         return false;
       }
       HInstruction* current = instr_it.Current();
-      if (!can_inline_environment && current->NeedsEnvironment()) {
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " is not inlined because its caller has reached"
-                       << " its environment budget limit.";
+      if (current->NeedsEnvironment() &&
+          (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters)) {
+        LOG_FAIL(kNotInlinedEnvironmentBudget)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " is not inlined because its caller has reached"
+            << " its environment budget limit.";
         return false;
       }
 
       if (current->NeedsEnvironment() &&
           !CanEncodeInlinedMethodInStackMap(*caller_compilation_unit_.GetDexFile(),
                                             resolved_method)) {
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " could not be inlined because " << current->DebugName()
-                       << " needs an environment, is in a different dex file"
-                       << ", and cannot be encoded in the stack maps.";
+        LOG_FAIL(kNotInlinedStackMaps)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because " << current->DebugName()
+            << " needs an environment, is in a different dex file"
+            << ", and cannot be encoded in the stack maps.";
         return false;
       }
 
       if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " could not be inlined because " << current->DebugName()
-                       << " it is in a different dex file and requires access to the dex cache";
+        LOG_FAIL(kNotInlinedDexCache)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because " << current->DebugName()
+            << " it is in a different dex file and requires access to the dex cache";
         return false;
       }
 
@@ -1456,21 +1801,24 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
-        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
-                       << " could not be inlined because it is using an unresolved"
-                       << " entrypoint";
+        LOG_FAIL(kNotInlinedUnresolvedEntrypoint)
+            << "Method " << callee_dex_file.PrettyMethod(method_index)
+            << " could not be inlined because it is using an unresolved"
+            << " entrypoint";
         return false;
       }
     }
   }
-  number_of_inlined_instructions_ += number_of_instructions;
-
   DCHECK_EQ(caller_instruction_counter, graph_->GetCurrentInstructionId())
       << "No instructions can be added to the outer graph while inner graph is being built";
 
+  // Inline the callee graph inside the caller graph.
   const int32_t callee_instruction_counter = callee_graph->GetCurrentInstructionId();
   graph_->SetCurrentInstructionId(callee_instruction_counter);
   *return_replacement = callee_graph->InlineInto(graph_, invoke_instruction);
+  // Update our budget for other inlining attempts in `caller_graph`.
+  total_number_of_instructions_ += number_of_instructions;
+  UpdateInliningBudget();
 
   DCHECK_EQ(callee_instruction_counter, callee_graph->GetCurrentInstructionId())
       << "No instructions can be added to the inner graph during inlining into the outer graph";
@@ -1483,15 +1831,15 @@
   return true;
 }
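
UpdateInliningBudget() is declared in inliner.h below, but its body falls outside this excerpt. A plausible sketch, assuming the budget shrinks as the running instruction total grows while keeping a floor so that trivial methods remain inlinable (both constants, and the use of std::max from <algorithm>, are assumptions):

    // Sketch only: kMaximumNumberOfTotalInstructions and
    // kMaximumNumberOfInstructionsForSimpleBlocks are assumed constants.
    void HInliner::UpdateInliningBudget() {
      if (total_number_of_instructions_ >= kMaximumNumberOfTotalInstructions) {
        // Always keep a small budget so that trivial methods can be inlined.
        inlining_budget_ = kMaximumNumberOfInstructionsForSimpleBlocks;
      } else {
        inlining_budget_ = std::max(
            kMaximumNumberOfInstructionsForSimpleBlocks,
            kMaximumNumberOfTotalInstructions - total_number_of_instructions_);
      }
    }
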
 
-size_t HInliner::RunOptimizations(HGraph* callee_graph,
-                                  const DexFile::CodeItem* code_item,
-                                  const DexCompilationUnit& dex_compilation_unit) {
+void HInliner::RunOptimizations(HGraph* callee_graph,
+                                const DexFile::CodeItem* code_item,
+                                const DexCompilationUnit& dex_compilation_unit) {
   // Note: if the outermost_graph_ is being compiled OSR, we should not run any
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
   HDeadCodeElimination dce(callee_graph, inline_stats_, "dead_code_elimination$inliner");
   HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_, handles_);
-  InstructionSimplifier simplify(callee_graph, inline_stats_);
+  InstructionSimplifier simplify(callee_graph, codegen_, inline_stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, inline_stats_);
 
   HOptimization* optimizations[] = {
@@ -1507,23 +1855,37 @@
     optimization->Run();
   }
 
-  size_t number_of_inlined_instructions = 0u;
-  if (depth_ + 1 < compiler_driver_->GetCompilerOptions().GetInlineDepthLimit()) {
-    HInliner inliner(callee_graph,
-                     outermost_graph_,
-                     codegen_,
-                     outer_compilation_unit_,
-                     dex_compilation_unit,
-                     compiler_driver_,
-                     handles_,
-                     inline_stats_,
-                     total_number_of_dex_registers_ + code_item->registers_size_,
-                     depth_ + 1);
-    inliner.Run();
-    number_of_inlined_instructions += inliner.number_of_inlined_instructions_;
+  // Bail early for pathological cases on the environment (for example recursive
+  // calls, or an environment that is already too large).
+  if (total_number_of_dex_registers_ >= kMaximumNumberOfCumulatedDexRegisters) {
+    LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+               << " will not be inlined because the outer method has reached"
+               << " its environment budget limit.";
+    return;
   }
 
-  return number_of_inlined_instructions;
+  // Bail early if we know we are already over the limit.
+  size_t number_of_instructions = CountNumberOfInstructions(callee_graph);
+  if (number_of_instructions > inlining_budget_) {
+    LOG_NOTE() << "Calls in " << callee_graph->GetArtMethod()->PrettyMethod()
+               << " will not be inlined because the outer method has reached"
+               << " its instruction budget limit: " << number_of_instructions;
+    return;
+  }
+
+  HInliner inliner(callee_graph,
+                   outermost_graph_,
+                   codegen_,
+                   outer_compilation_unit_,
+                   dex_compilation_unit,
+                   compiler_driver_,
+                   handles_,
+                   inline_stats_,
+                   total_number_of_dex_registers_ + code_item->registers_size_,
+                   total_number_of_instructions_ + number_of_instructions,
+                   this,
+                   depth_ + 1);
+  inliner.Run();
 }
 
 static bool IsReferenceTypeRefinement(ReferenceTypeInfo declared_rti,
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 75d025a..9e4685c 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -20,6 +20,7 @@
 #include "dex_file_types.h"
 #include "invoke_type.h"
 #include "optimization.h"
+#include "jit/profile_compilation_info.h"
 
 namespace art {
 
@@ -41,7 +42,9 @@
            VariableSizedHandleScope* handles,
            OptimizingCompilerStats* stats,
            size_t total_number_of_dex_registers,
-           size_t depth)
+           size_t total_number_of_instructions,
+           HInliner* parent,
+           size_t depth = 0)
       : HOptimization(outer_graph, kInlinerPassName, stats),
         outermost_graph_(outermost_graph),
         outer_compilation_unit_(outer_compilation_unit),
@@ -49,8 +52,10 @@
         codegen_(codegen),
         compiler_driver_(compiler_driver),
         total_number_of_dex_registers_(total_number_of_dex_registers),
+        total_number_of_instructions_(total_number_of_instructions),
+        parent_(parent),
         depth_(depth),
-        number_of_inlined_instructions_(0),
+        inlining_budget_(0),
         handles_(handles),
         inline_stats_(nullptr) {}
 
@@ -59,6 +64,15 @@
   static constexpr const char* kInlinerPassName = "inliner";
 
  private:
+  enum InlineCacheType {
+    kInlineCacheNoData = 0,
+    kInlineCacheUninitialized = 1,
+    kInlineCacheMonomorphic = 2,
+    kInlineCachePolymorphic = 3,
+    kInlineCacheMegamorphic = 4,
+    kInlineCacheMissingTypes = 5
+  };
+
   bool TryInline(HInvoke* invoke_instruction);
 
   // Try to inline `resolved_method` in place of `invoke_instruction`. `do_rtp` is whether
@@ -85,10 +99,10 @@
                                HInstruction** return_replacement);
 
   // Run simple optimizations on `callee_graph`.
-  // Returns the number of inlined instructions.
-  size_t RunOptimizations(HGraph* callee_graph,
-                          const DexFile::CodeItem* code_item,
-                          const DexCompilationUnit& dex_compilation_unit);
+  void RunOptimizations(HGraph* callee_graph,
+                        const DexFile::CodeItem* code_item,
+                        const DexCompilationUnit& dex_compilation_unit)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Try to recognize known simple patterns and replace invoke call with appropriate instructions.
   bool TryPatternSubstitution(HInvoke* invoke_instruction,
@@ -97,14 +111,54 @@
     REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Create a new HInstanceFieldGet.
-  HInstanceFieldGet* CreateInstanceFieldGet(Handle<mirror::DexCache> dex_cache,
-                                            uint32_t field_index,
+  HInstanceFieldGet* CreateInstanceFieldGet(uint32_t field_index,
+                                            ArtMethod* referrer,
                                             HInstruction* obj);
   // Create a new HInstanceFieldSet.
-  HInstanceFieldSet* CreateInstanceFieldSet(Handle<mirror::DexCache> dex_cache,
-                                            uint32_t field_index,
+  HInstanceFieldSet* CreateInstanceFieldSet(uint32_t field_index,
+                                            ArtMethod* referrer,
                                             HInstruction* obj,
-                                            HInstruction* value);
+                                            HInstruction* value,
+                                            bool* is_final = nullptr);
+
+  // Try inlining the invoke instruction using inline caches.
+  bool TryInlineFromInlineCache(
+      const DexFile& caller_dex_file,
+      HInvoke* invoke_instruction,
+      ArtMethod* resolved_method)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Try getting the inline cache from the JIT code cache.
+  // Returns the type of the inline cache (kInlineCacheNoData if the cache
+  // could not be allocated or no profiling info was found for the invoke).
+  InlineCacheType GetInlineCacheJIT(
+      HInvoke* invoke_instruction,
+      StackHandleScope<1>* hs,
+      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Try getting the inline cache from the AOT offline profile.
+  // Returns the type of the inline cache (kInlineCacheNoData if the cache
+  // could not be allocated or no profile data was found for the invoke).
+  InlineCacheType GetInlineCacheAOT(
+      const DexFile& caller_dex_file,
+      HInvoke* invoke_instruction,
+      StackHandleScope<1>* hs,
+      /*out*/Handle<mirror::ObjectArray<mirror::Class>>* inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Extract the mirror classes from the offline profile and add them to the `inline_cache`.
+  // Note that even if we have profile data for the invoke, the inline_cache might contain
+  // only null entries if the types cannot be resolved.
+  InlineCacheType ExtractClassesFromOfflineProfile(
+      const HInvoke* invoke_instruction,
+      const ProfileCompilationInfo::OfflineProfileMethodInfo& offline_profile,
+      /*out*/Handle<mirror::ObjectArray<mirror::Class>> inline_cache)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Compute the inline cache type.
+  InlineCacheType GetInlineCacheType(
+      const Handle<mirror::ObjectArray<mirror::Class>>& classes)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Try to inline the target of a monomorphic call. If successful, the code
   // in the graph will look like:
@@ -209,14 +263,30 @@
                                                 HInstruction* return_replacement,
                                                 HInstruction* invoke_instruction);
 
+  // Update the inlining budget based on `total_number_of_instructions_`.
+  void UpdateInliningBudget();
+
+  // Count the number of calls of `method` being inlined recursively.
+  size_t CountRecursiveCallsOf(ArtMethod* method) const;
+
+  // Pretty-printer for the depth-based indentation used during logging.
+  std::string DepthString(int line) const;
+
   HGraph* const outermost_graph_;
   const DexCompilationUnit& outer_compilation_unit_;
   const DexCompilationUnit& caller_compilation_unit_;
   CodeGenerator* const codegen_;
   CompilerDriver* const compiler_driver_;
   const size_t total_number_of_dex_registers_;
+  size_t total_number_of_instructions_;
+
+  // The 'parent' inliner, i.e. the inlining optimization that requested
+  // `graph_` to be inlined.
+  const HInliner* const parent_;
   const size_t depth_;
-  size_t number_of_inlined_instructions_;
+
+  // The budget left for inlining, in number of instructions.
+  size_t inlining_budget_;
   VariableSizedHandleScope* const handles_;
 
   // Used to record stats about optimizations on the inlined graph.
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index c60f6e5..88f67fa 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -37,37 +37,45 @@
   return block_builder_->GetBlockAt(dex_pc);
 }
 
-ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
+inline ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsFor(HBasicBlock* block) {
   ArenaVector<HInstruction*>* locals = &locals_for_[block->GetBlockId()];
   const size_t vregs = graph_->GetNumberOfVRegs();
-  if (locals->size() != vregs) {
-    locals->resize(vregs, nullptr);
+  if (locals->size() == vregs) {
+    return locals;
+  }
+  return GetLocalsForWithAllocation(block, locals, vregs);
+}
 
-    if (block->IsCatchBlock()) {
-      // We record incoming inputs of catch phis at throwing instructions and
-      // must therefore eagerly create the phis. Phis for undefined vregs will
-      // be deleted when the first throwing instruction with the vreg undefined
-      // is encountered. Unused phis will be removed by dead phi analysis.
-      for (size_t i = 0; i < vregs; ++i) {
-        // No point in creating the catch phi if it is already undefined at
-        // the first throwing instruction.
-        HInstruction* current_local_value = (*current_locals_)[i];
-        if (current_local_value != nullptr) {
-          HPhi* phi = new (arena_) HPhi(
-              arena_,
-              i,
-              0,
-              current_local_value->GetType());
-          block->AddPhi(phi);
-          (*locals)[i] = phi;
-        }
+ArenaVector<HInstruction*>* HInstructionBuilder::GetLocalsForWithAllocation(
+    HBasicBlock* block,
+    ArenaVector<HInstruction*>* locals,
+    const size_t vregs) {
+  DCHECK_NE(locals->size(), vregs);
+  locals->resize(vregs, nullptr);
+  if (block->IsCatchBlock()) {
+    // We record incoming inputs of catch phis at throwing instructions and
+    // must therefore eagerly create the phis. Phis for undefined vregs will
+    // be deleted when the first throwing instruction with the vreg undefined
+    // is encountered. Unused phis will be removed by dead phi analysis.
+    for (size_t i = 0; i < vregs; ++i) {
+      // No point in creating the catch phi if it is already undefined at
+      // the first throwing instruction.
+      HInstruction* current_local_value = (*current_locals_)[i];
+      if (current_local_value != nullptr) {
+        HPhi* phi = new (arena_) HPhi(
+            arena_,
+            i,
+            0,
+            current_local_value->GetType());
+        block->AddPhi(phi);
+        (*locals)[i] = phi;
       }
     }
   }
   return locals;
 }
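
The refactoring above is the classic fast-path/slow-path split: the common case (locals already sized) stays in a tiny inline function, and the allocation-heavy case moves out of line so the fast path can be absorbed at every call site. A generic self-contained illustration of the idiom:

    #include <cstddef>
    #include <vector>

    std::vector<int>* GetSlotsWithAllocation(std::vector<int>* slots, std::size_t wanted);

    // Fast path: small enough for the compiler to inline at call sites.
    inline std::vector<int>* GetSlots(std::vector<int>* slots, std::size_t wanted) {
      if (slots->size() == wanted) {
        return slots;  // Common case: already sized, nothing to do.
      }
      return GetSlotsWithAllocation(slots, wanted);  // Rare case, out of line.
    }

    // Slow path kept out of line so it does not bloat the fast path.
    std::vector<int>* GetSlotsWithAllocation(std::vector<int>* slots, std::size_t wanted) {
      slots->resize(wanted, 0);
      return slots;
    }
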
 
-HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
+inline HInstruction* HInstructionBuilder::ValueOfLocalAt(HBasicBlock* block, size_t local) {
   ArenaVector<HInstruction*>* locals = GetLocalsFor(block);
   return (*locals)[local];
 }
@@ -1676,10 +1684,10 @@
       dex_pc,
       needs_access_check);
 
-  HLoadClass::LoadKind load_kind = HSharpening::SharpenClass(load_class,
-                                                             code_generator_,
-                                                             compiler_driver_,
-                                                             *dex_compilation_unit_);
+  HLoadClass::LoadKind load_kind = HSharpening::ComputeLoadClassKind(load_class,
+                                                                     code_generator_,
+                                                                     compiler_driver_,
+                                                                     *dex_compilation_unit_);
 
   if (load_kind == HLoadClass::LoadKind::kInvalid) {
     // We actually cannot reference this class, we're forced to bail.
diff --git a/compiler/optimizing/instruction_builder.h b/compiler/optimizing/instruction_builder.h
index e735a0c..7fdc188 100644
--- a/compiler/optimizing/instruction_builder.h
+++ b/compiler/optimizing/instruction_builder.h
@@ -93,6 +93,10 @@
   HBasicBlock* FindBlockStartingAt(uint32_t dex_pc) const;
 
   ArenaVector<HInstruction*>* GetLocalsFor(HBasicBlock* block);
+  // Out-of-line slow path of GetLocalsFor(); keeping the allocation out of
+  // the small fast path lets GetLocalsFor() be inlined by callers.
+  ArenaVector<HInstruction*>* GetLocalsForWithAllocation(
+      HBasicBlock* block, ArenaVector<HInstruction*>* locals, const size_t vregs);
   HInstruction* ValueOfLocalAt(HBasicBlock* block, size_t local);
   HInstruction* LoadLocal(uint32_t register_index, Primitive::Type type) const;
   HInstruction* LoadNullCheckedLocal(uint32_t register_index, uint32_t dex_pc);
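The declaration above describes the usual inline-fast-path / out-of-line-slow-path split. A minimal, self-contained sketch of that shape with stand-in types (std::vector instead of ArenaVector; not ART code):

#include <cstddef>
#include <vector>

// Out-of-line rare path, mirroring GetLocalsForWithAllocation().
std::vector<int*>* GrowSlowPath(std::vector<int*>* locals, size_t vregs);

// Inlined fast path: cheap size check, tail call to the slow path.
inline std::vector<int*>* GetLocals(std::vector<int*>* locals, size_t vregs) {
  if (locals->size() == vregs) {
    return locals;                     // fast path, cheap enough to inline
  }
  return GrowSlowPath(locals, vregs);  // rare path kept out of line
}

std::vector<int*>* GrowSlowPath(std::vector<int*>* locals, size_t vregs) {
  locals->resize(vregs, nullptr);      // resize, as the slow path above does
  return locals;
}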
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 35f59cb..60790e5 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -19,14 +19,18 @@
 #include "escape.h"
 #include "intrinsics.h"
 #include "mirror/class-inl.h"
+#include "sharpening.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
 
 class InstructionSimplifierVisitor : public HGraphDelegateVisitor {
  public:
-  InstructionSimplifierVisitor(HGraph* graph, OptimizingCompilerStats* stats)
+  InstructionSimplifierVisitor(HGraph* graph,
+                               CodeGenerator* codegen,
+                               OptimizingCompilerStats* stats)
       : HGraphDelegateVisitor(graph),
+        codegen_(codegen),
         stats_(stats) {}
 
   void Run();
@@ -112,6 +116,7 @@
   void SimplifyAllocationIntrinsic(HInvoke* invoke);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
+  CodeGenerator* codegen_;
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
@@ -123,7 +128,7 @@
 };
 
 void InstructionSimplifier::Run() {
-  InstructionSimplifierVisitor visitor(graph_, stats_);
+  InstructionSimplifierVisitor visitor(graph_, codegen_, stats_);
   visitor.Run();
 }
 
@@ -1805,6 +1810,8 @@
 
   {
     ScopedObjectAccess soa(Thread::Current());
+    Primitive::Type source_component_type = Primitive::kPrimVoid;
+    Primitive::Type destination_component_type = Primitive::kPrimVoid;
     ReferenceTypeInfo destination_rti = destination->GetReferenceTypeInfo();
     if (destination_rti.IsValid()) {
       if (destination_rti.IsObjectArray()) {
@@ -1814,6 +1821,8 @@
         optimizations.SetDestinationIsTypedObjectArray();
       }
       if (destination_rti.IsPrimitiveArrayClass()) {
+        destination_component_type =
+            destination_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
         optimizations.SetDestinationIsPrimitiveArray();
       } else if (destination_rti.IsNonPrimitiveArrayClass()) {
         optimizations.SetDestinationIsNonPrimitiveArray();
@@ -1826,10 +1835,55 @@
       }
       if (source_rti.IsPrimitiveArrayClass()) {
         optimizations.SetSourceIsPrimitiveArray();
+        source_component_type = source_rti.GetTypeHandle()->GetComponentType()->GetPrimitiveType();
       } else if (source_rti.IsNonPrimitiveArrayClass()) {
         optimizations.SetSourceIsNonPrimitiveArray();
       }
     }
+    // For primitive arrays, use their optimized ArtMethod implementations.
+    if ((source_component_type != Primitive::kPrimVoid) &&
+        (source_component_type == destination_component_type)) {
+      ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+      PointerSize image_size = class_linker->GetImagePointerSize();
+      HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+      mirror::Class* system = invoke->GetResolvedMethod()->GetDeclaringClass();
+      ArtMethod* method = nullptr;
+      switch (source_component_type) {
+        case Primitive::kPrimBoolean:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([ZI[ZII)V", image_size);
+          break;
+        case Primitive::kPrimByte:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([BI[BII)V", image_size);
+          break;
+        case Primitive::kPrimChar:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([CI[CII)V", image_size);
+          break;
+        case Primitive::kPrimShort:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([SI[SII)V", image_size);
+          break;
+        case Primitive::kPrimInt:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([II[III)V", image_size);
+          break;
+        case Primitive::kPrimFloat:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([FI[FII)V", image_size);
+          break;
+        case Primitive::kPrimLong:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([JI[JII)V", image_size);
+          break;
+        case Primitive::kPrimDouble:
+          method = system->FindDeclaredDirectMethod("arraycopy", "([DI[DII)V", image_size);
+          break;
+        default:
+          LOG(FATAL) << "Unreachable";
+      }
+      DCHECK(method != nullptr);
+      invoke->SetResolvedMethod(method);
+      // Sharpen the new invoke. Note that we do not update the dex method index of
+      // the invoke, as we would need to look it up in the current dex file, and it
+      // is unlikely that it exists. The most common situation for such typed
+      // arraycopy methods is a direct pointer to the boot image.
+      HSharpening::SharpenInvokeStaticOrDirect(invoke, codegen_);
+    }
   }
 }
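For reference, the component-type-to-descriptor mapping that the switch above encodes can be written as a standalone table; this is a sketch with a stand-in enum, not ART's Primitive::Type:

#include <cassert>

// Stand-in for Primitive::Type; the descriptors are the ones used above.
enum class Component { kBoolean, kByte, kChar, kShort, kInt, kFloat, kLong, kDouble };

// Returns the JNI-style signature of the typed System.arraycopy overload.
const char* ArraycopyDescriptor(Component c) {
  switch (c) {
    case Component::kBoolean: return "([ZI[ZII)V";
    case Component::kByte:    return "([BI[BII)V";
    case Component::kChar:    return "([CI[CII)V";
    case Component::kShort:   return "([SI[SII)V";
    case Component::kInt:     return "([II[III)V";
    case Component::kFloat:   return "([FI[FII)V";
    case Component::kLong:    return "([JI[JII)V";
    case Component::kDouble:  return "([DI[DII)V";
  }
  assert(false && "unreachable");
  return nullptr;
}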
 
@@ -2078,6 +2132,9 @@
   if (cond->IsConstant()) {
     if (cond->AsIntConstant()->IsFalse()) {
       // Never deopt: instruction can be removed.
+      if (deoptimize->GuardsAnInput()) {
+        deoptimize->ReplaceWith(deoptimize->GuardedInput());
+      }
       deoptimize->GetBlock()->RemoveInstruction(deoptimize);
     } else {
       // Always deopt.
diff --git a/compiler/optimizing/instruction_simplifier.h b/compiler/optimizing/instruction_simplifier.h
index 7fe1067..f7329a4 100644
--- a/compiler/optimizing/instruction_simplifier.h
+++ b/compiler/optimizing/instruction_simplifier.h
@@ -23,6 +23,8 @@
 
 namespace art {
 
+class CodeGenerator;
+
 /**
  * Implements optimizations specific to each instruction.
  *
@@ -36,15 +38,19 @@
 class InstructionSimplifier : public HOptimization {
  public:
   explicit InstructionSimplifier(HGraph* graph,
+                                 CodeGenerator* codegen,
                                  OptimizingCompilerStats* stats = nullptr,
                                  const char* name = kInstructionSimplifierPassName)
-      : HOptimization(graph, name, stats) {}
+      : HOptimization(graph, name, stats),
+        codegen_(codegen) {}
 
   static constexpr const char* kInstructionSimplifierPassName = "instruction_simplifier";
 
   void Run() OVERRIDE;
 
  private:
+  CodeGenerator* codegen_;
+
   DISALLOW_COPY_AND_ASSIGN(InstructionSimplifier);
 };
 
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 17d683f..8df80ad 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -19,6 +19,7 @@
 #include "art_method.h"
 #include "class_linker.h"
 #include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
 #include "invoke_type.h"
 #include "mirror/dex_cache-inl.h"
 #include "nodes.h"
@@ -178,4 +179,112 @@
   return os;
 }
 
+void IntrinsicVisitor::ComputeIntegerValueOfLocations(HInvoke* invoke,
+                                                      CodeGenerator* codegen,
+                                                      Location return_location,
+                                                      Location first_argument_location) {
+  if (Runtime::Current()->IsAotCompiler()) {
+    if (codegen->GetCompilerOptions().IsBootImage() ||
+        codegen->GetCompilerOptions().GetCompilePic()) {
+      // TODO(ngeoffray): Support boot image compilation.
+      return;
+    }
+  }
+
+  IntegerValueOfInfo info = ComputeIntegerValueOfInfo();
+
+  // The most common case is that we have found all we needed (the classes are
+  // initialized and in the boot image). Bail if not.
+  if (info.integer_cache == nullptr ||
+      info.integer == nullptr ||
+      info.cache == nullptr ||
+      info.value_offset == 0 ||
+      // low and high cannot be 0, per the spec.
+      info.low == 0 ||
+      info.high == 0) {
+    LOG(INFO) << "Integer.valueOf will not be optimized";
+    return;
+  }
+
+  // The intrinsic will call the runtime if it needs to allocate a j.l.Integer.
+  LocationSummary* locations = new (invoke->GetBlock()->GetGraph()->GetArena()) LocationSummary(
+      invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
+  if (!invoke->InputAt(0)->IsConstant()) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->AddTemp(first_argument_location);
+  locations->SetOut(return_location);
+}
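A host-side behavioral sketch of the intrinsic being set up here, assuming the spec-default cache bounds of -128..127 (the real low/high and the cache come from the boot image's IntegerCache, per ComputeIntegerValueOfInfo below):

#include <cstdint>

struct BoxedInteger { int32_t value; };
constexpr int32_t kLow = -128;   // IntegerCache.low, assumed default
constexpr int32_t kHigh = 127;   // IntegerCache.high, assumed default
BoxedInteger g_cache[kHigh - kLow + 1];  // stands in for the boot-image cache

BoxedInteger* IntegerValueOf(int32_t in) {
  uint32_t index = static_cast<uint32_t>(in - kLow);
  if (index < static_cast<uint32_t>(kHigh - kLow + 1)) {
    g_cache[index].value = in;   // the real cache is pre-populated
    return &g_cache[index];      // fast path: load the cached box
  }
  return new BoxedInteger{in};   // slow path: the runtime allocation call
}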
+
+IntrinsicVisitor::IntegerValueOfInfo IntrinsicVisitor::ComputeIntegerValueOfInfo() {
+  // Note that we could cache all of the data looked up here, but there's no good
+  // location for it. We don't want to add it to WellKnownClasses, to avoid creating
+  // global JNI values. Adding it as state to the compiler singleton seems like the
+  // wrong separation of concerns.
+  // The need for this data should be pretty rare though.
+
+  // The most common case is that the classes are in the boot image and initialized,
+  // which is easy to generate code for. We bail if not.
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  Runtime* runtime = Runtime::Current();
+  ClassLinker* class_linker = runtime->GetClassLinker();
+  gc::Heap* heap = runtime->GetHeap();
+  IntegerValueOfInfo info;
+  info.integer_cache = class_linker->FindSystemClass(self, "Ljava/lang/Integer$IntegerCache;");
+  if (info.integer_cache == nullptr) {
+    self->ClearException();
+    return info;
+  }
+  if (!heap->ObjectIsInBootImageSpace(info.integer_cache) || !info.integer_cache->IsInitialized()) {
+    // Optimization only works if the class is initialized and in the boot image.
+    return info;
+  }
+  info.integer = class_linker->FindSystemClass(self, "Ljava/lang/Integer;");
+  if (info.integer == nullptr) {
+    self->ClearException();
+    return info;
+  }
+  if (!heap->ObjectIsInBootImageSpace(info.integer) || !info.integer->IsInitialized()) {
+    // Optimization only works if the class is initialized and in the boot image.
+    return info;
+  }
+
+  ArtField* field = info.integer_cache->FindDeclaredStaticField("cache", "[Ljava/lang/Integer;");
+  if (field == nullptr) {
+    return info;
+  }
+  info.cache = static_cast<mirror::ObjectArray<mirror::Object>*>(
+      field->GetObject(info.integer_cache).Ptr());
+  if (info.cache == nullptr) {
+    return info;
+  }
+
+  if (!heap->ObjectIsInBootImageSpace(info.cache)) {
+    // Optimization only works if the object is in the boot image.
+    return info;
+  }
+
+  field = info.integer->FindDeclaredInstanceField("value", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.value_offset = field->GetOffset().Int32Value();
+
+  field = info.integer_cache->FindDeclaredStaticField("low", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.low = field->GetInt(info.integer_cache);
+
+  field = info.integer_cache->FindDeclaredStaticField("high", "I");
+  if (field == nullptr) {
+    return info;
+  }
+  info.high = field->GetInt(info.integer_cache);
+
+  DCHECK_EQ(info.cache->GetLength(), info.high - info.low + 1);
+  return info;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/intrinsics.h b/compiler/optimizing/intrinsics.h
index 6425e13..9da5a7f 100644
--- a/compiler/optimizing/intrinsics.h
+++ b/compiler/optimizing/intrinsics.h
@@ -113,6 +113,39 @@
     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
   }
 
+  static void ComputeIntegerValueOfLocations(HInvoke* invoke,
+                                             CodeGenerator* codegen,
+                                             Location return_location,
+                                             Location first_argument_location);
+
+  // Temporary data structure for holding data useful to the Integer.valueOf
+  // intrinsic. We only use it if the mirror::Class* objects are in the boot
+  // image, so it is fine to keep raw mirror::Class pointers in this structure.
+  struct IntegerValueOfInfo {
+    IntegerValueOfInfo()
+        : integer_cache(nullptr),
+          integer(nullptr),
+          cache(nullptr),
+          low(0),
+          high(0),
+          value_offset(0) {}
+
+    // The java.lang.Integer$IntegerCache class.
+    mirror::Class* integer_cache;
+    // The java.lang.Integer class.
+    mirror::Class* integer;
+    // Value of java.lang.Integer$IntegerCache#cache.
+    mirror::ObjectArray<mirror::Object>* cache;
+    // Value of java.lang.Integer$IntegerCache#low.
+    int32_t low;
+    // Value of java.lang.Integer$IntegerCache#high.
+    int32_t high;
+    // The offset of java.lang.Integer.value.
+    int32_t value_offset;
+  };
+
+  static IntegerValueOfInfo ComputeIntegerValueOfInfo();
+
  protected:
   IntrinsicVisitor() {}
 
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 751623c..1006a77 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -41,6 +41,54 @@
 
 using IntrinsicSlowPathARM = IntrinsicSlowPath<InvokeDexCallingConventionVisitorARM>;
 
+#define __ assembler->
+
+// Compute base address for the System.arraycopy intrinsic in `base`.
+static void GenSystemArrayCopyBaseAddress(ArmAssembler* assembler,
+                                          Primitive::Type type,
+                                          const Register& array,
+                                          const Location& pos,
+                                          const Register& base) {
+  // This routine is only used by the SystemArrayCopy intrinsic at the
+  // moment. We can allow Primitive::kPrimChar as `type` to implement
+  // the SystemArrayCopyChar intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(base, array, element_size * constant + data_offset);
+  } else {
+    __ add(base, array, ShifterOperand(pos.AsRegister<Register>(), LSL, element_size_shift));
+    __ AddConstant(base, data_offset);
+  }
+}
+
+// Compute end address for the System.arraycopy intrinsic in `end`.
+static void GenSystemArrayCopyEndAddress(ArmAssembler* assembler,
+                                         Primitive::Type type,
+                                         const Location& copy_length,
+                                         const Register& base,
+                                         const Register& end) {
+  // This routine is only used by the SystemArrayCopy intrinsic at the
+  // moment. We can allow Primitive::kPrimChar as `type` to implement
+  // the SystemArrayCopyChar intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+
+  if (copy_length.IsConstant()) {
+    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+    __ AddConstant(end, base, element_size * constant);
+  } else {
+    __ add(end, base, ShifterOperand(copy_length.AsRegister<Register>(), LSL, element_size_shift));
+  }
+}
+
+#undef __
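In plain arithmetic, the two helpers compute the following (a host-side sketch, not ART code; data_offset and element_size come from the array layout as above):

#include <cstdint>

// base = array + data_offset + pos * element_size
uintptr_t ArrayCopyBase(uintptr_t array, uint32_t data_offset,
                        int32_t pos, int32_t element_size) {
  return array + data_offset + static_cast<uintptr_t>(pos) * element_size;
}

// end = base + length * element_size
uintptr_t ArrayCopyEnd(uintptr_t base, int32_t length, int32_t element_size) {
  return base + static_cast<uintptr_t>(length) * element_size;
}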
+
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<ArmAssembler*>(codegen->GetAssembler())->  // NOLINT
 
@@ -55,6 +103,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    ArmAssembler* assembler = arm_codegen->GetAssembler();
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(locations->CanCall());
     DCHECK(instruction_->IsInvokeStaticOrDirect())
@@ -63,9 +112,8 @@
     DCHECK(instruction_->GetLocations()->Intrinsified());
     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
 
-    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
-    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+    Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);
 
     Register dest = locations->InAt(2).AsRegister<Register>();
     Location dest_pos = locations->InAt(3);
@@ -76,15 +124,7 @@
 
     __ Bind(GetEntryLabel());
     // Compute the base destination address in `dst_curr_addr`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-      __ AddConstant(dst_curr_addr, dest, element_size * constant + offset);
-    } else {
-      __ add(dst_curr_addr,
-             dest,
-             ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
-      __ AddConstant(dst_curr_addr, offset);
-    }
+    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
 
     Label loop;
     __ Bind(&loop);
@@ -108,6 +148,8 @@
     DCHECK_NE(src_stop_addr, IP);
     DCHECK_NE(tmp, IP);
     DCHECK(0 <= tmp && tmp < kNumberOfCoreRegisters) << tmp;
+    // TODO: Load the entrypoint once before the loop, instead of
+    // loading it at every iteration.
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp);
     // This runtime call does not require a stack map.
@@ -129,6 +171,7 @@
 
 IntrinsicLocationsBuilderARM::IntrinsicLocationsBuilderARM(CodeGeneratorARM* codegen)
     : arena_(codegen->GetGraph()->GetArena()),
+      codegen_(codegen),
       assembler_(codegen->GetAssembler()),
       features_(codegen->GetInstructionSetFeatures()) {}
 
@@ -227,9 +270,11 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                     Primitive::Type type,
-                                    ArmAssembler* assembler) {
+                                    CodeGeneratorARM* codegen) {
+  ArmAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   Location in = locations->InAt(0);
   Register out = locations->Out().AsRegister<Register>();
 
@@ -239,11 +284,14 @@
     Register in_reg_lo = in.AsRegisterPairLow<Register>();
     Register in_reg_hi = in.AsRegisterPairHigh<Register>();
     Label end;
+    Label* final_label = codegen->GetFinalLabel(invoke, &end);
     __ clz(out, in_reg_hi);
-    __ CompareAndBranchIfNonZero(in_reg_hi, &end);
+    __ CompareAndBranchIfNonZero(in_reg_hi, final_label);
     __ clz(out, in_reg_lo);
     __ AddConstant(out, 32);
-    __ Bind(&end);
+    if (end.IsLinked()) {
+      __ Bind(&end);
+    }
   } else {
     __ clz(out, in.AsRegister<Register>());
   }
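What the long-input path above computes, modeled on the host (__builtin_clz is the GCC/Clang builtin; hardware CLZ of a zero register is 32, so an all-zero input correctly yields 64):

#include <cstdint>

int32_t Clz32(uint32_t v) { return v == 0 ? 32 : __builtin_clz(v); }

// Use the high word if nonzero, otherwise CLZ of the low word plus 32,
// mirroring the clz / CompareAndBranchIfNonZero / clz / AddConstant sequence.
int32_t Clz64(uint32_t hi, uint32_t lo) {
  return hi != 0 ? Clz32(hi) : Clz32(lo) + 32;
}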
@@ -254,7 +302,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+  GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
 }
 
 void IntrinsicLocationsBuilderARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -266,27 +314,32 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+  GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
 }
 
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                      Primitive::Type type,
-                                     ArmAssembler* assembler) {
+                                     CodeGeneratorARM* codegen) {
   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
 
+  ArmAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   Register out = locations->Out().AsRegister<Register>();
 
   if (type == Primitive::kPrimLong) {
     Register in_reg_lo = locations->InAt(0).AsRegisterPairLow<Register>();
     Register in_reg_hi = locations->InAt(0).AsRegisterPairHigh<Register>();
     Label end;
+    Label* final_label = codegen->GetFinalLabel(invoke, &end);
     __ rbit(out, in_reg_lo);
     __ clz(out, out);
-    __ CompareAndBranchIfNonZero(in_reg_lo, &end);
+    __ CompareAndBranchIfNonZero(in_reg_lo, final_label);
     __ rbit(out, in_reg_hi);
     __ clz(out, out);
     __ AddConstant(out, 32);
-    __ Bind(&end);
+    if (end.IsLinked()) {
+      __ Bind(&end);
+    }
   } else {
     Register in = locations->InAt(0).AsRegister<Register>();
     __ rbit(out, in);
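The trailing-zeros helper relies on rbit+clz: bit-reversal turns trailing zeros into leading zeros. A host-side model (the loop stands in for the ARM rbit instruction; not ART code):

#include <cstdint>

uint32_t ReverseBits(uint32_t v) {  // software model of rbit
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | (v & 1);
    v >>= 1;
  }
  return r;
}

int32_t Ctz32(uint32_t v) {
  uint32_t rev = ReverseBits(v);
  return rev == 0 ? 32 : __builtin_clz(rev);  // clz of the reversed value
}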
@@ -303,7 +356,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+  GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
 }
 
 void IntrinsicLocationsBuilderARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -315,7 +368,7 @@
 }
 
 void IntrinsicCodeGeneratorARM::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+  GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
 }
 
 static void MathAbsFP(LocationSummary* locations, bool is64bit, ArmAssembler* assembler) {
@@ -1312,6 +1365,7 @@
   Label end;
   Label return_true;
   Label return_false;
+  Label* final_label = codegen_->GetFinalLabel(invoke, &end);
 
   // Get offsets of count, value, and class fields within a string object.
   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1385,12 +1439,15 @@
   // If loop does not result in returning false, we return true.
   __ Bind(&return_true);
   __ LoadImmediate(out, 1);
-  __ b(&end);
+  __ b(final_label);
 
   // Return false and exit the function.
   __ Bind(&return_false);
   __ LoadImmediate(out, 0);
-  __ Bind(&end);
+
+  if (end.IsLinked()) {
+    __ Bind(&end);
+  }
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -1924,138 +1981,113 @@
     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }
 
-  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
-  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
-
-  // Compute the base source address in `temp1`.
-  if (src_pos.IsConstant()) {
-    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ AddConstant(temp1, src, element_size * constant + offset);
+  if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+    // Zero constant length: no need to emit the loop code at all.
   } else {
-    __ add(temp1, src, ShifterOperand(src_pos.AsRegister<Register>(), LSL, element_size_shift));
-    __ AddConstant(temp1, offset);
-  }
+    Label done;
+    const Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);
 
-  // Compute the end source address in `temp3`.
-  if (length.IsConstant()) {
-    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
-    __ AddConstant(temp3, temp1, element_size * constant);
-  } else {
-    __ add(temp3, temp1, ShifterOperand(length.AsRegister<Register>(), LSL, element_size_shift));
-  }
-
-  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
-
-    // The base destination address is computed later, as `temp2` is
-    // used for intermediate computations.
-
-    // SystemArrayCopy implementation for Baker read barriers (see
-    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
-    //
-    //   if (src_ptr != end_ptr) {
-    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
-    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-    //     if (is_gray) {
-    //       // Slow-path copy.
-    //       do {
-    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
-    //       } while (src_ptr != end_ptr)
-    //     } else {
-    //       // Fast-path copy.
-    //       do {
-    //         *dest_ptr++ = *src_ptr++;
-    //       } while (src_ptr != end_ptr)
-    //     }
-    //   }
-
-    Label loop, done;
-
-    // Don't enter copy loop if `length == 0`.
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&done, EQ);
-
-    // /* int32_t */ monitor = src->monitor_
-    __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
-
-    // Introduce a dependency on the lock_word including the rb_state,
-    // which shall prevent load-load reordering without using
-    // a memory barrier (which would be more expensive).
-    // `src` is unchanged by this operation, but its value now depends
-    // on `temp2`.
-    __ add(src, src, ShifterOperand(temp2, LSR, 32));
-
-    // Slow path used to copy array when `src` is gray.
-    SlowPathCode* read_barrier_slow_path =
-        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
-    codegen_->AddSlowPath(read_barrier_slow_path);
-
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with LSRS
-    // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
-    // Carry flag is the last bit shifted out by LSRS.
-    __ b(read_barrier_slow_path->GetEntryLabel(), CS);
-
-    // Fast-path copy.
-
-    // Compute the base destination address in `temp2`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-      __ AddConstant(temp2, dest, element_size * constant + offset);
-    } else {
-      __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
-      __ AddConstant(temp2, offset);
+    if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is zero.
+      __ CompareAndBranchIfZero(length.AsRegister<Register>(), &done);
     }
 
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    __ Bind(&loop);
-    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
-    __ str(IP, Address(temp2, element_size, Address::PostIndex));
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&loop, NE);
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
 
-    __ Bind(read_barrier_slow_path->GetExitLabel());
-    __ Bind(&done);
-  } else {
-    // Non read barrier code.
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+      //   if (is_gray) {
+      //     // Slow-path copy.
+      //     do {
+      //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //     } while (src_ptr != end_ptr)
+      //   } else {
+      //     // Fast-path copy.
+      //     do {
+      //       *dest_ptr++ = *src_ptr++;
+      //     } while (src_ptr != end_ptr)
+      //   }
 
-    // Compute the base destination address in `temp2`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-      __ AddConstant(temp2, dest, element_size * constant + offset);
+      // /* int32_t */ monitor = src->monitor_
+      __ LoadFromOffset(kLoadWord, temp2, src, monitor_offset);
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
+
+      // Introduce a dependency on the lock_word including the rb_state,
+      // which shall prevent load-load reordering without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `temp2`.
+      __ add(src, src, ShifterOperand(temp2, LSR, 32));
+
+      // Compute the base source address in `temp1`.
+      // Note that `temp1` (the base source address) is computed from
+      // `src` (and `src_pos`) here, and thus honors the artificial
+      // dependency of `src` on `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // The base destination address is computed later, as `temp2` is
+      // used for intermediate computations.
+
+      // Slow path used to copy array when `src` is gray.
+      // Note that the base destination address is computed in `temp2`
+      // by the slow path code.
+      SlowPathCode* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM(invoke);
+      codegen_->AddSlowPath(read_barrier_slow_path);
+
+      // Given the numeric representation, it's enough to check the low bit of the
+      // rb_state. We do that by shifting the bit out of the lock word with LSRS
+      // which can be a 16-bit instruction unlike the TST immediate.
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+      __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+      // Carry flag is the last bit shifted out by LSRS.
+      __ b(read_barrier_slow_path->GetEntryLabel(), CS);
+
+      // Fast-path copy.
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      Label loop;
+      __ Bind(&loop);
+      __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+      __ str(IP, Address(temp2, element_size, Address::PostIndex));
+      __ cmp(temp1, ShifterOperand(temp3));
+      __ b(&loop, NE);
+
+      __ Bind(read_barrier_slow_path->GetExitLabel());
     } else {
-      __ add(temp2, dest, ShifterOperand(dest_pos.AsRegister<Register>(), LSL, element_size_shift));
-      __ AddConstant(temp2, offset);
+      // Non read barrier code.
+      // Compute the base source address in `temp1`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      Label loop;
+      __ Bind(&loop);
+      __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
+      __ str(IP, Address(temp2, element_size, Address::PostIndex));
+      __ cmp(temp1, ShifterOperand(temp3));
+      __ b(&loop, NE);
     }
-
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    Label loop, done;
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&done, EQ);
-    __ Bind(&loop);
-    __ ldr(IP, Address(temp1, element_size, Address::PostIndex));
-    __ str(IP, Address(temp2, element_size, Address::PostIndex));
-    __ cmp(temp1, ShifterOperand(temp3));
-    __ b(&loop, NE);
     __ Bind(&done);
   }
 
   // We only need one card marking on the destination array.
-  codegen_->MarkGCCard(temp1,
-                       temp2,
-                       dest,
-                       Register(kNoRegister),
-                       /* value_can_be_null */ false);
+  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
 
   __ Bind(intrinsic_slow_path->GetExitLabel());
 }
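A host-side analogue of the restructured copy path (a sketch, not ART code): the guard now tests the length directly (or is dropped entirely when the length is a constant zero) before a post-indexed do-while loop, instead of comparing the running addresses at loop entry.

#include <cstdint>

void CopyReferences(uint32_t* dst, const uint32_t* src, int32_t length) {
  if (length == 0) {
    return;  // a constant 0 length emits no loop code at all
  }
  // A register length is tested once with CompareAndBranchIfZero/cbz.
  const uint32_t* end = src + length;
  do {
    *dst++ = *src++;  // post-indexed ldr/str pair in the emitted code
  } while (src != end);
}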
@@ -2473,13 +2505,14 @@
   Register dst_ptr = locations->GetTemp(2).AsRegister<Register>();
 
   Label done, compressed_string_loop;
+  Label* final_label = codegen_->GetFinalLabel(invoke, &done);
   // dst to be copied.
   __ add(dst_ptr, dstObj, ShifterOperand(data_offset));
   __ add(dst_ptr, dst_ptr, ShifterOperand(dstBegin, LSL, 1));
 
   __ subs(num_chr, srcEnd, ShifterOperand(srcBegin));
   // Early out for valid zero-length retrievals.
-  __ b(&done, EQ);
+  __ b(final_label, EQ);
 
   // src range to copy.
   __ add(src_ptr, srcObj, ShifterOperand(value_offset));
@@ -2516,7 +2549,7 @@
   __ b(&loop, GE);
 
   __ adds(num_chr, num_chr, ShifterOperand(4));
-  __ b(&done, EQ);
+  __ b(final_label, EQ);
 
   // Main loop for < 4 character case and remainder handling. Loads and stores one
   // 16-bit Java character at a time.
@@ -2527,7 +2560,7 @@
   __ b(&remainder, GT);
 
   if (mirror::kUseStringCompression) {
-    __ b(&done);
+    __ b(final_label);
 
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
@@ -2541,7 +2574,9 @@
     __ b(&compressed_string_loop, GT);
   }
 
-  __ Bind(&done);
+  if (done.IsLinked()) {
+    __ Bind(&done);
+  }
 }
 
 void IntrinsicLocationsBuilderARM::VisitFloatIsInfinite(HInvoke* invoke) {
@@ -2646,6 +2681,75 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(R0),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARM::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  ArmAssembler* const assembler = GetAssembler();
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ LoadImmediate(IP, value);
+      __ StoreToOffset(kStoreWord, IP, out, info.value_offset);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    // Check bounds of our cache.
+    __ AddConstant(out, in, -info.low);
+    __ CmpConstant(out, info.high - info.low + 1);
+    Label allocate, done;
+    __ b(&allocate, HS);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ LoadLiteral(IP, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), IP, out);
+    __ MaybeUnpoisonHeapReference(out);
+    __ b(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ LoadLiteral(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
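The AddConstant/CmpConstant/HS sequence above is the standard single-unsigned-compare range check: (value - low) viewed as unsigned is below (high - low + 1) iff low <= value <= high. A host-side sketch:

#include <cstdint>

bool InIntegerCache(int32_t value, int32_t low, int32_t high) {
  // Widen before subtracting to avoid signed overflow in the sketch.
  uint32_t index = static_cast<uint32_t>(static_cast<int64_t>(value) - low);
  return index < static_cast<uint32_t>(high - low + 1);  // unsigned compare, like HS
}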
+
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinDoubleDouble)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMinFloatFloat)
 UNIMPLEMENTED_INTRINSIC(ARM, MathMaxDoubleDouble)
diff --git a/compiler/optimizing/intrinsics_arm.h b/compiler/optimizing/intrinsics_arm.h
index 7f20ea4..2840863 100644
--- a/compiler/optimizing/intrinsics_arm.h
+++ b/compiler/optimizing/intrinsics_arm.h
@@ -51,6 +51,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGenerator* codegen_;
   ArmAssembler* assembler_;
 
   const ArmInstructionSetFeatures& features_;
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index f386422..423fd3c 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -198,6 +198,8 @@
     DCHECK_NE(LocationFrom(src_stop_addr).reg(), IP0);
     DCHECK_NE(tmp_.reg(), IP0);
     DCHECK(0 <= tmp_.reg() && tmp_.reg() < kNumberOfWRegisters) << tmp_.reg();
+    // TODO: Load the entrypoint once before the loop, instead of
+    // loading it at every iteration.
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(tmp_.reg());
     // This runtime call does not require a stack map.
@@ -853,7 +855,6 @@
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot));
-  MacroAssembler* masm = codegen->GetVIXLAssembler();
   Location base_loc = locations->InAt(1);
   Register base = WRegisterFrom(base_loc);      // Object pointer.
   Location offset_loc = locations->InAt(2);
@@ -863,8 +864,7 @@
 
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // UnsafeGetObject/UnsafeGetObjectVolatile with Baker's read barrier case.
-    UseScratchRegisterScope temps(masm);
-    Register temp = temps.AcquireW();
+    Register temp = WRegisterFrom(locations->GetTemp(0));
     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                        trg_loc,
                                                        base,
@@ -901,6 +901,9 @@
                                                            kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1559,7 +1562,10 @@
     // Load `count` field of the argument string and check if it matches the const string.
     // Also compares the compression style, if differs return false.
     __ Ldr(temp, MemOperand(arg.X(), count_offset));
+    // Temporarily release temp1 as the flagged count may not fit in a CMP immediate.
+    scratch_scope.Release(temp1);
     __ Cmp(temp, Operand(mirror::String::GetFlaggedCount(const_string_length, is_compressed)));
+    temp1 = scratch_scope.AcquireW();
     __ B(&return_false, ne);
   } else {
     // Load `count` fields of this and argument strings.
@@ -2187,8 +2193,9 @@
   }
 }
 
-// Compute base source address, base destination address, and end source address
-// for System.arraycopy* intrinsics.
+// Compute base source address, base destination address, and end
+// source address for System.arraycopy* intrinsics in `src_base`,
+// `dst_base` and `src_end` respectively.
 static void GenSystemArrayCopyAddresses(MacroAssembler* masm,
                                         Primitive::Type type,
                                         const Register& src,
@@ -2199,12 +2206,13 @@
                                         const Register& src_base,
                                         const Register& dst_base,
                                         const Register& src_end) {
+  // This routine is used by the SystemArrayCopy and the SystemArrayCopyChar intrinsics.
   DCHECK(type == Primitive::kPrimNot || type == Primitive::kPrimChar)
       << "Unexpected element type: " << type;
   const int32_t element_size = Primitive::ComponentSize(type);
   const int32_t element_size_shift = Primitive::ComponentSizeShift(type);
+  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
 
-  uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
   if (src_pos.IsConstant()) {
     int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
     __ Add(src_base, src, element_size * constant + data_offset);
@@ -2381,9 +2389,14 @@
     // Temporary register IP0, obtained from the VIXL scratch register
     // pool, cannot be used in ReadBarrierSystemArrayCopySlowPathARM64
     // (because that register is clobbered by ReadBarrierMarkRegX
-    // entry points). Get an extra temporary register from the
-    // register allocator.
+    // entry points). It cannot be used in calls to
+    // CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
+    // either. For these reasons, get a third extra temporary register
+    // from the register allocator.
     locations->AddTemp(Location::RequiresRegister());
+  } else {
+    // Cases other than Baker read barriers: the third temporary will
+    // be acquired from the VIXL scratch register pool.
   }
 }
 
@@ -2494,11 +2507,12 @@
     // We use a block to end the scratch scope before the write barrier, thus
     // freeing the temporary registers so they can be used in `MarkGCCard`.
     UseScratchRegisterScope temps(masm);
-    // Note: Because it is acquired from VIXL's scratch register pool,
-    // `temp3` might be IP0, and thus cannot be used as `ref` argument
-    // of CodeGeneratorARM64::GenerateFieldLoadWithBakerReadBarrier
-    // calls below (see ReadBarrierMarkSlowPathARM64 for more details).
-    Register temp3 = temps.AcquireW();
+    Register temp3;
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      temp3 = WRegisterFrom(locations->GetTemp(2));
+    } else {
+      temp3 = temps.AcquireW();
+    }
 
     if (!optimizations.GetDoesNotNeedTypeCheck()) {
       // Check whether all elements of the source array are assignable to the component
@@ -2702,122 +2716,131 @@
       __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
     }
 
-    Register src_curr_addr = temp1.X();
-    Register dst_curr_addr = temp2.X();
-    Register src_stop_addr;
-    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-      // Temporary register IP0, obtained from the VIXL scratch
-      // register pool as `temp3`, cannot be used in
-      // ReadBarrierSystemArrayCopySlowPathARM64 (because that
-      // register is clobbered by ReadBarrierMarkRegX entry points).
-      // So another temporary register allocated by the register
-      // allocator instead.
-      DCHECK_EQ(LocationFrom(temp3).reg(), IP0);
-      src_stop_addr = XRegisterFrom(locations->GetTemp(2));
+    if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
+      // Zero constant length: no need to emit the loop code at all.
     } else {
-      src_stop_addr = temp3.X();
-    }
+      Register src_curr_addr = temp1.X();
+      Register dst_curr_addr = temp2.X();
+      Register src_stop_addr = temp3.X();
+      vixl::aarch64::Label done;
+      const Primitive::Type type = Primitive::kPrimNot;
+      const int32_t element_size = Primitive::ComponentSize(type);
 
-    GenSystemArrayCopyAddresses(masm,
-                                Primitive::kPrimNot,
-                                src,
-                                src_pos,
-                                dest,
-                                dest_pos,
-                                length,
-                                src_curr_addr,
-                                dst_curr_addr,
-                                src_stop_addr);
+      if (length.IsRegister()) {
+        // Don't enter the copy loop if the length is zero.
+        __ Cbz(WRegisterFrom(length), &done);
+      }
 
-    const int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        // TODO: Also convert this intrinsic to the IsGcMarking strategy?
 
-    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+        // SystemArrayCopy implementation for Baker read barriers (see
+        // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+        //
+        //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+        //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+        //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+        //   if (is_gray) {
+        //     // Slow-path copy.
+        //     do {
+        //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+        //     } while (src_ptr != end_ptr)
+        //   } else {
+        //     // Fast-path copy.
+        //     do {
+        //       *dest_ptr++ = *src_ptr++;
+        //     } while (src_ptr != end_ptr)
+        //   }
 
-      // SystemArrayCopy implementation for Baker read barriers (see
-      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
-      //
-      //   if (src_ptr != end_ptr) {
-      //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
-      //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-      //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-      //     if (is_gray) {
-      //       // Slow-path copy.
-      //       do {
-      //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
-      //       } while (src_ptr != end_ptr)
-      //     } else {
-      //       // Fast-path copy.
-      //       do {
-      //         *dest_ptr++ = *src_ptr++;
-      //       } while (src_ptr != end_ptr)
-      //     }
-      //   }
-
-      vixl::aarch64::Label loop, done;
-
-      // Don't enter copy loop if `length == 0`.
-      __ Cmp(src_curr_addr, src_stop_addr);
-      __ B(&done, eq);
-
-      Register tmp = temps.AcquireW();
-      // Make sure `tmp` is not IP0, as it is clobbered by
-      // ReadBarrierMarkRegX entry points in
-      // ReadBarrierSystemArrayCopySlowPathARM64.
-      DCHECK_NE(LocationFrom(tmp).reg(), IP0);
-
-      // /* int32_t */ monitor = src->monitor_
-      __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
-      // /* LockWord */ lock_word = LockWord(monitor)
-      static_assert(sizeof(LockWord) == sizeof(int32_t),
-                    "art::LockWord and int32_t have different sizes.");
-
-      // Introduce a dependency on the lock_word including rb_state,
-      // to prevent load-load reordering, and without using
-      // a memory barrier (which would be more expensive).
-      // `src` is unchanged by this operation, but its value now depends
-      // on `tmp`.
-      __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
-
-      // Slow path used to copy array when `src` is gray.
-      SlowPathCodeARM64* read_barrier_slow_path =
-          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
-      codegen_->AddSlowPath(read_barrier_slow_path);
-
-      // Given the numeric representation, it's enough to check the low bit of the rb_state.
-      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-      __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
-
-      // Fast-path copy.
-      // Iterate over the arrays and do a raw copy of the objects. We don't need to
-      // poison/unpoison.
-      __ Bind(&loop);
-      __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
-      __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
-      __ Cmp(src_curr_addr, src_stop_addr);
-      __ B(&loop, ne);
-
-      __ Bind(read_barrier_slow_path->GetExitLabel());
-      __ Bind(&done);
-    } else {
-      // Non read barrier code.
-
-      // Iterate over the arrays and do a raw copy of the objects. We don't need to
-      // poison/unpoison.
-      vixl::aarch64::Label loop, done;
-      __ Bind(&loop);
-      __ Cmp(src_curr_addr, src_stop_addr);
-      __ B(&done, eq);
-      {
+        // Make sure `tmp` is not IP0, as it is clobbered by
+        // ReadBarrierMarkRegX entry points in
+        // ReadBarrierSystemArrayCopySlowPathARM64.
+        temps.Exclude(ip0);
         Register tmp = temps.AcquireW();
+        DCHECK_NE(LocationFrom(tmp).reg(), IP0);
+
+        // /* int32_t */ monitor = src->monitor_
+        __ Ldr(tmp, HeapOperand(src.W(), monitor_offset));
+        // /* LockWord */ lock_word = LockWord(monitor)
+        static_assert(sizeof(LockWord) == sizeof(int32_t),
+                      "art::LockWord and int32_t have different sizes.");
+
+        // Introduce a dependency on the lock_word including rb_state,
+        // to prevent load-load reordering, and without using
+        // a memory barrier (which would be more expensive).
+        // `src` is unchanged by this operation, but its value now depends
+        // on `tmp`.
+        __ Add(src.X(), src.X(), Operand(tmp.X(), LSR, 32));
+
+        // Compute the base source address, base destination address, and
+        // end source address in `src_curr_addr`, `dst_curr_addr` and
+        // `src_stop_addr` respectively.
+        // Note that `src_curr_addr` is computed from `src` (and
+        // `src_pos`) here, and thus honors the artificial dependency
+        // of `src` on `tmp`.
+        GenSystemArrayCopyAddresses(masm,
+                                    type,
+                                    src,
+                                    src_pos,
+                                    dest,
+                                    dest_pos,
+                                    length,
+                                    src_curr_addr,
+                                    dst_curr_addr,
+                                    src_stop_addr);
+
+        // Slow path used to copy array when `src` is gray.
+        SlowPathCodeARM64* read_barrier_slow_path =
+            new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARM64(invoke, LocationFrom(tmp));
+        codegen_->AddSlowPath(read_barrier_slow_path);
+
+        // Given the numeric representation, it's enough to check the low bit of the rb_state.
+        static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+        static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+        __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
+
+        // Fast-path copy.
+        // Iterate over the arrays and do a raw copy of the objects. We don't need to
+        // poison/unpoison.
+        vixl::aarch64::Label loop;
+        __ Bind(&loop);
         __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
         __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+        __ Cmp(src_curr_addr, src_stop_addr);
+        __ B(&loop, ne);
+
+        __ Bind(read_barrier_slow_path->GetExitLabel());
+      } else {
+        // Non read barrier code.
+        // Compute the base source address, base destination address, and
+        // end source address in `src_curr_addr`, `dst_curr_addr` and
+        // `src_stop_addr` respectively.
+        GenSystemArrayCopyAddresses(masm,
+                                    type,
+                                    src,
+                                    src_pos,
+                                    dest,
+                                    dest_pos,
+                                    length,
+                                    src_curr_addr,
+                                    dst_curr_addr,
+                                    src_stop_addr);
+        // Iterate over the arrays and do a raw copy of the objects. We don't need to
+        // poison/unpoison.
+        vixl::aarch64::Label loop;
+        __ Bind(&loop);
+        {
+          Register tmp = temps.AcquireW();
+          __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+          __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+        }
+        __ Cmp(src_curr_addr, src_stop_addr);
+        __ B(&loop, ne);
       }
-      __ B(&loop);
       __ Bind(&done);
     }
   }
+
   // We only need one card marking on the destination array.
   codegen_->MarkGCCard(dest.W(), Register(), /* value_can_be_null */ false);
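Both the ARM and ARM64 read-barrier paths above add `lock_word >> 32` to `src` to order the rb_state load before the array element loads. A host-side sketch of why that add is value-preserving (not ART code):

#include <cstdint>

// The lock word is 32 bits, so shifting it right by 32 always yields 0;
// `src` is unchanged, yet the add cannot issue before the lock-word load,
// which prevents load-load reordering without a full memory barrier.
uint64_t AddArtificialDependency(uint64_t src, uint32_t lock_word) {
  return src + (static_cast<uint64_t>(lock_word) >> 32);  // always src + 0
}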
 
@@ -2926,6 +2949,79 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderARM64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      calling_convention.GetReturnLocation(Primitive::kPrimNot),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+}
+
+void IntrinsicCodeGeneratorARM64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  MacroAssembler* masm = GetVIXLAssembler();
+
+  Register out = RegisterFrom(locations->Out(), Primitive::kPrimNot);
+  UseScratchRegisterScope temps(masm);
+  Register temp = temps.AcquireW();
+  InvokeRuntimeCallingConvention calling_convention;
+  Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ Mov(temp.W(), value);
+      __ Str(temp.W(), HeapOperand(out.W(), info.value_offset));
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    Register in = RegisterFrom(locations->InAt(0), Primitive::kPrimInt);
+    // Check bounds of our cache.
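+    // Biasing by -low first lets the single unsigned compare below (hs) reject
+    // values on either side of the cache range.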
+    __ Add(out.W(), in.W(), -info.low);
+    __ Cmp(out.W(), info.high - info.low + 1);
+    vixl::aarch64::Label allocate, done;
+    __ B(&allocate, hs);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ Ldr(temp.W(), codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
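+    // The literal embeds data_offset, so `temp` already points at the first cache element.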
+    MemOperand source = HeapOperand(
+        temp, out.X(), LSL, Primitive::ComponentSizeShift(Primitive::kPrimNot));
+    codegen_->Load(Primitive::kPrimNot, out, source);
+    codegen_->GetAssembler()->MaybeUnpoisonHeapReference(out);
+    __ B(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ Ldr(argument.W(), codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    __ Str(in.W(), HeapOperand(out.W(), info.value_offset));
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongHighestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm64.h b/compiler/optimizing/intrinsics_arm64.h
index 28e41cb..3c53517 100644
--- a/compiler/optimizing/intrinsics_arm64.h
+++ b/compiler/optimizing/intrinsics_arm64.h
@@ -38,7 +38,8 @@
 
 class IntrinsicLocationsBuilderARM64 FINAL : public IntrinsicVisitor {
  public:
-  explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena) : arena_(arena) {}
+  explicit IntrinsicLocationsBuilderARM64(ArenaAllocator* arena, CodeGeneratorARM64* codegen)
+      : arena_(arena), codegen_(codegen) {}
 
   // Define visitor methods.
 
@@ -56,6 +57,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGeneratorARM64* codegen_;
 
   DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARM64);
 };
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
index cc4889b..0d933ea 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.cc
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -39,6 +39,7 @@
 using helpers::LocationFrom;
 using helpers::LowRegisterFrom;
 using helpers::LowSRegisterFrom;
+using helpers::HighSRegisterFrom;
 using helpers::OutputDRegister;
 using helpers::OutputSRegister;
 using helpers::OutputRegister;
@@ -117,6 +118,50 @@
   DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
 };
 
+// Compute base address for the System.arraycopy intrinsic in `base`.
+static void GenSystemArrayCopyBaseAddress(ArmVIXLAssembler* assembler,
+                                          Primitive::Type type,
+                                          const vixl32::Register& array,
+                                          const Location& pos,
+                                          const vixl32::Register& base) {
+  // This routine is only used by the SystemArrayCopy intrinsic at the
+  // moment. We could also allow Primitive::kPrimChar as `type` in order
+  // to implement the SystemArrayCopyChar intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(pos);
+    __ Add(base, array, element_size * constant + data_offset);
+  } else {
+    __ Add(base, array, Operand(RegisterFrom(pos), vixl32::LSL, element_size_shift));
+    __ Add(base, base, data_offset);
+  }
+}
+
+// Compute end address for the System.arraycopy intrinsic in `end`.
+static void GenSystemArrayCopyEndAddress(ArmVIXLAssembler* assembler,
+                                         Primitive::Type type,
+                                         const Location& copy_length,
+                                         const vixl32::Register& base,
+                                         const vixl32::Register& end) {
+  // This routine is only used by the SystemArrayCopy intrinsic at the
+  // moment. We could also allow Primitive::kPrimChar as `type` in order
+  // to implement the SystemArrayCopyChar intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const uint32_t element_size_shift = Primitive::ComponentSizeShift(type);
+
+  if (copy_length.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(copy_length);
+    __ Add(end, base, element_size * constant);
+  } else {
+    __ Add(end, base, Operand(RegisterFrom(copy_length), vixl32::LSL, element_size_shift));
+  }
+}
+
 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
 class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
@@ -137,9 +182,8 @@
     DCHECK(instruction_->GetLocations()->Intrinsified());
     DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
 
-    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
-    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+    Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);
 
     vixl32::Register dest = InputRegisterAt(instruction_, 2);
     Location dest_pos = locations->InAt(3);
@@ -150,15 +194,7 @@
 
     __ Bind(GetEntryLabel());
     // Compute the base destination address in `dst_curr_addr`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = Int32ConstantFrom(dest_pos);
-      __ Add(dst_curr_addr, dest, element_size * constant + offset);
-    } else {
-      __ Add(dst_curr_addr,
-             dest,
-             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
-      __ Add(dst_curr_addr, dst_curr_addr, offset);
-    }
+    GenSystemArrayCopyBaseAddress(assembler, type, dest, dest_pos, dst_curr_addr);
 
     vixl32::Label loop;
     __ Bind(&loop);
@@ -182,6 +218,8 @@
     DCHECK(!src_stop_addr.Is(ip));
     DCHECK(!tmp.Is(ip));
     DCHECK(tmp.IsRegister()) << tmp;
+    // TODO: Load the entrypoint once before the loop, instead of
+    // loading it at every iteration.
     int32_t entry_point_offset =
         CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
     // This runtime call does not require a stack map.
@@ -203,6 +241,7 @@
 
 IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
     : arena_(codegen->GetGraph()->GetArena()),
+      codegen_(codegen),
       assembler_(codegen->GetAssembler()),
       features_(codegen->GetInstructionSetFeatures()) {}
 
@@ -295,9 +334,11 @@
   locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
 }
 
-static void GenNumberOfLeadingZeros(LocationSummary* locations,
+static void GenNumberOfLeadingZeros(HInvoke* invoke,
                                     Primitive::Type type,
-                                    ArmVIXLAssembler* assembler) {
+                                    CodeGeneratorARMVIXL* codegen) {
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   Location in = locations->InAt(0);
   vixl32::Register out = RegisterFrom(locations->Out());
 
@@ -307,11 +348,14 @@
     vixl32::Register in_reg_lo = LowRegisterFrom(in);
     vixl32::Register in_reg_hi = HighRegisterFrom(in);
     vixl32::Label end;
+    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
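+    // GetFinalLabel may return the label of the next block (when this invoke is
+    // followed by a Goto), saving a branch; `end` is bound only if referenced.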
     __ Clz(out, in_reg_hi);
-    __ CompareAndBranchIfNonZero(in_reg_hi, &end, /* far_target */ false);
+    __ CompareAndBranchIfNonZero(in_reg_hi, final_label, /* far_target */ false);
     __ Clz(out, in_reg_lo);
     __ Add(out, out, 32);
-    __ Bind(&end);
+    if (end.IsReferenced()) {
+      __ Bind(&end);
+    }
   } else {
     __ Clz(out, RegisterFrom(in));
   }
@@ -322,7 +366,7 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+  GenNumberOfLeadingZeros(invoke, Primitive::kPrimInt, codegen_);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
@@ -334,27 +378,32 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
-  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+  GenNumberOfLeadingZeros(invoke, Primitive::kPrimLong, codegen_);
 }
 
-static void GenNumberOfTrailingZeros(LocationSummary* locations,
+static void GenNumberOfTrailingZeros(HInvoke* invoke,
                                      Primitive::Type type,
-                                     ArmVIXLAssembler* assembler) {
+                                     CodeGeneratorARMVIXL* codegen) {
   DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
 
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   vixl32::Register out = RegisterFrom(locations->Out());
 
   if (type == Primitive::kPrimLong) {
     vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
     vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
     vixl32::Label end;
+    vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &end);
     __ Rbit(out, in_reg_lo);
     __ Clz(out, out);
-    __ CompareAndBranchIfNonZero(in_reg_lo, &end, /* far_target */ false);
+    __ CompareAndBranchIfNonZero(in_reg_lo, final_label, /* far_target */ false);
     __ Rbit(out, in_reg_hi);
     __ Clz(out, out);
     __ Add(out, out, 32);
-    __ Bind(&end);
+    if (end.IsReferenced()) {
+      __ Bind(&end);
+    }
   } else {
     vixl32::Register in = RegisterFrom(locations->InAt(0));
     __ Rbit(out, in);
@@ -371,7 +420,7 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+  GenNumberOfTrailingZeros(invoke, Primitive::kPrimInt, codegen_);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
@@ -383,7 +432,7 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
-  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+  GenNumberOfTrailingZeros(invoke, Primitive::kPrimLong, codegen_);
 }
 
 static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
@@ -464,7 +513,8 @@
   GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
 }
 
-static void GenMinMaxFloat(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxFloat(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
   Location op1_loc = invoke->GetLocations()->InAt(0);
   Location op2_loc = invoke->GetLocations()->InAt(1);
   Location out_loc = invoke->GetLocations()->Out();
@@ -482,6 +532,7 @@
   const vixl32::Register temp1 = temps.Acquire();
   vixl32::Register temp2 = RegisterFrom(invoke->GetLocations()->GetTemp(0));
   vixl32::Label nan, done;
+  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
 
   DCHECK(op1.Is(out));
 
@@ -498,7 +549,8 @@
     __ it(cond);
     __ vmov(cond, F32, out, op2);
   }
-  __ B(ne, &done, /* far_target */ false);  // for <>(not equal), we've done min/max calculation.
+  // For <> (not equal), the min/max calculation is done.
+  __ B(ne, final_label, /* far_target */ false);
 
   // handle op1 == op2, max(+0.0,-0.0), min(+0.0,-0.0).
   __ Vmov(temp1, op1);
@@ -509,14 +561,16 @@
     __ And(temp1, temp1, temp2);
   }
   __ Vmov(out, temp1);
-  __ B(&done);
+  __ B(final_label);
 
   // handle NaN input.
   __ Bind(&nan);
   __ Movt(temp1, High16Bits(kNanFloat));  // 0x7FC0xxxx is a NaN.
   __ Vmov(out, temp1);
 
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
@@ -534,7 +588,7 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinFloatFloat(HInvoke* invoke) {
-  GenMinMaxFloat(invoke, /* is_min */ true, GetAssembler());
+  GenMinMaxFloat(invoke, /* is_min */ true, codegen_);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
@@ -543,10 +597,11 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxFloatFloat(HInvoke* invoke) {
-  GenMinMaxFloat(invoke, /* is_min */ false, GetAssembler());
+  GenMinMaxFloat(invoke, /* is_min */ false, codegen_);
 }
 
-static void GenMinMaxDouble(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+static void GenMinMaxDouble(HInvoke* invoke, bool is_min, CodeGeneratorARMVIXL* codegen) {
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
   Location op1_loc = invoke->GetLocations()->InAt(0);
   Location op2_loc = invoke->GetLocations()->InAt(1);
   Location out_loc = invoke->GetLocations()->Out();
@@ -561,6 +616,7 @@
   vixl32::DRegister op2 = DRegisterFrom(op2_loc);
   vixl32::DRegister out = OutputDRegister(invoke);
   vixl32::Label handle_nan_eq, done;
+  vixl32::Label* final_label = codegen->GetFinalLabel(invoke, &done);
 
   DCHECK(op1.Is(out));
 
@@ -577,19 +633,22 @@
     __ it(cond);
     __ vmov(cond, F64, out, op2);
   }
-  __ B(ne, &done, /* far_target */ false);  // for <>(not equal), we've done min/max calculation.
+  // For <> (not equal), the min/max calculation is done.
+  __ B(ne, final_label, /* far_target */ false);
 
   // handle op1 == op2, max(+0.0,-0.0).
   if (!is_min) {
     __ Vand(F64, out, op1, op2);
-    __ B(&done);
+    __ B(final_label);
   }
 
   // handle op1 == op2, min(+0.0,-0.0), NaN input.
   __ Bind(&handle_nan_eq);
   __ Vorr(F64, out, op1, op2);  // assemble op1/-0.0/NaN.
 
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
@@ -597,7 +656,7 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitMathMinDoubleDouble(HInvoke* invoke) {
-  GenMinMaxDouble(invoke, /* is_min */ true , GetAssembler());
+  GenMinMaxDouble(invoke, /* is_min */ true, codegen_);
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
@@ -605,7 +664,7 @@
 }
 
 void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxDoubleDouble(HInvoke* invoke) {
-  GenMinMaxDouble(invoke, /* is_min */ false, GetAssembler());
+  GenMinMaxDouble(invoke, /* is_min */ false, codegen_);
 }
 
 static void GenMinMaxLong(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
@@ -736,6 +795,58 @@
   __ Vrintn(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+  if (features_.HasARMv8AInstructions()) {
+    LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                              LocationSummary::kNoCall,
+                                                              kIntrinsified);
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetOut(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresFpuRegister());
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathRoundFloat(HInvoke* invoke) {
+  DCHECK(codegen_->GetInstructionSetFeatures().HasARMv8AInstructions());
+
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::SRegister in_reg = InputSRegisterAt(invoke, 0);
+  vixl32::Register out_reg = OutputRegister(invoke);
+  vixl32::SRegister temp1 = LowSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  vixl32::SRegister temp2 = HighSRegisterFrom(invoke->GetLocations()->GetTemp(0));
+  vixl32::Label done;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
+
+  // Round to nearest integer, ties away from zero.
+  __ Vcvta(S32, F32, temp1, in_reg);
+  __ Vmov(out_reg, temp1);
+
+  // For positive, zero or NaN inputs, rounding is done.
+  __ Cmp(out_reg, 0);
+  __ B(ge, final_label, /* far_target */ false);
+
+  // Handle input < 0 cases.
+  // If input is negative but not a tie, previous result (round to nearest) is valid.
+  // If input is a negative tie, change rounding direction to positive infinity, out_reg += 1.
+  __ Vrinta(F32, F32, temp1, in_reg);
+  __ Vmov(temp2, 0.5);
+  __ Vsub(F32, temp1, in_reg, temp1);
+  __ Vcmp(F32, temp1, temp2);
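+  // in - rinta(in) == 0.5 exactly when the input is a negative tie (e.g. -2.5),
+  // the only case where the result needs the +1 adjustment below.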
+  __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+  {
+    // Use ExactAssemblyScope here because we are using an IT block.
+    ExactAssemblyScope it_scope(assembler->GetVIXLAssembler(),
+                                2 * kMaxInstructionSizeInBytes,
+                                CodeBufferCheckScope::kMaximumSize);
+    __ it(eq);
+    __ add(eq, out_reg, out_reg, 1);
+  }
+
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
+}
+
 void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
   CreateIntToIntLocations(arena_, invoke);
 }
@@ -1632,6 +1743,7 @@
   vixl32::Label end;
   vixl32::Label return_true;
   vixl32::Label return_false;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &end);
 
   // Get offsets of count, value, and class fields within a string object.
   const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
@@ -1708,12 +1820,15 @@
   // If loop does not result in returning false, we return true.
   __ Bind(&return_true);
   __ Mov(out, 1);
-  __ B(&end);
+  __ B(final_label);
 
   // Return false and exit the function.
   __ Bind(&return_false);
   __ Mov(out, 0);
-  __ Bind(&end);
+
+  if (end.IsReferenced()) {
+    __ Bind(&end);
+  }
 }
 
 static void GenerateVisitStringIndexOf(HInvoke* invoke,
@@ -2242,143 +2357,116 @@
     __ CompareAndBranchIfNonZero(temp3, intrinsic_slow_path->GetEntryLabel());
   }
 
-  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
-  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
-
-  // Compute the base source address in `temp1`.
-  if (src_pos.IsConstant()) {
-    int32_t constant = Int32ConstantFrom(src_pos);
-    __ Add(temp1, src, element_size * constant + offset);
+  if (length.IsConstant() && Int32ConstantFrom(length) == 0) {
+    // Constant zero length: no need to emit the loop code at all.
   } else {
-    __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
-    __ Add(temp1, temp1, offset);
-  }
+    vixl32::Label done;
+    const Primitive::Type type = Primitive::kPrimNot;
+    const int32_t element_size = Primitive::ComponentSize(type);
 
-  // Compute the end source address in `temp3`.
-  if (length.IsConstant()) {
-    int32_t constant = Int32ConstantFrom(length);
-    __ Add(temp3, temp1, element_size * constant);
-  } else {
-    __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
-  }
+    if (length.IsRegister()) {
+      // Don't enter the copy loop if the length is zero.
+      __ CompareAndBranchIfZero(RegisterFrom(length), &done, /* is_far_target */ false);
+    }
 
-  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
-    // TODO: Also convert this intrinsic to the IsGcMarking strategy?
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // TODO: Also convert this intrinsic to the IsGcMarking strategy?
 
-    // The base destination address is computed later, as `temp2` is
-    // used for intermediate computations.
+      // SystemArrayCopy implementation for Baker read barriers (see
+      // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+      //
+      //   uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+      //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+      //   bool is_gray = (rb_state == ReadBarrier::GrayState());
+      //   if (is_gray) {
+      //     // Slow-path copy.
+      //     do {
+      //       *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+      //     } while (src_ptr != end_ptr)
+      //   } else {
+      //     // Fast-path copy.
+      //     do {
+      //       *dest_ptr++ = *src_ptr++;
+      //     } while (src_ptr != end_ptr)
+      //   }
 
-    // SystemArrayCopy implementation for Baker read barriers (see
-    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
-    //
-    //   if (src_ptr != end_ptr) {
-    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
-    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
-    //     if (is_gray) {
-    //       // Slow-path copy.
-    //       do {
-    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
-    //       } while (src_ptr != end_ptr)
-    //     } else {
-    //       // Fast-path copy.
-    //       do {
-    //         *dest_ptr++ = *src_ptr++;
-    //       } while (src_ptr != end_ptr)
-    //     }
-    //   }
+      // /* int32_t */ monitor = src->monitor_
+      __ Ldr(temp2, MemOperand(src, monitor_offset));
+      // /* LockWord */ lock_word = LockWord(monitor)
+      static_assert(sizeof(LockWord) == sizeof(int32_t),
+                    "art::LockWord and int32_t have different sizes.");
 
-    vixl32::Label loop, done;
+      // Introduce a dependency on the lock_word including the rb_state,
+      // which shall prevent load-load reordering without using
+      // a memory barrier (which would be more expensive).
+      // `src` is unchanged by this operation, but its value now depends
+      // on `temp2`.
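+      // (LSR #32 of a 32-bit register yields 0, which is why the Add leaves
+      // the value of `src` unchanged.)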
+      __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
 
-    // Don't enter copy loop if `length == 0`.
-    __ Cmp(temp1, temp3);
-    __ B(eq, &done, /* far_target */ false);
+      // Compute the base source address in `temp1`.
+      // Note that `temp1` (the base source address) is computed from
+      // `src` (and `src_pos`) here, and thus honors the artificial
+      // dependency of `src` on `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // The base destination address is computed later, as `temp2` is
+      // used for intermediate computations.
 
-    // /* int32_t */ monitor = src->monitor_
-    __ Ldr(temp2, MemOperand(src, monitor_offset));
-    // /* LockWord */ lock_word = LockWord(monitor)
-    static_assert(sizeof(LockWord) == sizeof(int32_t),
-                  "art::LockWord and int32_t have different sizes.");
+      // Slow path used to copy array when `src` is gray.
+      // Note that the base destination address is computed in `temp2`
+      // by the slow path code.
+      SlowPathCodeARMVIXL* read_barrier_slow_path =
+          new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
+      codegen_->AddSlowPath(read_barrier_slow_path);
 
-    // Introduce a dependency on the lock_word including the rb_state,
-    // which shall prevent load-load reordering without using
-    // a memory barrier (which would be more expensive).
-    // `src` is unchanged by this operation, but its value now depends
-    // on `temp2`.
-    __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
+      // Given the numeric representation, it's enough to check the low bit of the
+      // rb_state. We do that by shifting the bit out of the lock word with LSRS
+      // which can be a 16-bit instruction unlike the TST immediate.
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+      __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+      // Carry flag is the last bit shifted out by LSRS.
+      __ B(cs, read_barrier_slow_path->GetEntryLabel());
 
-    // Slow path used to copy array when `src` is gray.
-    SlowPathCodeARMVIXL* read_barrier_slow_path =
-        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
-    codegen_->AddSlowPath(read_barrier_slow_path);
+      // Fast-path copy.
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      {
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+        __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+      }
+      __ Cmp(temp1, temp3);
+      __ B(ne, &loop, /* far_target */ false);
 
-    // Given the numeric representation, it's enough to check the low bit of the
-    // rb_state. We do that by shifting the bit out of the lock word with LSRS
-    // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
-    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
-    // Carry flag is the last bit shifted out by LSRS.
-    __ B(cs, read_barrier_slow_path->GetEntryLabel());
-
-    // Fast-path copy.
-
-    // Compute the base destination address in `temp2`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = Int32ConstantFrom(dest_pos);
-      __ Add(temp2, dest, element_size * constant + offset);
+      __ Bind(read_barrier_slow_path->GetExitLabel());
     } else {
-      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
-      __ Add(temp2, temp2, offset);
+      // Non-read-barrier code.
+      // Compute the base source address in `temp1`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
+      // Compute the base destination address in `temp2`.
+      GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
+      // Compute the end source address in `temp3`.
+      GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
+      // Iterate over the arrays and do a raw copy of the objects. We don't need to
+      // poison/unpoison.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      {
+        UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+        const vixl32::Register temp_reg = temps.Acquire();
+        __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+        __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+      }
+      __ Cmp(temp1, temp3);
+      __ B(ne, &loop, /* far_target */ false);
     }
-
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    __ Bind(&loop);
-
-    {
-      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
-      const vixl32::Register temp_reg = temps.Acquire();
-
-      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
-      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
-    }
-
-    __ Cmp(temp1, temp3);
-    __ B(ne, &loop, /* far_target */ false);
-
-    __ Bind(read_barrier_slow_path->GetExitLabel());
-    __ Bind(&done);
-  } else {
-    // Non read barrier code.
-
-    // Compute the base destination address in `temp2`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = Int32ConstantFrom(dest_pos);
-      __ Add(temp2, dest, element_size * constant + offset);
-    } else {
-      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
-      __ Add(temp2, temp2, offset);
-    }
-
-    // Iterate over the arrays and do a raw copy of the objects. We don't need to
-    // poison/unpoison.
-    vixl32::Label loop, done;
-    __ Cmp(temp1, temp3);
-    __ B(eq, &done, /* far_target */ false);
-    __ Bind(&loop);
-
-    {
-      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
-      const vixl32::Register temp_reg = temps.Acquire();
-
-      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
-      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
-    }
-
-    __ Cmp(temp1, temp3);
-    __ B(ne, &loop, /* far_target */ false);
     __ Bind(&done);
   }
 
@@ -2778,13 +2866,14 @@
   vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
 
   vixl32::Label done, compressed_string_loop;
+  vixl32::Label* final_label = codegen_->GetFinalLabel(invoke, &done);
   // dst to be copied.
   __ Add(dst_ptr, dstObj, data_offset);
   __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
 
   __ Subs(num_chr, srcEnd, srcBegin);
   // Early out for valid zero-length retrievals.
-  __ B(eq, &done, /* far_target */ false);
+  __ B(eq, final_label, /* far_target */ false);
 
   // src range to copy.
   __ Add(src_ptr, srcObj, value_offset);
@@ -2828,7 +2917,7 @@
   __ B(ge, &loop, /* far_target */ false);
 
   __ Adds(num_chr, num_chr, 4);
-  __ B(eq, &done, /* far_target */ false);
+  __ B(eq, final_label, /* far_target */ false);
 
   // Main loop for < 4 character case and remainder handling. Loads and stores one
   // 16-bit Java character at a time.
@@ -2841,7 +2930,7 @@
   __ B(gt, &remainder, /* far_target */ false);
 
   if (mirror::kUseStringCompression) {
-    __ B(&done);
+    __ B(final_label);
 
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
@@ -2857,7 +2946,9 @@
     __ B(gt, &compressed_string_loop, /* far_target */ false);
   }
 
-  __ Bind(&done);
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
 }
 
 void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
@@ -2990,8 +3081,78 @@
   __ Vrintm(F64, F64, OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
 }
 
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      LocationFrom(r0),
+      LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  ArmVIXLAssembler* const assembler = GetAssembler();
+
+  vixl32::Register out = RegisterFrom(locations->Out());
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  vixl32::Register argument = calling_convention.GetRegisterAt(0);
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ Ldr(out, codegen_->DeduplicateBootImageAddressLiteral(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address =
+          dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+      __ Mov(temp, value);
+      assembler->StoreToOffset(kStoreWord, temp, out, info.value_offset);
+      // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+      // one.
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    }
+  } else {
+    vixl32::Register in = RegisterFrom(locations->InAt(0));
+    // Check bounds of our cache.
+    __ Add(out, in, -info.low);
+    __ Cmp(out, info.high - info.low + 1);
+    vixl32::Label allocate, done;
+    __ B(hs, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ Ldr(temp, codegen_->DeduplicateBootImageAddressLiteral(data_offset + address));
+    codegen_->LoadFromShiftedRegOffset(Primitive::kPrimNot, locations->Out(), temp, out);
+    assembler->MaybeUnpoisonHeapReference(out);
+    __ B(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ Ldr(argument, codegen_->DeduplicateBootImageAddressLiteral(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
+    assembler->StoreToOffset(kStoreWord, in, out, info.value_offset);
+    // `value` is a final field :-( Ideally, we'd merge this memory barrier with the allocation
+    // one.
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
-UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
 UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
index 6e79cb7..023cba1 100644
--- a/compiler/optimizing/intrinsics_arm_vixl.h
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -47,6 +47,7 @@
 
  private:
   ArenaAllocator* arena_;
+  CodeGenerator* codegen_;
   ArmVIXLAssembler* assembler_;
   const ArmInstructionSetFeatures& features_;
 
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 64a6840..b67793c 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -1514,21 +1514,31 @@
                     Thread::PeerOffset<kMipsPointerSize>().Int32Value());
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
-  bool can_call =
-       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
-       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile;
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorMIPS::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafeGet(HInvoke* invoke,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1539,45 +1549,109 @@
          (type == Primitive::kPrimLong) ||
          (type == Primitive::kPrimNot)) << type;
   MipsAssembler* assembler = codegen->GetAssembler();
+  // Target register.
+  Location trg_loc = locations->Out();
   // Object pointer.
-  Register base = locations->InAt(1).AsRegister<Register>();
+  Location base_loc = locations->InAt(1);
+  Register base = base_loc.AsRegister<Register>();
   // The "offset" argument is passed as a "long". Since this code is for
   // a 32-bit processor, we can only use 32-bit addresses, so we only
   // need the low 32-bits of offset.
-  Register offset_lo = invoke->GetLocations()->InAt(2).AsRegisterPairLow<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
 
-  __ Addu(TMP, base, offset_lo);
-  if (is_volatile) {
-    __ Sync(0);
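+  // The Baker read barrier path below forms the address from `base` and
+  // `offset` itself, so TMP is only needed in the other cases.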
+  if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+    __ Addu(TMP, base, offset_lo);
   }
-  if (type == Primitive::kPrimLong) {
-    Register trg_lo = locations->Out().AsRegisterPairLow<Register>();
-    Register trg_hi = locations->Out().AsRegisterPairHigh<Register>();
 
-    if (is_R6) {
-      __ Lw(trg_lo, TMP, 0);
-      __ Lw(trg_hi, TMP, 4);
-    } else {
-      __ Lwr(trg_lo, TMP, 0);
-      __ Lwl(trg_lo, TMP, 3);
-      __ Lwr(trg_hi, TMP, 4);
-      __ Lwl(trg_hi, TMP, 7);
+  switch (type) {
+    case Primitive::kPrimLong: {
+      Register trg_lo = trg_loc.AsRegisterPairLow<Register>();
+      Register trg_hi = trg_loc.AsRegisterPairHigh<Register>();
+      CHECK(!is_volatile);  // TODO: support atomic 8-byte volatile loads.
+      if (is_R6) {
+        __ Lw(trg_lo, TMP, 0);
+        __ Lw(trg_hi, TMP, 4);
+      } else {
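+        // Pre-R6 cores lack guaranteed unaligned word loads; each Lwr/Lwl
+        // pair assembles a word from two partial accesses.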
+        __ Lwr(trg_lo, TMP, 0);
+        __ Lwl(trg_lo, TMP, 3);
+        __ Lwr(trg_hi, TMP, 4);
+        __ Lwl(trg_hi, TMP, 7);
+      }
+      break;
     }
-  } else {
-    Register trg = locations->Out().AsRegister<Register>();
 
-    if (is_R6) {
-      __ Lw(trg, TMP, 0);
-    } else {
-      __ Lwr(trg, TMP, 0);
-      __ Lwl(trg, TMP, 3);
+    case Primitive::kPrimInt: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (is_R6) {
+        __ Lw(trg, TMP, 0);
+      } else {
+        __ Lwr(trg, TMP, 0);
+        __ Lwl(trg, TMP, 3);
+      }
+      if (is_volatile) {
+        __ Sync(0);
+      }
+      break;
     }
+
+    case Primitive::kPrimNot: {
+      Register trg = trg_loc.AsRegister<Register>();
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                             trg_loc,
+                                                             base,
+                                                             /* offset */ 0U,
+                                                             /* index */ offset_loc,
+                                                             TIMES_1,
+                                                             temp,
+                                                             /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+        } else {
+          if (is_R6) {
+            __ Lw(trg, TMP, 0);
+          } else {
+            __ Lwr(trg, TMP, 0);
+            __ Lwl(trg, TMP, 3);
+          }
+          if (is_volatile) {
+            __ Sync(0);
+          }
+          codegen->GenerateReadBarrierSlow(invoke,
+                                           trg_loc,
+                                           trg_loc,
+                                           base_loc,
+                                           /* offset */ 0U,
+                                           /* index */ offset_loc);
+        }
+      } else {
+        if (is_R6) {
+          __ Lw(trg, TMP, 0);
+        } else {
+          __ Lwr(trg, TMP, 0);
+          __ Lwl(trg, TMP, 3);
+        }
+        if (is_volatile) {
+          __ Sync(0);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
   }
 }
 
 // int sun.misc.Unsafe.getInt(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGet(HInvoke* invoke) {
@@ -1586,7 +1660,7 @@
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1595,25 +1669,16 @@
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLong(HInvoke* invoke) {
   GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, IsR6(), codegen_);
 }
 
-// long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, IsR6(), codegen_);
-}
-
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1622,7 +1687,7 @@
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1639,6 +1704,8 @@
   locations->SetInAt(3, Location::RequiresRegister());
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
 static void GenUnsafePut(LocationSummary* locations,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1663,6 +1730,11 @@
   if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
     Register value = locations->InAt(3).AsRegister<Register>();
 
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
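+      // References are stored poisoned when kPoisonHeapReferences is set;
+      // poisoning into AT keeps `value` itself intact.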
+      __ PoisonHeapReference(AT, value);
+      value = AT;
+    }
+
     if (is_R6) {
       __ Sw(value, TMP, 0);
     } else {
@@ -1672,7 +1744,7 @@
   } else {
     Register value_lo = locations->InAt(3).AsRegisterPairLow<Register>();
     Register value_hi = locations->InAt(3).AsRegisterPairHigh<Register>();
-
+    CHECK(!is_volatile);  // TODO: support atomic 8-byte volatile stores.
     if (is_R6) {
       __ Sw(value_lo, TMP, 0);
       __ Sw(value_hi, TMP, 4);
@@ -1806,50 +1878,83 @@
                codegen_);
 }
 
-// void sun.misc.Unsafe.putLongVolatile(Object o, long offset, long x)
-void IntrinsicLocationsBuilderMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntIntToVoidLocations(arena_, invoke);
-}
-
-void IntrinsicCodeGeneratorMIPS::VisitUnsafePutLongVolatile(HInvoke* invoke) {
-  GenUnsafePut(invoke->GetLocations(),
-               Primitive::kPrimLong,
-               /* is_volatile */ true,
-               /* is_ordered */ false,
-               IsR6(),
-               codegen_);
-}
-
-static void CreateIntIntIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
-
   locations->SetOut(Location::RequiresRegister());
+
+  // Temporary register used in the CAS by the (Baker) read barrier.
+  if (can_call) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If they do not, the behavior is undefined (atomicity not guaranteed, exception may occur).
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS* codegen) {
   MipsAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   bool isR6 = codegen->GetInstructionSetFeatures().IsR6();
   Register base = locations->InAt(1).AsRegister<Register>();
-  Register offset_lo = locations->InAt(2).AsRegisterPairLow<Register>();
+  Location offset_loc = locations->InAt(2);
+  Register offset_lo = offset_loc.AsRegisterPairLow<Register>();
   Register expected = locations->InAt(3).AsRegister<Register>();
   Register value = locations->InAt(4).AsRegister<Register>();
-  Register out = locations->Out().AsRegister<Register>();
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();
 
   DCHECK_NE(base, out);
   DCHECK_NE(offset_lo, out);
   DCHECK_NE(expected, out);
 
   if (type == Primitive::kPrimNot) {
-    // Mark card for object assuming new value is stored.
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Location temp = locations->GetTemp(0);
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          temp,
+          /* needs_null_check */ false,
+          /* always_update_field */ true);
+    }
+  }
+
+  MipsLabel loop_head, exit_loop;
+  __ Addu(TMP, base, offset_lo);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ PoisonHeapReference(expected);
+    // Do not poison `value`, if it is the same register as
+    // `expected`, which has just been poisoned.
+    if (value != expected) {
+      __ PoisonHeapReference(value);
+    }
   }
 
   // do {
@@ -1857,8 +1962,6 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  MipsLabel loop_head, exit_loop;
-  __ Addu(TMP, base, offset_lo);
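+  // The Sync(0) barriers around the LL/SC loop give the CAS volatile semantics.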
   __ Sync(0);
   __ Bind(&loop_head);
   if ((type == Primitive::kPrimInt) || (type == Primitive::kPrimNot)) {
@@ -1868,8 +1971,8 @@
       __ LlR2(out, TMP);
     }
   } else {
-      LOG(FATAL) << "Unsupported op size " << type;
-      UNREACHABLE();
+    LOG(FATAL) << "Unsupported op size " << type;
+    UNREACHABLE();
   }
   __ Subu(out, out, expected);          // If we didn't get the 'expected'
   __ Sltiu(out, out, 1);                // value, set 'out' to false, and
@@ -1894,24 +1997,43 @@
                                 // cycle atomically then retry.
   __ Bind(&exit_loop);
   __ Sync(0);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value`, if it is the same register as
+    // `expected`, which has just been unpoisoned.
+    if (value != expected) {
+      __ UnpoisonHeapReference(value);
+    }
+  }
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
 void IntrinsicLocationsBuilderMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
-  CreateIntIntIntIntIntToIntLocations(arena_, invoke);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitUnsafeCASObject(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 // int java.lang.String.compareTo(String anotherString)
@@ -1989,20 +2111,24 @@
     __ LoadConst32(out, 1);
     return;
   }
-
-  // Check if input is null, return false if it is.
-  __ Beqz(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Beqz(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Beq(str, arg, &return_true);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Lw(temp1, str, class_offset);
-  __ Lw(temp2, arg, class_offset);
-  __ Bne(temp1, temp2, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Lw(temp1, str, class_offset);
+    __ Lw(temp2, arg, class_offset);
+    __ Bne(temp1, temp2, &return_false);
+  }
 
   // Load `count` fields of this and argument strings.
   __ Lw(temp1, str, count_offset);
@@ -2527,7 +2653,7 @@
 // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
 void IntrinsicLocationsBuilderMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kNoCall,
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -2535,17 +2661,9 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // We will call memcpy() to do the actual work. Allocate the temporary
-  // registers to use the correct input registers, and output register.
-  // memcpy() uses the normal MIPS calling convention.
-  InvokeRuntimeCallingConvention calling_convention;
-
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
-  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimInt);
-  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<Register>()));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorMIPS::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -2564,16 +2682,11 @@
   Register dstBegin = locations->InAt(4).AsRegister<Register>();
 
   Register dstPtr = locations->GetTemp(0).AsRegister<Register>();
-  DCHECK_EQ(dstPtr, A0);
   Register srcPtr = locations->GetTemp(1).AsRegister<Register>();
-  DCHECK_EQ(srcPtr, A1);
   Register numChrs = locations->GetTemp(2).AsRegister<Register>();
-  DCHECK_EQ(numChrs, A2);
-
-  Register dstReturn = locations->GetTemp(3).AsRegister<Register>();
-  DCHECK_EQ(dstReturn, V0);
 
   MipsLabel done;
+  MipsLabel loop;
 
   // Location of data in char array buffer.
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -2602,7 +2715,7 @@
     __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
     __ Sll(TMP, TMP, 31);
 
-    // If string is uncompressed, use memcpy() path.
+    // If the string is uncompressed, use the uncompressed copy path.
     __ Bnez(TMP, &uncompressed_copy);
 
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2628,10 +2741,13 @@
     __ Addu(srcPtr, srcPtr, AT);
   }
 
-  // Calculate number of bytes to copy from number of characters.
-  __ Sll(numChrs, numChrs, char_shift);
-
-  codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+  __ Bind(&loop);
+  __ Lh(AT, srcPtr, 0);
+  __ Addiu(numChrs, numChrs, -1);
+  __ Addiu(srcPtr, srcPtr, char_size);
+  __ Sh(AT, dstPtr, 0);
+  __ Addiu(dstPtr, dstPtr, char_size);
+  __ Bnez(numChrs, &loop);
 
   __ Bind(&done);
 }
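The inline halfword loop that replaces the memcpy() call corresponds to this
host-side sketch (the emitted loop is bottom-tested, so the intrinsic only
reaches it with a non-zero character count):

    #include <cstddef>
    #include <cstdint>

    void CopyChars(const uint16_t* src, uint16_t* dst, size_t num_chars) {
      while (num_chars != 0) {
        *dst++ = *src++;  // Lh AT, srcPtr, 0 / Sh AT, dstPtr, 0
        --num_chars;      // Addiu numChrs, numChrs, -1
      }
    }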
@@ -2642,6 +2758,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathFloor)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRint)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathRoundDouble)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetLongVolatile)
+UNIMPLEMENTED_INTRINSIC(MIPS, UnsafePutLongVolatile)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeCASLong)
 
 UNIMPLEMENTED_INTRINSIC(MIPS, ReferenceGetReferent)
@@ -2682,6 +2800,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS, IntegerValueOf)
+
 UNREACHABLE_INTRINSICS(MIPS)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 3888828..c2518a7 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -834,15 +834,15 @@
   __ Bnezc(AT, &done);
 
   //     Long outLong = floor/ceil(in);
-  //     if outLong == Long.MAX_VALUE {
+  //     if ((outLong == Long.MAX_VALUE) || (outLong == Long.MIN_VALUE)) {
   //         // floor()/ceil() has almost certainly returned a value
   //         // which can't be successfully represented as a signed
   //         // 64-bit number.  Java expects that the input value will
   //         // be returned in these cases.
   //         // There is also a small probability that floor(in)/ceil(in)
   //         // correctly truncates/rounds up the input value to
-  //         // Long.MAX_VALUE.  In that case, this exception handling
-  //         // code still does the correct thing.
+  //         // Long.MAX_VALUE or Long.MIN_VALUE. In these cases, this
+  //         // exception handling code still does the correct thing.
   //         return in;
   //     }
   if (mode == kFloor) {
@@ -852,8 +852,14 @@
   }
   __ Dmfc1(AT, out);
   __ MovD(out, in);
-  __ LoadConst64(TMP, kPrimLongMax);
-  __ Beqc(AT, TMP, &done);
+  __ Daddiu(TMP, AT, 1);
+  __ Dati(TMP, 0x8000);  // TMP = AT + 0x8000 0000 0000 0001
+                         // or    AT - 0x7FFF FFFF FFFF FFFF.
+                         // That is, TMP = 1 if AT = Long.MIN_VALUE
+                         // or       TMP = 0 if AT = Long.MAX_VALUE.
+  __ Dsrl(TMP, TMP, 1);  // TMP = 0 if AT = Long.MIN_VALUE
+                         //         or AT = Long.MAX_VALUE.
+  __ Beqzc(TMP, &done);
 
   //     double out = outLong;
   //     return out;
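The Daddiu/Dati/Dsrl sequence folds both boundary tests into a single branch.
A host-side check of the identity (a sketch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t samples[] = {INT64_MIN, INT64_MAX, 0, 42, -1};
      for (int64_t at : samples) {
        uint64_t tmp = static_cast<uint64_t>(at) + 1;  // Daddiu TMP, AT, 1
        tmp += UINT64_C(0x8000) << 48;                 // Dati   TMP, 0x8000
        tmp >>= 1;                                     // Dsrl   TMP, TMP, 1
        // TMP == 0 exactly when AT was Long.MIN_VALUE or Long.MAX_VALUE.
        assert((tmp == 0) == (at == INT64_MIN || at == INT64_MAX));
      }
      return 0;
    }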
@@ -1151,16 +1157,31 @@
                     Thread::PeerOffset<kMips64PointerSize>().Int32Value());
 }
 
-static void CreateIntIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorMIPS64::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If it does not, the behavior is undefined: atomicity is not guaranteed
+// and an exception may occur.
 static void GenUnsafeGet(HInvoke* invoke,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1168,29 +1189,71 @@
   LocationSummary* locations = invoke->GetLocations();
   DCHECK((type == Primitive::kPrimInt) ||
          (type == Primitive::kPrimLong) ||
-         (type == Primitive::kPrimNot));
+         (type == Primitive::kPrimNot)) << type;
   Mips64Assembler* assembler = codegen->GetAssembler();
+  // Target register.
+  Location trg_loc = locations->Out();
+  GpuRegister trg = trg_loc.AsRegister<GpuRegister>();
   // Object pointer.
-  GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
+  Location base_loc = locations->InAt(1);
+  GpuRegister base = base_loc.AsRegister<GpuRegister>();
   // Long offset.
-  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
-  GpuRegister trg = locations->Out().AsRegister<GpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
 
-  __ Daddu(TMP, base, offset);
-  if (is_volatile) {
-    __ Sync(0);
+  if (!(kEmitCompilerReadBarrier && kUseBakerReadBarrier && (type == Primitive::kPrimNot))) {
+    __ Daddu(TMP, base, offset);
   }
+
   switch (type) {
+    case Primitive::kPrimLong:
+      __ Ld(trg, TMP, 0);
+      if (is_volatile) {
+        __ Sync(0);
+      }
+      break;
+
     case Primitive::kPrimInt:
       __ Lw(trg, TMP, 0);
+      if (is_volatile) {
+        __ Sync(0);
+      }
       break;
 
     case Primitive::kPrimNot:
-      __ Lwu(trg, TMP, 0);
-      break;
-
-    case Primitive::kPrimLong:
-      __ Ld(trg, TMP, 0);
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
+                                                             trg_loc,
+                                                             base,
+                                                             /* offset */ 0U,
+                                                             /* index */ offset_loc,
+                                                             TIMES_1,
+                                                             temp,
+                                                             /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+        } else {
+          __ Lwu(trg, TMP, 0);
+          if (is_volatile) {
+            __ Sync(0);
+          }
+          codegen->GenerateReadBarrierSlow(invoke,
+                                           trg_loc,
+                                           trg_loc,
+                                           base_loc,
+                                           /* offset */ 0U,
+                                           /* index */ offset_loc);
+        }
+      } else {
+        __ Lwu(trg, TMP, 0);
+        if (is_volatile) {
+          __ Sync(0);
+        }
+        __ MaybeUnpoisonHeapReference(trg);
+      }
       break;
 
     default:
@@ -1201,7 +1264,7 @@
 
 // int sun.misc.Unsafe.getInt(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGet(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1210,7 +1273,7 @@
 
 // int sun.misc.Unsafe.getIntVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetVolatile(HInvoke* invoke) {
@@ -1219,7 +1282,7 @@
 
 // long sun.misc.Unsafe.getLong(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLong(HInvoke* invoke) {
@@ -1228,7 +1291,7 @@
 
 // long sun.misc.Unsafe.getLongVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
@@ -1237,7 +1300,7 @@
 
 // Object sun.misc.Unsafe.getObject(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObject(HInvoke* invoke) {
@@ -1246,7 +1309,7 @@
 
 // Object sun.misc.Unsafe.getObjectVolatile(Object o, long offset)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
-  CreateIntIntIntToIntLocations(arena_, invoke);
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
@@ -1263,6 +1326,8 @@
   locations->SetInAt(3, Location::RequiresRegister());
 }
 
+// Note that the caller must supply a properly aligned memory address.
+// If it does not, the behavior is undefined: atomicity is not guaranteed
+// and an exception may occur.
 static void GenUnsafePut(LocationSummary* locations,
                          Primitive::Type type,
                          bool is_volatile,
@@ -1285,7 +1350,12 @@
   switch (type) {
     case Primitive::kPrimInt:
     case Primitive::kPrimNot:
-      __ Sw(value, TMP, 0);
+      if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+        __ PoisonHeapReference(AT, value);
+        __ Sw(AT, TMP, 0);
+      } else {
+        __ Sw(value, TMP, 0);
+      }
       break;
 
     case Primitive::kPrimLong:
@@ -1423,35 +1493,82 @@
                codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, HInvoke* invoke) {
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena, HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
-
   locations->SetOut(Location::RequiresRegister());
+
+  // Temporary register used by the (Baker) read barrier in the CAS intrinsics.
+  if (can_call) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
+// Note that the caller must supply a properly aligned memory address.
+// If it does not, the behavior is undefined: atomicity is not guaranteed
+// and an exception may occur.
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorMIPS64* codegen) {
   Mips64Assembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
   GpuRegister base = locations->InAt(1).AsRegister<GpuRegister>();
-  GpuRegister offset = locations->InAt(2).AsRegister<GpuRegister>();
+  Location offset_loc = locations->InAt(2);
+  GpuRegister offset = offset_loc.AsRegister<GpuRegister>();
   GpuRegister expected = locations->InAt(3).AsRegister<GpuRegister>();
   GpuRegister value = locations->InAt(4).AsRegister<GpuRegister>();
-  GpuRegister out = locations->Out().AsRegister<GpuRegister>();
+  Location out_loc = locations->Out();
+  GpuRegister out = out_loc.AsRegister<GpuRegister>();
 
   DCHECK_NE(base, out);
   DCHECK_NE(offset, out);
   DCHECK_NE(expected, out);
 
   if (type == Primitive::kPrimNot) {
-    // Mark card for object assuming new value is stored.
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barrier.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Location temp = locations->GetTemp(0);
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused as output; used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          temp,
+          /* needs_null_check */ false,
+          /* always_update_field */ true);
+    }
+  }
+
+  Mips64Label loop_head, exit_loop;
+  __ Daddu(TMP, base, offset);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ PoisonHeapReference(expected);
+    // Do not poison `value`, if it is the same register as
+    // `expected`, which has just been poisoned.
+    if (value != expected) {
+      __ PoisonHeapReference(value);
+    }
   }
 
   // do {
@@ -1459,8 +1576,6 @@
   // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
   // result = tmp_value != 0;
 
-  Mips64Label loop_head, exit_loop;
-  __ Daddu(TMP, base, offset);
   __ Sync(0);
   __ Bind(&loop_head);
   if (type == Primitive::kPrimLong) {
@@ -1469,6 +1584,11 @@
     // Note: We will need a read barrier here, when read barrier
     // support is added to the MIPS64 back end.
     __ Ll(out, TMP);
+    if (type == Primitive::kPrimNot) {
+      // The LL instruction sign-extends the 32-bit value, but
+      // 32-bit references must be zero-extended. Zero-extend `out`.
+      __ Dext(out, out, 0, 32);
+    }
   }
   __ Dsubu(out, out, expected);         // If we didn't get the 'expected'
   __ Sltiu(out, out, 1);                // value, set 'out' to false, and
@@ -1487,33 +1607,52 @@
                                 // cycle atomically then retry.
   __ Bind(&exit_loop);
   __ Sync(0);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    __ UnpoisonHeapReference(expected);
+    // Do not unpoison `value`, if it is the same register as
+    // `expected`, which has just been unpoisoned.
+    if (value != expected) {
+      __ UnpoisonHeapReference(value);
+    }
+  }
 }
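In C++ terms, the LL/SC loop in GenCas implements the following (a sketch; the
Sync(0) fences before and after the loop give it the full-barrier semantics
modeled here by sequential consistency):

    #include <atomic>
    #include <cstdint>

    // Returns true iff *addr held `expected` and was replaced by `value`,
    // matching out = (loaded value == expected) in the emitted loop.
    bool UnsafeCas(std::atomic<int64_t>* addr, int64_t expected, int64_t value) {
      return addr->compare_exchange_strong(expected, value,
                                           std::memory_order_seq_cst);
    }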
 
 // boolean sun.misc.Unsafe.compareAndSwapInt(Object o, long offset, int expected, int x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapLong(Object o, long offset, long expected, long x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASLong(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+  GenCas(invoke, Primitive::kPrimLong, codegen_);
 }
 
 // boolean sun.misc.Unsafe.compareAndSwapObject(Object o, long offset, Object expected, Object x)
 void IntrinsicLocationsBuilderMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
-  CreateIntIntIntIntIntToInt(arena_, invoke);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke);
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitUnsafeCASObject(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barrier.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 // int java.lang.String.compareTo(String anotherString)
@@ -1593,19 +1732,24 @@
     return;
   }
 
-  // Check if input is null, return false if it is.
-  __ Beqzc(arg, &return_false);
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Beqzc(arg, &return_false);
+  }
 
   // Reference equality check, return true if same reference.
   __ Beqc(str, arg, &return_true);
 
-  // Instanceof check for the argument by comparing class fields.
-  // All string objects must have the same type since String cannot be subclassed.
-  // Receiver must be a string object, so its class field is equal to all strings' class fields.
-  // If the argument is a string object, its class field must be equal to receiver's class field.
-  __ Lw(temp1, str, class_offset);
-  __ Lw(temp2, arg, class_offset);
-  __ Bnec(temp1, temp2, &return_false);
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Lw(temp1, str, class_offset);
+    __ Lw(temp2, arg, class_offset);
+    __ Bnec(temp1, temp2, &return_false);
+  }
 
   // Load `count` fields of this and argument strings.
   __ Lw(temp1, str, count_offset);
@@ -1860,7 +2004,7 @@
 // void java.lang.String.getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin)
 void IntrinsicLocationsBuilderMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
   LocationSummary* locations = new (arena_) LocationSummary(invoke,
-                                                            LocationSummary::kCallOnMainOnly,
+                                                            LocationSummary::kNoCall,
                                                             kIntrinsified);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1868,17 +2012,9 @@
   locations->SetInAt(3, Location::RequiresRegister());
   locations->SetInAt(4, Location::RequiresRegister());
 
-  // We will call memcpy() to do the actual work. Allocate the temporary
-  // registers to use the correct input registers, and output register.
-  // memcpy() uses the normal MIPS calling conventions.
-  InvokeRuntimeCallingConvention calling_convention;
-
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
-  locations->AddTemp(Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
-
-  Location outLocation = calling_convention.GetReturnLocation(Primitive::kPrimLong);
-  locations->AddTemp(Location::RegisterLocation(outLocation.AsRegister<GpuRegister>()));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
 }
 
 void IntrinsicCodeGeneratorMIPS64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1897,16 +2033,11 @@
   GpuRegister dstBegin = locations->InAt(4).AsRegister<GpuRegister>();
 
   GpuRegister dstPtr = locations->GetTemp(0).AsRegister<GpuRegister>();
-  DCHECK_EQ(dstPtr, A0);
   GpuRegister srcPtr = locations->GetTemp(1).AsRegister<GpuRegister>();
-  DCHECK_EQ(srcPtr, A1);
   GpuRegister numChrs = locations->GetTemp(2).AsRegister<GpuRegister>();
-  DCHECK_EQ(numChrs, A2);
-
-  GpuRegister dstReturn = locations->GetTemp(3).AsRegister<GpuRegister>();
-  DCHECK_EQ(dstReturn, V0);
 
   Mips64Label done;
+  Mips64Label loop;
 
   // Location of data in char array buffer.
   const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
@@ -1930,7 +2061,7 @@
     __ LoadFromOffset(kLoadWord, TMP, srcObj, count_offset);
     __ Dext(TMP, TMP, 0, 1);
 
-    // If string is uncompressed, use memcpy() path.
+    // If the string is uncompressed, use the uncompressed copy path.
     __ Bnezc(TMP, &uncompressed_copy);
 
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -1951,10 +2082,13 @@
   __ Daddiu(srcPtr, srcObj, value_offset);
   __ Dlsa(srcPtr, srcBegin, srcPtr, char_shift);
 
-  // Calculate number of bytes to copy from number of characters.
-  __ Dsll(numChrs, numChrs, char_shift);
-
-  codegen_->InvokeRuntime(kQuickMemcpy, invoke, invoke->GetDexPc(), nullptr);
+  __ Bind(&loop);
+  __ Lh(AT, srcPtr, 0);
+  __ Daddiu(numChrs, numChrs, -1);
+  __ Daddiu(srcPtr, srcPtr, char_size);
+  __ Sh(AT, dstPtr, 0);
+  __ Daddiu(dstPtr, dstPtr, char_size);
+  __ Bnezc(numChrs, &loop);
 
   __ Bind(&done);
 }
@@ -2075,6 +2209,8 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetLong)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndSetObject)
 
+UNIMPLEMENTED_INTRINSIC(MIPS64, IntegerValueOf)
+
 UNREACHABLE_INTRINSICS(MIPS64)
 
 #undef __
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index e1b7ea5..ecf919b 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -2878,6 +2878,49 @@
   return instruction->InputAt(input0) == instruction->InputAt(input1);
 }
 
+// Compute base address for the System.arraycopy intrinsic in `base`.
+static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
+                                          Primitive::Type type,
+                                          const Register& array,
+                                          const Location& pos,
+                                          const Register& base) {
+  // This routine is only used by the SystemArrayCopy intrinsic at the
+  // moment. We could also allow Primitive::kPrimChar as `type` to
+  // implement the SystemArrayCopyChar intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
+  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(base, Address(array, element_size * constant + data_offset));
+  } else {
+    __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
+  }
+}
+
+// Compute end source address for the System.arraycopy intrinsic in `end`.
+static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
+                                         Primitive::Type type,
+                                         const Location& copy_length,
+                                         const Register& base,
+                                         const Register& end) {
+  // This routine is only used by the SystemArrayCopy intrinsic at the
+  // moment. We could also allow Primitive::kPrimChar as `type` to
+  // implement the SystemArrayCopyChar intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
+
+  if (copy_length.IsConstant()) {
+    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(end, Address(base, element_size * constant));
+  } else {
+    __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
+  }
+}
+
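In pointer terms, the two helpers compute (a sketch; data_offset stands in for
mirror::Array::DataOffset()):

    #include <cstdint>

    inline uint8_t* BaseAddress(uint8_t* array, uint32_t data_offset,
                                int64_t pos, int64_t element_size) {
      return array + data_offset + pos * element_size;  // a single leal
    }

    inline uint8_t* EndAddress(uint8_t* base, int64_t copy_length,
                               int64_t element_size) {
      return base + copy_length * element_size;         // a single leal
    }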
 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
   // The only read barrier implementation supporting the
   // SystemArrayCopy intrinsic is the Baker-style read barriers.
@@ -3182,16 +3225,11 @@
     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
+  const Primitive::Type type = Primitive::kPrimNot;
+  const int32_t element_size = Primitive::ComponentSize(type);
+
   // Compute the base source address in `temp1`.
-  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-  DCHECK_EQ(element_size, 4);
-  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
-  if (src_pos.IsConstant()) {
-    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(temp1, Address(src, element_size * constant + offset));
-  } else {
-    __ leal(temp1, Address(src, src_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
-  }
+  GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
 
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // If it is needed (in the case of the fast-path loop), the base
@@ -3199,20 +3237,15 @@
     // intermediate computations.
 
     // Compute the end source address in `temp3`.
-    if (length.IsConstant()) {
-      int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
-      __ leal(temp3, Address(temp1, element_size * constant));
-    } else {
-      if (length.IsStackSlot()) {
-        // Location `length` is again pointing at a stack slot, as
-        // register `temp3` (which was containing the length parameter
-        // earlier) has been overwritten; restore it now
-        DCHECK(length.Equals(length_arg));
-        __ movl(temp3, Address(ESP, length.GetStackIndex()));
-        length = Location::RegisterLocation(temp3);
-      }
-      __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
+    if (length.IsStackSlot()) {
+      // Location `length` is again pointing at a stack slot, as
+      // register `temp3` (which contained the length parameter
+      // earlier) has been overwritten; restore it now.
+      DCHECK(length.Equals(length_arg));
+      __ movl(temp3, Address(ESP, length.GetStackIndex()));
+      length = Location::RegisterLocation(temp3);
     }
+    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
 
     // SystemArrayCopy implementation for Baker read barriers (see
     // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
@@ -3266,15 +3299,8 @@
     __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
 
     // Fast-path copy.
-
-    // Set the base destination address in `temp2`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-      __ leal(temp2, Address(dest, element_size * constant + offset));
-    } else {
-      __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
-    }
-
+    // Compute the base destination address in `temp2`.
+    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
     // Iterate over the arrays and do a raw copy of the objects. We don't need to
     // poison/unpoison.
     __ Bind(&loop);
@@ -3291,23 +3317,10 @@
     __ Bind(&done);
   } else {
     // Non read barrier code.
-
     // Compute the base destination address in `temp2`.
-    if (dest_pos.IsConstant()) {
-      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-      __ leal(temp2, Address(dest, element_size * constant + offset));
-    } else {
-      __ leal(temp2, Address(dest, dest_pos.AsRegister<Register>(), ScaleFactor::TIMES_4, offset));
-    }
-
+    GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
     // Compute the end source address in `temp3`.
-    if (length.IsConstant()) {
-      int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
-      __ leal(temp3, Address(temp1, element_size * constant));
-    } else {
-      __ leal(temp3, Address(temp1, length.AsRegister<Register>(), ScaleFactor::TIMES_4, 0));
-    }
-
+    GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
     // Iterate over the arrays and do a raw copy of the objects. We don't need to
     // poison/unpoison.
     NearLabel loop, done;
@@ -3326,15 +3339,70 @@
   }
 
   // We only need one card marking on the destination array.
-  codegen_->MarkGCCard(temp1,
-                       temp2,
-                       dest,
-                       Register(kNoRegister),
-                       /* value_can_be_null */ false);
+  codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
 
   __ Bind(intrinsic_slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(EAX),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  X86Assembler* assembler = GetAssembler();
+
+  Register out = locations->Out().AsRegister<Register>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ movl(out, Immediate(address));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ movl(Address(out, info.value_offset), Immediate(value));
+    }
+  } else {
+    Register in = locations->InAt(0).AsRegister<Register>();
+    // Check bounds of our cache.
+    __ leal(out, Address(in, -info.low));
+    __ cmpl(out, Immediate(info.high - info.low + 1));
+    NearLabel allocate, done;
+    __ j(kAboveEqual, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    __ movl(out, Address(out, TIMES_4, data_offset + address));
+    __ MaybeUnpoisonHeapReference(out);
+    __ jmp(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ movl(Address(out, info.value_offset), in);
+    __ Bind(&done);
+  }
+}
+
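The code above mirrors java.lang.Integer.valueOf(int) against the boot-image
cache. A host-side sketch (Integer, cache, low and high stand in for the
IntegerValueOfInfo fields; not ART's real types):

    #include <cstdint>

    struct Integer { int32_t value; };

    Integer* ValueOf(int32_t v, Integer** cache, int32_t low, int32_t high) {
      // One unsigned compare covers both bounds, as in leal/cmpl/jae above.
      if (static_cast<uint32_t>(v - low) < static_cast<uint32_t>(high - low + 1)) {
        return cache[v - low];       // boxed object preallocated in the boot image
      }
      Integer* boxed = new Integer;  // kQuickAllocObjectInitialized in the intrinsic
      boxed->value = v;              // movl(Address(out, info.value_offset), in)
      return boxed;
    }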
 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 05d270a..13956df 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -39,7 +39,6 @@
   : arena_(codegen->GetGraph()->GetArena()), codegen_(codegen) {
 }
 
-
 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
   return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
 }
@@ -1119,6 +1118,47 @@
   CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
 }
 
+// Compute base source address, base destination address, and end
+// source address for the System.arraycopy intrinsic in `src_base`,
+// `dst_base` and `src_end` respectively.
+static void GenSystemArrayCopyAddresses(X86_64Assembler* assembler,
+                                        Primitive::Type type,
+                                        const CpuRegister& src,
+                                        const Location& src_pos,
+                                        const CpuRegister& dst,
+                                        const Location& dst_pos,
+                                        const Location& copy_length,
+                                        const CpuRegister& src_base,
+                                        const CpuRegister& dst_base,
+                                        const CpuRegister& src_end) {
+  // This routine is only used by the SystemArrayCopy intrinsic.
+  DCHECK_EQ(type, Primitive::kPrimNot);
+  const int32_t element_size = Primitive::ComponentSize(type);
+  const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
+  const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  if (src_pos.IsConstant()) {
+    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_base, Address(src, element_size * constant + data_offset));
+  } else {
+    __ leal(src_base, Address(src, src_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
+  }
+
+  if (dst_pos.IsConstant()) {
+    int32_t constant = dst_pos.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(dst_base, Address(dst, element_size * constant + data_offset));
+  } else {
+    __ leal(dst_base, Address(dst, dst_pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
+  }
+
+  if (copy_length.IsConstant()) {
+    int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
+    __ leal(src_end, Address(src_base, element_size * constant));
+  } else {
+    __ leal(src_end, Address(src_base, copy_length.AsRegister<CpuRegister>(), scale_factor, 0));
+  }
+}
+
 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
   // The only read barrier implementation supporting the
   // SystemArrayCopy intrinsic is the Baker-style read barriers.
@@ -1367,30 +1407,13 @@
     __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
   }
 
-  // Compute base source address, base destination address, and end source address.
+  const Primitive::Type type = Primitive::kPrimNot;
+  const int32_t element_size = Primitive::ComponentSize(type);
 
-  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
-  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
-  if (src_pos.IsConstant()) {
-    int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(temp1, Address(src, element_size * constant + offset));
-  } else {
-    __ leal(temp1, Address(src, src_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
-  }
-
-  if (dest_pos.IsConstant()) {
-    int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(temp2, Address(dest, element_size * constant + offset));
-  } else {
-    __ leal(temp2, Address(dest, dest_pos.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, offset));
-  }
-
-  if (length.IsConstant()) {
-    int32_t constant = length.GetConstant()->AsIntConstant()->GetValue();
-    __ leal(temp3, Address(temp1, element_size * constant));
-  } else {
-    __ leal(temp3, Address(temp1, length.AsRegister<CpuRegister>(), ScaleFactor::TIMES_4, 0));
-  }
+  // Compute base source address, base destination address, and end
+  // source address in `temp1`, `temp2` and `temp3` respectively.
+  GenSystemArrayCopyAddresses(
+      GetAssembler(), type, src, src_pos, dest, dest_pos, length, temp1, temp2, temp3);
 
   if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // SystemArrayCopy implementation for Baker read barriers (see
@@ -1475,11 +1498,7 @@
   }
 
   // We only need one card marking on the destination array.
-  codegen_->MarkGCCard(temp1,
-                       temp2,
-                       dest,
-                       CpuRegister(kNoRegister),
-                       /* value_can_be_null */ false);
+  codegen_->MarkGCCard(temp1, temp2, dest, CpuRegister(kNoRegister), /* value_can_be_null */ false);
 
   __ Bind(intrinsic_slow_path->GetExitLabel());
 }
@@ -2995,6 +3014,73 @@
   __ Bind(slow_path->GetExitLabel());
 }
 
+void IntrinsicLocationsBuilderX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+  InvokeRuntimeCallingConvention calling_convention;
+  IntrinsicVisitor::ComputeIntegerValueOfLocations(
+      invoke,
+      codegen_,
+      Location::RegisterLocation(RAX),
+      Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+}
+
+void IntrinsicCodeGeneratorX86_64::VisitIntegerValueOf(HInvoke* invoke) {
+  IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
+  LocationSummary* locations = invoke->GetLocations();
+  X86_64Assembler* assembler = GetAssembler();
+
+  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  InvokeRuntimeCallingConvention calling_convention;
+  if (invoke->InputAt(0)->IsConstant()) {
+    int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
+    if (value >= info.low && value <= info.high) {
+      // Just embed the j.l.Integer in the code.
+      ScopedObjectAccess soa(Thread::Current());
+      mirror::Object* boxed = info.cache->Get(value + (-info.low));
+      DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
+      __ movl(out, Immediate(static_cast<int32_t>(address)));
+    } else {
+      // Allocate and initialize a new j.l.Integer.
+      // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
+      // JIT object table.
+      CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
+      uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+      __ movl(argument, Immediate(static_cast<int32_t>(address)));
+      codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+      CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+      __ movl(Address(out, info.value_offset), Immediate(value));
+    }
+  } else {
+    CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
+    // Check bounds of our cache.
+    __ leal(out, Address(in, -info.low));
+    __ cmpl(out, Immediate(info.high - info.low + 1));
+    NearLabel allocate, done;
+    __ j(kAboveEqual, &allocate);
+    // If the value is within the bounds, load the j.l.Integer directly from the array.
+    uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
+    uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
+    if (data_offset + address <= std::numeric_limits<int32_t>::max()) {
+      __ movl(out, Address(out, TIMES_4, data_offset + address));
+    } else {
+      CpuRegister temp = CpuRegister(calling_convention.GetRegisterAt(0));
+      __ movl(temp, Immediate(static_cast<int32_t>(data_offset + address)));
+      __ movl(out, Address(temp, out, TIMES_4, 0));
+    }
+    __ MaybeUnpoisonHeapReference(out);
+    __ jmp(&done);
+    __ Bind(&allocate);
+    // Otherwise allocate and initialize a new j.l.Integer.
+    CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
+    address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
+    __ movl(argument, Immediate(static_cast<int32_t>(address)));
+    codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectInitialized, void*, mirror::Class*>();
+    __ movl(Address(out, info.value_offset), in);
+    __ Bind(&done);
+  }
+}
+
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
diff --git a/compiler/optimizing/licm_test.cc b/compiler/optimizing/licm_test.cc
index 5bcfa4c..8d15f78 100644
--- a/compiler/optimizing/licm_test.cc
+++ b/compiler/optimizing/licm_test.cc
@@ -28,7 +28,18 @@
  */
 class LICMTest : public CommonCompilerTest {
  public:
-  LICMTest() : pool_(), allocator_(&pool_) {
+  LICMTest()
+      : pool_(),
+        allocator_(&pool_),
+        entry_(nullptr),
+        loop_preheader_(nullptr),
+        loop_header_(nullptr),
+        loop_body_(nullptr),
+        return_(nullptr),
+        exit_(nullptr),
+        parameter_(nullptr),
+        int_constant_(nullptr),
+        float_constant_(nullptr) {
     graph_ = CreateGraph(&allocator_);
   }
 
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 2d3c00f..48699b3 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -38,7 +38,8 @@
         position_(pos),
         is_singleton_(true),
         is_singleton_and_not_returned_(true),
-        is_singleton_and_not_deopt_visible_(true) {
+        is_singleton_and_not_deopt_visible_(true),
+        has_index_aliasing_(false) {
     CalculateEscape(reference_,
                     nullptr,
                     &is_singleton_,
@@ -68,13 +69,36 @@
     return is_singleton_and_not_returned_ && is_singleton_and_not_deopt_visible_;
   }
 
+  // Returns true if reference_ is a singleton that is returned to the caller
+  // or used as an environment local of an HDeoptimize instruction.
+  bool IsSingletonAndNonRemovable() const {
+    return is_singleton_ &&
+           (!is_singleton_and_not_returned_ || !is_singleton_and_not_deopt_visible_);
+  }
+
+  bool HasIndexAliasing() {
+    return has_index_aliasing_;
+  }
+
+  void SetHasIndexAliasing(bool has_index_aliasing) {
+    // Only allow setting to true.
+    DCHECK(has_index_aliasing);
+    has_index_aliasing_ = has_index_aliasing;
+  }
+
  private:
   HInstruction* const reference_;
   const size_t position_;  // position in HeapLocationCollector's ref_info_array_.
 
-  bool is_singleton_;                        // can only be referred to by a single name in the method,
-  bool is_singleton_and_not_returned_;       // and not returned to caller,
-  bool is_singleton_and_not_deopt_visible_;  // and not used as an environment local of HDeoptimize.
+  // Can only be referred to by a single name in the method.
+  bool is_singleton_;
+  // Is singleton and not returned to caller.
+  bool is_singleton_and_not_returned_;
+  // Is singleton and not used as an environment local of HDeoptimize.
+  bool is_singleton_and_not_deopt_visible_;
+  // Some heap locations with reference_ have array index aliasing,
+  // e.g. arr[i] and arr[j] may be the same location.
+  bool has_index_aliasing_;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
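An example of the aliasing that has_index_aliasing_ records (a sketch):

    // With non-constant indices, arr[i] and arr[j] may name the same location,
    // so the first store cannot be eliminated as dead:
    int Sketch(int* arr, int i, int j) {
      arr[i] = 1;     // must be kept: i may equal j
      arr[j] = 2;
      return arr[i];  // observes 2 when i == j, 1 otherwise
    }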
@@ -321,6 +345,8 @@
         // Different constant indices do not alias.
         return false;
       }
+      ReferenceInfo* ref_info = loc1->GetReferenceInfo();
+      ref_info->SetHasIndexAliasing(true);
     }
     return true;
   }
@@ -497,7 +523,8 @@
         removed_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         substitute_instructions_for_loads_(graph->GetArena()->Adapter(kArenaAllocLSE)),
         possibly_removed_stores_(graph->GetArena()->Adapter(kArenaAllocLSE)),
-        singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
+        singleton_new_instances_(graph->GetArena()->Adapter(kArenaAllocLSE)),
+        singleton_new_arrays_(graph->GetArena()->Adapter(kArenaAllocLSE)) {
   }
 
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
@@ -534,20 +561,24 @@
     }
 
     // At this point, stores in possibly_removed_stores_ can be safely removed.
-    for (size_t i = 0, e = possibly_removed_stores_.size(); i < e; i++) {
-      HInstruction* store = possibly_removed_stores_[i];
+    for (HInstruction* store : possibly_removed_stores_) {
       DCHECK(store->IsInstanceFieldSet() || store->IsStaticFieldSet() || store->IsArraySet());
       store->GetBlock()->RemoveInstruction(store);
     }
 
     // Eliminate allocations that are not used.
-    for (size_t i = 0, e = singleton_new_instances_.size(); i < e; i++) {
-      HInstruction* new_instance = singleton_new_instances_[i];
+    for (HInstruction* new_instance : singleton_new_instances_) {
       if (!new_instance->HasNonEnvironmentUses()) {
         new_instance->RemoveEnvironmentUsers();
         new_instance->GetBlock()->RemoveInstruction(new_instance);
       }
     }
+    for (HInstruction* new_array : singleton_new_arrays_) {
+      if (!new_array->HasNonEnvironmentUses()) {
+        new_array->RemoveEnvironmentUsers();
+        new_array->GetBlock()->RemoveInstruction(new_array);
+      }
+    }
   }
 
  private:
@@ -558,7 +589,7 @@
   void KeepIfIsStore(HInstruction* heap_value) {
     if (heap_value == kDefaultHeapValue ||
         heap_value == kUnknownHeapValue ||
-        !heap_value->IsInstanceFieldSet()) {
+        !(heap_value->IsInstanceFieldSet() || heap_value->IsArraySet())) {
       return;
     }
     auto idx = std::find(possibly_removed_stores_.begin(),
@@ -600,14 +631,17 @@
       for (size_t i = 0; i < heap_values.size(); i++) {
         HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
         ReferenceInfo* ref_info = location->GetReferenceInfo();
-        if (!ref_info->IsSingleton() || location->IsValueKilledByLoopSideEffects()) {
-          // heap value is killed by loop side effects (stored into directly, or due to
-          // aliasing).
+        if (ref_info->IsSingletonAndRemovable() &&
+            !location->IsValueKilledByLoopSideEffects()) {
+          // A removable singleton's field that's not stored into inside a loop is
+          // invariant throughout the loop. Nothing to do.
+          DCHECK(ref_info->IsSingletonAndRemovable());
+        } else {
+          // The heap value is killed by loop side effects (stored into
+          // directly, or due to aliasing), or it may be needed after method
+          // return or deoptimization.
           KeepIfIsStore(pre_header_heap_values[i]);
           heap_values[i] = kUnknownHeapValue;
-        } else {
-          // A singleton's field that's not stored into inside a loop is invariant throughout
-          // the loop.
         }
       }
     }
@@ -626,7 +660,7 @@
       bool from_all_predecessors = true;
       ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
       HInstruction* singleton_ref = nullptr;
-      if (ref_info->IsSingletonAndRemovable()) {
+      if (ref_info->IsSingleton()) {
         // We do more analysis of liveness when merging heap values for such
         // cases since stores into such references may potentially be eliminated.
         singleton_ref = ref_info->GetReference();
@@ -652,8 +686,9 @@
         }
       }
 
-      if (merged_value == kUnknownHeapValue) {
-        // There are conflicting heap values from different predecessors.
+      if (merged_value == kUnknownHeapValue || ref_info->IsSingletonAndNonRemovable()) {
+        // There are conflicting heap values from different predecessors,
+        // or the heap value may be needed after method return or deoptimization.
         // Keep the last store in each predecessor since future loads cannot be eliminated.
         for (HBasicBlock* predecessor : predecessors) {
           ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()];
@@ -734,13 +769,16 @@
       heap_values[idx] = constant;
       return;
     }
-    if (heap_value != kUnknownHeapValue && heap_value->IsInstanceFieldSet()) {
-      HInstruction* store = heap_value;
-      // This load must be from a singleton since it's from the same field
-      // that a "removed" store puts the value. That store must be to a singleton's field.
-      DCHECK(ref_info->IsSingleton());
-      // Get the real heap value of the store.
-      heap_value = store->InputAt(1);
+    if (heap_value != kUnknownHeapValue) {
+      if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+        HInstruction* store = heap_value;
+        // This load must be from a singleton since it reads the same
+        // field/element into which a "removed" store put the value. That
+        // store must be to a singleton's field/element.
+        DCHECK(ref_info->IsSingleton());
+        // Get the real heap value of the store.
+        heap_value = heap_value->IsInstanceFieldSet() ? store->InputAt(1) : store->InputAt(2);
+      }
     }
     if (heap_value == kUnknownHeapValue) {
       // Load isn't eliminated. Put the load as the value into the HeapLocation.
@@ -796,19 +834,19 @@
     if (Equal(heap_value, value)) {
       // Store into the heap location with the same value.
       same_value = true;
-    } else if (index != nullptr) {
-      // For array element, don't eliminate stores since it can be easily aliased
-      // with non-constant index.
-    } else if (ref_info->IsSingletonAndRemovable()) {
-      // Store into a field of a singleton that's not returned. The value cannot be
-      // killed due to aliasing/invocation. It can be redundant since future loads can
+    } else if (index != nullptr && ref_info->HasIndexAliasing()) {
+      // For array element, don't eliminate stores if the index can be aliased.
+    } else if (ref_info->IsSingleton()) {
+      // Store into a field of a singleton. The value cannot be killed due to
+      // aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
       // merging or loop side effects. Stores whose values are killed due to merging/loop side
       // effects later will be removed from possibly_removed_stores_ when that is detected.
+      // Stores whose values may be needed after method return or deoptimization
+      // are also removed from possibly_removed_stores_ when that is detected.
       possibly_redundant = true;
       HNewInstance* new_instance = ref_info->GetReference()->AsNewInstance();
-      DCHECK(new_instance != nullptr);
-      if (new_instance->IsFinalizable()) {
+      if (new_instance != nullptr && new_instance->IsFinalizable()) {
         // Finalizable objects escape globally. Need to keep the store.
         possibly_redundant = false;
       } else {
@@ -834,7 +872,7 @@
 
     if (!same_value) {
       if (possibly_redundant) {
-        DCHECK(instruction->IsInstanceFieldSet());
+        DCHECK(instruction->IsInstanceFieldSet() || instruction->IsArraySet());
         // Put the store as the heap value. If the value is loaded from heap
         // by a load later, this store isn't really redundant.
         heap_values[idx] = instruction;
@@ -914,6 +952,33 @@
                      value);
   }
 
+  void VisitDeoptimize(HDeoptimize* instruction) {
+    const ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[instruction->GetBlock()->GetBlockId()];
+    for (HInstruction* heap_value : heap_values) {
+      // Filter out fake instructions before checking instruction kind below.
+      if (heap_value == kUnknownHeapValue || heap_value == kDefaultHeapValue) {
+        continue;
+      }
+      // A store is kept as the heap value for possibly removed stores.
+      if (heap_value->IsInstanceFieldSet() || heap_value->IsArraySet()) {
+        // Check whether the reference for a store is used by an environment local of
+        // HDeoptimize.
+        HInstruction* reference = heap_value->InputAt(0);
+        DCHECK(heap_location_collector_.FindReferenceInfoOf(reference)->IsSingleton());
+        for (const HUseListNode<HEnvironment*>& use : reference->GetEnvUses()) {
+          HEnvironment* user = use.GetUser();
+          if (user->GetHolder() == instruction) {
+            // The singleton for the store is visible at this deoptimization
+            // point. Need to keep the store so that the heap value is
+            // seen by the interpreter.
+            KeepIfIsStore(heap_value);
+          }
+        }
+      }
+    }
+  }
+
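A sketch of the hazard VisitDeoptimize guards against:

    struct Obj { int field = 0; };

    // If `obj` is a singleton whose store was provisionally removed, but `obj`
    // is visible in a deoptimization environment, the interpreter would resume
    // with a stale field value; the store must therefore be kept.
    int Sketch(bool deopt_condition) {
      Obj obj;
      obj.field = 42;         // provisionally removable store
      if (deopt_condition) {  // models an HDeoptimize whose environment holds obj
        return obj.field;     // the interpreter reads the field from memory
      }
      return 0;
    }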
   void HandleInvoke(HInstruction* invoke) {
     ArenaVector<HInstruction*>& heap_values =
         heap_values_for_[invoke->GetBlock()->GetBlockId()];
@@ -995,6 +1060,27 @@
     }
   }
 
+  void VisitNewArray(HNewArray* new_array) OVERRIDE {
+    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
+    if (ref_info == nullptr) {
+      // new_array isn't used for array accesses. No need to process it.
+      return;
+    }
+    if (ref_info->IsSingletonAndRemovable()) {
+      singleton_new_arrays_.push_back(new_array);
+    }
+    ArenaVector<HInstruction*>& heap_values =
+        heap_values_for_[new_array->GetBlock()->GetBlockId()];
+    for (size_t i = 0; i < heap_values.size(); i++) {
+      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
+      HInstruction* ref = location->GetReferenceInfo()->GetReference();
+      if (ref == new_array && location->GetIndex() != nullptr) {
+        // Array elements are set to default heap values.
+        heap_values[i] = kDefaultHeapValue;
+      }
+    }
+  }
+
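The default-value seeding done by VisitNewArray corresponds to (a sketch):

    #include <cstdint>

    int32_t Sketch() {
      int32_t* a = new int32_t[8]();  // elements start at the default value (0)
      int32_t v = a[3];               // LSE can fold this load to the constant 0
      delete[] a;
      return v;
    }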
   // Find an instruction's substitute if it should be removed.
   // Return the same instruction if it should not be removed.
   HInstruction* FindSubstitute(HInstruction* instruction) {
@@ -1023,6 +1109,7 @@
   ArenaVector<HInstruction*> possibly_removed_stores_;
 
   ArenaVector<HInstruction*> singleton_new_instances_;
+  ArenaVector<HInstruction*> singleton_new_arrays_;
 
   DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
 };
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index 091b58a..6f0dbce 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -69,11 +69,13 @@
     // We do not use the value 9 because it conflicts with kLocationConstantMask.
     kDoNotUse9 = 9,
 
+    kSIMDStackSlot = 10,  // 128-bit stack slot. TODO: generalize with encoded #bytes?
+
     // Unallocated location represents a location that is not fixed and can be
     // allocated by a register allocator.  Each unallocated location has
     // a policy that specifies what kind of location is suitable. Payload
     // contains register allocation policy.
-    kUnallocated = 10,
+    kUnallocated = 11,
   };
 
   Location() : ValueObject(), value_(kInvalid) {
@@ -82,6 +84,7 @@
     static_assert((kUnallocated & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kStackSlot & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kDoubleStackSlot & kLocationConstantMask) != kConstant, "TagError");
+    static_assert((kSIMDStackSlot & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kRegister & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kFpuRegister & kLocationConstantMask) != kConstant, "TagError");
     static_assert((kRegisterPair & kLocationConstantMask) != kConstant, "TagError");
@@ -266,8 +269,20 @@
     return GetKind() == kDoubleStackSlot;
   }
 
+  static Location SIMDStackSlot(intptr_t stack_index) {
+    uintptr_t payload = EncodeStackIndex(stack_index);
+    Location loc(kSIMDStackSlot, payload);
+    // Ensure that sign is preserved.
+    DCHECK_EQ(loc.GetStackIndex(), stack_index);
+    return loc;
+  }
+
+  bool IsSIMDStackSlot() const {
+    return GetKind() == kSIMDStackSlot;
+  }
+
   intptr_t GetStackIndex() const {
-    DCHECK(IsStackSlot() || IsDoubleStackSlot());
+    DCHECK(IsStackSlot() || IsDoubleStackSlot() || IsSIMDStackSlot());
     // Decode stack index manually to preserve sign.
     return GetPayload() - kStackIndexBias;
   }
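(Aside: a quick usage sketch of the new slot kind, using only the accessors added above; illustrative, not part of this change.)

    Location loc = Location::SIMDStackSlot(16);
    DCHECK(loc.IsSIMDStackSlot());
    DCHECK(!loc.IsDoubleStackSlot());
    DCHECK_EQ(loc.GetStackIndex(), 16);  // sign-preserving decode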
@@ -315,6 +330,7 @@
       case kRegister: return "R";
       case kStackSlot: return "S";
       case kDoubleStackSlot: return "DS";
+      case kSIMDStackSlot: return "SIMD";
       case kUnallocated: return "U";
       case kConstant: return "C";
       case kFpuRegister: return "F";
@@ -417,6 +433,7 @@
 class RegisterSet : public ValueObject {
  public:
   static RegisterSet Empty() { return RegisterSet(); }
+  static RegisterSet AllFpu() { return RegisterSet(0, -1); }
 
   void Add(Location loc) {
     if (loc.IsRegister()) {
@@ -462,6 +479,7 @@
 
  private:
   RegisterSet() : core_registers_(0), floating_point_registers_(0) {}
+  RegisterSet(uint32_t core, uint32_t fp) : core_registers_(core), floating_point_registers_(fp) {}
 
   uint32_t core_registers_;
   uint32_t floating_point_registers_;
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 26c9ab8..1a79601 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -16,10 +16,21 @@
 
 #include "loop_optimization.h"
 
+#include "arch/instruction_set.h"
+#include "arch/arm/instruction_set_features_arm.h"
+#include "arch/arm64/instruction_set_features_arm64.h"
+#include "arch/mips/instruction_set_features_mips.h"
+#include "arch/mips64/instruction_set_features_mips64.h"
+#include "arch/x86/instruction_set_features_x86.h"
+#include "arch/x86_64/instruction_set_features_x86_64.h"
+#include "driver/compiler_driver.h"
 #include "linear_order.h"
 
 namespace art {
 
+// Enables vectorization (SIMDization) in the loop optimizer.
+static constexpr bool kEnableVectorization = true;
+
 // Remove the instruction from the graph. A bit more elaborate than the usual
 // instruction removal, since there may be a cycle in the use structure.
 static void RemoveFromCycle(HInstruction* instruction) {
@@ -52,24 +63,43 @@
   return false;
 }
 
+// Test vector restrictions.
+static bool HasVectorRestrictions(uint64_t restrictions, uint64_t tested) {
+  return (restrictions & tested) != 0;
+}
+
+// Inserts an instruction.
+static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
+  DCHECK(block != nullptr);
+  DCHECK(instruction != nullptr);
+  block->InsertInstructionBefore(instruction, block->GetLastInstruction());
+  return instruction;
+}
+
 //
 // Class methods.
 //
 
 HLoopOptimization::HLoopOptimization(HGraph* graph,
+                                     CompilerDriver* compiler_driver,
                                      HInductionVarAnalysis* induction_analysis)
     : HOptimization(graph, kLoopOptimizationPassName),
+      compiler_driver_(compiler_driver),
       induction_range_(induction_analysis),
       loop_allocator_(nullptr),
+      global_allocator_(graph_->GetArena()),
       top_loop_(nullptr),
       last_loop_(nullptr),
       iset_(nullptr),
       induction_simplication_count_(0),
-      simplified_(false) {
+      simplified_(false),
+      vector_length_(0),
+      vector_refs_(nullptr),
+      vector_map_(nullptr) {
 }
 
 void HLoopOptimization::Run() {
-  // Well-behaved loops only.
+  // Skip if there is no loop or the graph has try-catch/irreducible loops.
   // TODO: make this less of a sledgehammer.
   if (!graph_->HasLoops() || graph_->HasTryCatch() || graph_->HasIrreducibleLoops()) {
     return;
@@ -78,14 +108,13 @@
   // Phase-local allocator that draws from the global pool. Since the allocator
   // itself resides on the stack, it is destructed on exiting Run(), which
   // implies its underlying memory is released immediately.
-  ArenaAllocator allocator(graph_->GetArena()->GetArenaPool());
+  ArenaAllocator allocator(global_allocator_->GetArenaPool());
   loop_allocator_ = &allocator;
 
   // Perform loop optimizations.
   LocalRun();
-
   if (top_loop_ == nullptr) {
-    graph_->SetHasLoops(false);
+    graph_->SetHasLoops(false);  // no more loops
   }
 
   // Detach.
@@ -107,18 +136,29 @@
   }
 
   // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
-  // a temporary set that stores instructions using the phase-local allocator.
+  // temporary data structures using the phase-local allocator. All new HIR
+  // should use the global allocator.
   if (top_loop_ != nullptr) {
     ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    ArenaSet<ArrayReference> refs(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    ArenaSafeMap<HInstruction*, HInstruction*> map(
+        std::less<HInstruction*>(), loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    // Attach.
     iset_ = &iset;
+    vector_refs_ = &refs;
+    vector_map_ = &map;
+    // Traverse.
     TraverseLoopsInnerToOuter(top_loop_);
-    iset_ = nullptr;  // detach
+    // Detach.
+    iset_ = nullptr;
+    vector_refs_ = nullptr;
+    vector_map_ = nullptr;
   }
 }
 
 void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
   DCHECK(loop_info != nullptr);
-  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);  // phase-local allocator
+  LoopNode* node = new (loop_allocator_) LoopNode(loop_info);
   if (last_loop_ == nullptr) {
     // First loop.
     DCHECK(top_loop_ == nullptr);
@@ -166,7 +206,7 @@
 void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
   for ( ; node != nullptr; node = node->next) {
     // Visit inner loops first.
-    int current_induction_simplification_count = induction_simplication_count_;
+    uint32_t current_induction_simplification_count = induction_simplication_count_;
     if (node->inner != nullptr) {
       TraverseLoopsInnerToOuter(node->inner);
     }
@@ -175,7 +215,7 @@
     if (current_induction_simplification_count != induction_simplication_count_) {
       induction_range_.ReVisit(node->loop_info);
     }
-    // Repeat simplifications in the body of this loop until no more changes occur.
+    // Repeat simplifications in the loop-body until no more changes occur.
     // Note that since each simplification consists of eliminating code (without
     // introducing new code), this process is always finite.
     do {
@@ -183,13 +223,17 @@
       SimplifyInduction(node);
       SimplifyBlocks(node);
     } while (simplified_);
-    // Simplify inner loop.
+    // Optimize inner loop.
     if (node->inner == nullptr) {
-      SimplifyInnerLoop(node);
+      OptimizeInnerLoop(node);
     }
   }
 }
 
+//
+// Optimization.
+//
+
 void HLoopOptimization::SimplifyInduction(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
@@ -200,13 +244,9 @@
   //           for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
   for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
-    iset_->clear();
-    int32_t use_count = 0;
-    if (IsPhiInduction(phi) &&
-        IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ false, &use_count) &&
-        // No uses, or no early-exit with proper replacement.
-        (use_count == 0 ||
-         (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
+    iset_->clear();  // prepare phi induction
+    if (TrySetPhiInduction(phi, /*restrict_uses*/ true) &&
+        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ false)) {
       for (HInstruction* i : *iset_) {
         RemoveFromCycle(i);
       }
@@ -252,49 +292,47 @@
   }
 }
 
-bool HLoopOptimization::SimplifyInnerLoop(LoopNode* node) {
+void HLoopOptimization::OptimizeInnerLoop(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
   // Ensure loop header logic is finite.
-  int64_t tc = 0;
-  if (!induction_range_.IsFinite(node->loop_info, &tc)) {
-    return false;
+  int64_t trip_count = 0;
+  if (!induction_range_.IsFinite(node->loop_info, &trip_count)) {
+    return;
   }
+
   // Ensure there is only a single loop-body (besides the header).
   HBasicBlock* body = nullptr;
   for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
     if (it.Current() != header) {
       if (body != nullptr) {
-        return false;
+        return;
       }
       body = it.Current();
     }
   }
   // Ensure there is only a single exit point.
   if (header->GetSuccessors().size() != 2) {
-    return false;
+    return;
   }
   HBasicBlock* exit = (header->GetSuccessors()[0] == body)
       ? header->GetSuccessors()[1]
       : header->GetSuccessors()[0];
   // Ensure exit can only be reached by exiting loop.
   if (exit->GetPredecessors().size() != 1) {
-    return false;
+    return;
   }
   // Detect either an empty loop (no side effects other than plain iteration) or
   // a trivial loop (just iterating once). Replace subsequent index uses, if any,
   // with the last value and remove the loop, possibly after unrolling its body.
   HInstruction* phi = header->GetFirstPhi();
-  iset_->clear();
-  int32_t use_count = 0;
-  if (IsEmptyHeader(header)) {
+  iset_->clear();  // prepare phi induction
+  if (TrySetSimpleLoopHeader(header)) {
     bool is_empty = IsEmptyBody(body);
-    if ((is_empty || tc == 1) &&
-        IsOnlyUsedAfterLoop(node->loop_info, phi, /*collect_loop_uses*/ true, &use_count) &&
-        // No uses, or proper replacement.
-        (use_count == 0 || TryReplaceWithLastValue(phi, preheader))) {
+    if ((is_empty || trip_count == 1) &&
+        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
       if (!is_empty) {
-        // Unroll the loop body, which sees initial value of the index.
+        // Unroll the loop-body, which sees the initial value of the index.
         phi->ReplaceWith(phi->InputAt(0));
         preheader->MergeInstructionsWith(body);
       }
@@ -304,28 +342,705 @@
       header->RemoveDominatedBlock(exit);
       header->DisconnectAndDelete();
       preheader->AddSuccessor(exit);
-      preheader->AddInstruction(new (graph_->GetArena()) HGoto());  // global allocator
+      preheader->AddInstruction(new (global_allocator_) HGoto());
       preheader->AddDominatedBlock(exit);
       exit->SetDominator(preheader);
       RemoveLoop(node);  // update hierarchy
+      return;
+    }
+  }
+
+  // Vectorize loop, if possible and valid.
+  if (kEnableVectorization) {
+    iset_->clear();  // prepare phi induction
+    if (TrySetSimpleLoopHeader(header) &&
+        CanVectorize(node, body, trip_count) &&
+        TryAssignLastValue(node->loop_info, phi, preheader, /*collect_loop_uses*/ true)) {
+      Vectorize(node, body, exit, trip_count);
+      graph_->SetHasSIMD(true);  // flag SIMD usage
+      return;
+    }
+  }
+}
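(Aside: the two patterns removed here, written out as a sketch mirroring the `k` example quoted in SimplifyInduction; names are illustrative.)

    // Empty loop: no side effects besides iteration; the loop is deleted and
    // the use of k after the loop is replaced with its last value, 10.
    int EmptyLoop() {
      int k = 0;
      for (int i = 0; i < 10; i++, k++) { }
      return k;  // becomes: return 10;
    }

    // Trivial loop: trip count 1; the body is unrolled once into the preheader.
    int TrivialLoop(const int a[]) {
      int s = 0;
      for (int i = 0; i < 1; i++) { s += a[i]; }
      return s;  // the single iteration runs straight-line, without a loop
    }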
+
+//
+// Loop vectorization. The implementation is based on the book by Aart J.C. Bik:
+// "The Software Vectorization Handbook. Applying Multimedia Extensions for Maximum Performance."
+// Intel Press, June, 2004 (http://www.aartbik.com/).
+//
+
+bool HLoopOptimization::CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count) {
+  // Reset vector bookkeeping.
+  vector_length_ = 0;
+  vector_refs_->clear();
+  vector_runtime_test_a_ = nullptr;
+  vector_runtime_test_b_ = nullptr;
+
+  // Phis in the loop-body prevent vectorization.
+  if (!block->GetPhis().IsEmpty()) {
+    return false;
+  }
+
+  // Scan the loop-body, starting a right-hand-side tree traversal at each left-hand-side
+  // occurrence, which allows passing attributes down the use tree.
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    if (!VectorizeDef(node, it.Current(), /*generate_code*/ false)) {
+      return false;  // failure to vectorize a left-hand-side
+    }
+  }
+
+  // Heuristics. Does vectorization seem profitable?
+  // TODO: refine
+  if (vector_length_ == 0) {
+    return false;  // nothing found
+  } else if (0 < trip_count && trip_count < vector_length_) {
+    return false;  // insufficient iterations
+  }
+
+  // Data dependence analysis. Find each pair of references with same type, where
+  // at least one is a write. Each such pair denotes a possible data dependence.
+  // This analysis exploits the property that differently typed arrays cannot be
+  // aliased, as well as the property that references either point to the same
+  // array or to two completely disjoint arrays, i.e., no partial aliasing.
+  // Other than a few simple heuristics, no detailed subscript analysis is done.
+  for (auto i = vector_refs_->begin(); i != vector_refs_->end(); ++i) {
+    for (auto j = i; ++j != vector_refs_->end(); ) {
+      if (i->type == j->type && (i->lhs || j->lhs)) {
+        // Found same-typed a[i+x] vs. b[i+y], where at least one is a write.
+        HInstruction* a = i->base;
+        HInstruction* b = j->base;
+        HInstruction* x = i->offset;
+        HInstruction* y = j->offset;
+        if (a == b) {
+          // Found a[i+x] vs. a[i+y]. Accept if x == y (loop-independent data dependence).
+          // Conservatively assume a loop-carried data dependence otherwise, and reject.
+          if (x != y) {
+            return false;
+          }
+        } else {
+          // Found a[i+x] vs. b[i+y]. Accept if x == y (at worst loop-independent data dependence).
+          // Conservatively assume a potential loop-carried data dependence otherwise, avoided by
+          // generating an explicit a != b disambiguation runtime test on the two references.
+          if (x != y) {
+            // For now, we reject after one test to avoid excessive overhead.
+            if (vector_runtime_test_a_ != nullptr) {
+              return false;
+            }
+            vector_runtime_test_a_ = a;
+            vector_runtime_test_b_ = b;
+          }
+        }
+      }
+    }
+  }
+
+  // Success!
+  return true;
+}
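(Aside: the recorded runtime test corresponds to the classic disambiguation idiom sketched below; the real test is generated as HIR in the preheader, see Vectorize().)

    // Same-typed references a[i+x] vs. b[i+y] with x != y: safe to vectorize
    // only when the two arrays are provably distinct, hence the a != b guard.
    void AddShifted(int* a, const int* b, int n) {
      if (a != b) {
        for (int i = 0; i < n - 1; i++) a[i] = b[i + 1];  // vectorizable
      } else {
        for (int i = 0; i < n - 1; i++) a[i] = b[i + 1];  // sequential fallback
      }
    }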
+
+void HLoopOptimization::Vectorize(LoopNode* node,
+                                  HBasicBlock* block,
+                                  HBasicBlock* exit,
+                                  int64_t trip_count) {
+  Primitive::Type induc_type = Primitive::kPrimInt;
+  HBasicBlock* header = node->loop_info->GetHeader();
+  HBasicBlock* preheader = node->loop_info->GetPreHeader();
+
+  // A cleanup is needed for any unknown trip count or for a known trip count
+  // with remainder iterations after vectorization.
+  bool needs_cleanup = trip_count == 0 || (trip_count % vector_length_) != 0;
+
+  // Adjust vector bookkeeping.
+  iset_->clear();  // prepare phi induction
+  bool is_simple_loop_header = TrySetSimpleLoopHeader(header);  // fills iset_
+  DCHECK(is_simple_loop_header);
+
+  // Generate preheader:
+  // stc = <trip-count>;
+  // vtc = stc - stc % VL;
+  HInstruction* stc = induction_range_.GenerateTripCount(node->loop_info, graph_, preheader);
+  HInstruction* vtc = stc;
+  if (needs_cleanup) {
+    DCHECK(IsPowerOfTwo(vector_length_));
+    HInstruction* rem = Insert(
+        preheader, new (global_allocator_) HAnd(induc_type,
+                                                stc,
+                                                graph_->GetIntConstant(vector_length_ - 1)));
+    vtc = Insert(preheader, new (global_allocator_) HSub(induc_type, stc, rem));
+  }
+
+  // Generate runtime disambiguation test:
+  // vtc = a != b ? vtc : 0;
+  if (vector_runtime_test_a_ != nullptr) {
+    HInstruction* rt = Insert(
+        preheader,
+        new (global_allocator_) HNotEqual(vector_runtime_test_a_, vector_runtime_test_b_));
+    vtc = Insert(preheader,
+                 new (global_allocator_) HSelect(rt, vtc, graph_->GetIntConstant(0), kNoDexPc));
+    needs_cleanup = true;
+  }
+
+  // Generate vector loop:
+  // for (i = 0; i < vtc; i += VL)
+  //    <vectorized-loop-body>
+  vector_mode_ = kVector;
+  GenerateNewLoop(node,
+                  block,
+                  graph_->TransformLoopForVectorization(header, block, exit),
+                  graph_->GetIntConstant(0),
+                  vtc,
+                  graph_->GetIntConstant(vector_length_));
+  HLoopInformation* vloop = vector_header_->GetLoopInformation();
+
+  // Generate cleanup loop, if needed:
+  // for ( ; i < stc; i += 1)
+  //    <loop-body>
+  if (needs_cleanup) {
+    vector_mode_ = kSequential;
+    GenerateNewLoop(node,
+                    block,
+                    graph_->TransformLoopForVectorization(vector_header_, vector_body_, exit),
+                    vector_phi_,
+                    stc,
+                    graph_->GetIntConstant(1));
+  }
+
+  // Remove the original loop by disconnecting the body block
+  // and removing all instructions from the header.
+  block->DisconnectAndDelete();
+  while (!header->GetFirstInstruction()->IsGoto()) {
+    header->RemoveInstruction(header->GetFirstInstruction());
+  }
+  // Update loop hierarchy: the old header now resides in the
+  // same outer loop as the old preheader.
+  header->SetLoopInformation(preheader->GetLoopInformation());  // outward
+  node->loop_info = vloop;
+}
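(Aside: putting the pieces together, the emitted structure looks roughly like the sketch below; names are invented, kVL stands for vector_length_ and stc for the scalar trip count.)

    void VectorizedShape(int* out, const int* in, int stc) {
      const int kVL = 4;                  // vector_length_, a power of two
      int vtc = stc - (stc & (kVL - 1));  // vtc = stc - stc % VL
      int i = 0;
      for (; i < vtc; i += kVL) {         // vector loop
        out[i] = in[i];                   // stands in for the SIMD body
      }
      for (; i < stc; i += 1) {           // sequential cleanup loop
        out[i] = in[i];
      }
    }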
+
+void HLoopOptimization::GenerateNewLoop(LoopNode* node,
+                                        HBasicBlock* block,
+                                        HBasicBlock* new_preheader,
+                                        HInstruction* lo,
+                                        HInstruction* hi,
+                                        HInstruction* step) {
+  Primitive::Type induc_type = Primitive::kPrimInt;
+  // Prepare new loop.
+  vector_map_->clear();
+  vector_preheader_ = new_preheader;
+  vector_header_ = vector_preheader_->GetSingleSuccessor();
+  vector_body_ = vector_header_->GetSuccessors()[1];
+  vector_phi_ = new (global_allocator_) HPhi(global_allocator_,
+                                             kNoRegNumber,
+                                             0,
+                                             HPhi::ToPhiType(induc_type));
+  // Generate header and prepare body.
+  // for (i = lo; i < hi; i += step)
+  //    <loop-body>
+  HInstruction* cond = new (global_allocator_) HAboveOrEqual(vector_phi_, hi);
+  vector_header_->AddPhi(vector_phi_);
+  vector_header_->AddInstruction(cond);
+  vector_header_->AddInstruction(new (global_allocator_) HIf(cond));
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    bool vectorized_def = VectorizeDef(node, it.Current(), /*generate_code*/ true);
+    DCHECK(vectorized_def);
+  }
+  // Generate body from the instruction map, but in original program order.
+  HEnvironment* env = vector_header_->GetFirstInstruction()->GetEnvironment();
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    auto i = vector_map_->find(it.Current());
+    if (i != vector_map_->end() && !i->second->IsInBlock()) {
+      Insert(vector_body_, i->second);
+      // Deal with instructions that need an environment, such as the scalar intrinsics.
+      if (i->second->NeedsEnvironment()) {
+        i->second->CopyEnvironmentFromWithLoopPhiAdjustment(env, vector_header_);
+      }
+    }
+  }
+  // Finalize increment and phi.
+  HInstruction* inc = new (global_allocator_) HAdd(induc_type, vector_phi_, step);
+  vector_phi_->AddInput(lo);
+  vector_phi_->AddInput(Insert(vector_body_, inc));
+}
+
+// TODO: accept reductions at left-hand-side, mixed-type store idioms, etc.
+bool HLoopOptimization::VectorizeDef(LoopNode* node,
+                                     HInstruction* instruction,
+                                     bool generate_code) {
+  // Accept a left-hand-side array base[index] for
+  // (1) supported vector type,
+  // (2) loop-invariant base,
+  // (3) unit stride index,
+  // (4) vectorizable right-hand-side value.
+  uint64_t restrictions = kNone;
+  if (instruction->IsArraySet()) {
+    Primitive::Type type = instruction->AsArraySet()->GetComponentType();
+    HInstruction* base = instruction->InputAt(0);
+    HInstruction* index = instruction->InputAt(1);
+    HInstruction* value = instruction->InputAt(2);
+    HInstruction* offset = nullptr;
+    if (TrySetVectorType(type, &restrictions) &&
+        node->loop_info->IsDefinedOutOfTheLoop(base) &&
+        induction_range_.IsUnitStride(index, &offset) &&
+        VectorizeUse(node, value, generate_code, type, restrictions)) {
+      if (generate_code) {
+        GenerateVecSub(index, offset);
+        GenerateVecMem(instruction, vector_map_->Get(index), vector_map_->Get(value), type);
+      } else {
+        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ true));
+      }
       return true;
     }
+    return false;
+  }
+  // Branch back okay.
+  if (instruction->IsGoto()) {
+    return true;
+  }
+  // Otherwise accept only expressions with no effects outside the immediate loop-body.
+  // Note that actual uses are inspected during right-hand-side tree traversal.
+  return !IsUsedOutsideLoop(node->loop_info, instruction) && !instruction->DoesAnyWrite();
+}
+
+// TODO: more operations and intrinsics, detect saturation arithmetic, etc.
+bool HLoopOptimization::VectorizeUse(LoopNode* node,
+                                     HInstruction* instruction,
+                                     bool generate_code,
+                                     Primitive::Type type,
+                                     uint64_t restrictions) {
+  // Accept anything for which code has already been generated.
+  if (generate_code) {
+    if (vector_map_->find(instruction) != vector_map_->end()) {
+      return true;
+    }
+  }
+  // Continue the right-hand-side tree traversal, passing in proper
+  // types and vector restrictions along the way. During code generation,
+  // all new nodes are drawn from the global allocator.
+  if (node->loop_info->IsDefinedOutOfTheLoop(instruction)) {
+    // Accept invariant use, using scalar expansion.
+    if (generate_code) {
+      GenerateVecInv(instruction, type);
+    }
+    return true;
+  } else if (instruction->IsArrayGet()) {
+    // Accept a right-hand-side array base[index] for
+    // (1) exact matching vector type,
+    // (2) loop-invariant base,
+    // (3) unit stride index.
+    HInstruction* base = instruction->InputAt(0);
+    HInstruction* index = instruction->InputAt(1);
+    HInstruction* offset = nullptr;
+    if (type == instruction->GetType() &&
+        node->loop_info->IsDefinedOutOfTheLoop(base) &&
+        induction_range_.IsUnitStride(index, &offset)) {
+      if (generate_code) {
+        GenerateVecSub(index, offset);
+        GenerateVecMem(instruction, vector_map_->Get(index), nullptr, type);
+      } else {
+        vector_refs_->insert(ArrayReference(base, offset, type, /*lhs*/ false));
+      }
+      return true;
+    }
+  } else if (instruction->IsTypeConversion()) {
+    // Accept particular type conversions.
+    HTypeConversion* conversion = instruction->AsTypeConversion();
+    HInstruction* opa = conversion->InputAt(0);
+    Primitive::Type from = conversion->GetInputType();
+    Primitive::Type to = conversion->GetResultType();
+    if ((to == Primitive::kPrimByte ||
+         to == Primitive::kPrimChar ||
+         to == Primitive::kPrimShort) && from == Primitive::kPrimInt) {
+      // Accept a "narrowing" type conversion from a "wider" computation for
+      // (1) conversion into final required type,
+      // (2) vectorizable operand,
+      // (3) "wider" operations cannot bring in higher order bits.
+      if (to == type && VectorizeUse(node, opa, generate_code, type, restrictions | kNoHiBits)) {
+        if (generate_code) {
+          if (vector_mode_ == kVector) {
+            vector_map_->Put(instruction, vector_map_->Get(opa));  // operand pass-through
+          } else {
+            GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+          }
+        }
+        return true;
+      }
+    } else if (to == Primitive::kPrimFloat && from == Primitive::kPrimInt) {
+      DCHECK_EQ(to, type);
+      // Accept int to float conversion for
+      // (1) supported int,
+      // (2) vectorizable operand.
+      if (TrySetVectorType(from, &restrictions) &&
+          VectorizeUse(node, opa, generate_code, from, restrictions)) {
+        if (generate_code) {
+          GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+        }
+        return true;
+      }
+    }
+    return false;
+  } else if (instruction->IsNeg() || instruction->IsNot() || instruction->IsBooleanNot()) {
+    // Accept unary operator for vectorizable operand.
+    HInstruction* opa = instruction->InputAt(0);
+    if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+      if (generate_code) {
+        GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+      }
+      return true;
+    }
+  } else if (instruction->IsAdd() || instruction->IsSub() ||
+             instruction->IsMul() || instruction->IsDiv() ||
+             instruction->IsAnd() || instruction->IsOr()  || instruction->IsXor()) {
+    // Deal with vector restrictions.
+    if ((instruction->IsMul() && HasVectorRestrictions(restrictions, kNoMul)) ||
+        (instruction->IsDiv() && HasVectorRestrictions(restrictions, kNoDiv))) {
+      return false;
+    }
+    // Accept binary operator for vectorizable operands.
+    HInstruction* opa = instruction->InputAt(0);
+    HInstruction* opb = instruction->InputAt(1);
+    if (VectorizeUse(node, opa, generate_code, type, restrictions) &&
+        VectorizeUse(node, opb, generate_code, type, restrictions)) {
+      if (generate_code) {
+        GenerateVecOp(instruction, vector_map_->Get(opa), vector_map_->Get(opb), type);
+      }
+      return true;
+    }
+  } else if (instruction->IsShl() || instruction->IsShr() || instruction->IsUShr()) {
+    // Deal with vector restrictions.
+    if ((HasVectorRestrictions(restrictions, kNoShift)) ||
+        (instruction->IsShr() && HasVectorRestrictions(restrictions, kNoShr))) {
+      return false;  // unsupported instruction
+    } else if ((instruction->IsShr() || instruction->IsUShr()) &&
+               HasVectorRestrictions(restrictions, kNoHiBits)) {
+      return false;  // hibits may impact lobits; TODO: we can do better!
+    }
+    // Accept shift operator for vectorizable/invariant operands.
+    // TODO: accept symbolic, albeit loop invariant shift factors.
+    HInstruction* opa = instruction->InputAt(0);
+    HInstruction* opb = instruction->InputAt(1);
+    if (VectorizeUse(node, opa, generate_code, type, restrictions) && opb->IsIntConstant()) {
+      if (generate_code) {
+        // Make sure shift factor only looks at lower bits, as defined for sequential shifts.
+        // Note that even the narrower SIMD shifts do the right thing after that.
+        int32_t mask = (instruction->GetType() == Primitive::kPrimLong)
+            ? kMaxLongShiftDistance
+            : kMaxIntShiftDistance;
+        HInstruction* s = graph_->GetIntConstant(opb->AsIntConstant()->GetValue() & mask);
+        GenerateVecOp(instruction, vector_map_->Get(opa), s, type);
+      }
+      return true;
+    }
+  } else if (instruction->IsInvokeStaticOrDirect()) {
+    // Accept particular intrinsics.
+    HInvokeStaticOrDirect* invoke = instruction->AsInvokeStaticOrDirect();
+    switch (invoke->GetIntrinsic()) {
+      case Intrinsics::kMathAbsInt:
+      case Intrinsics::kMathAbsLong:
+      case Intrinsics::kMathAbsFloat:
+      case Intrinsics::kMathAbsDouble: {
+        // Deal with vector restrictions.
+        if (HasVectorRestrictions(restrictions, kNoAbs) ||
+            HasVectorRestrictions(restrictions, kNoHiBits)) {
+          // TODO: we can do better for some hibits cases.
+          return false;
+        }
+        // Accept ABS(x) for vectorizable operand.
+        HInstruction* opa = instruction->InputAt(0);
+        if (VectorizeUse(node, opa, generate_code, type, restrictions)) {
+          if (generate_code) {
+            GenerateVecOp(instruction, vector_map_->Get(opa), nullptr, type);
+          }
+          return true;
+        }
+        return false;
+      }
+      default:
+        return false;
+    }  // switch
   }
   return false;
 }
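(Aside on the shift masking above, as a hedged sketch: dex shifts use only the low 5 bits of an int distance, 6 for long, so masking the constant before emitting the SIMD shift preserves the sequential semantics even on SIMD units with narrower shift encodings.)

    int ShiftSemantics(int x) {
      // dex semantics: an int shift distance is taken modulo 32, so a
      // constant distance of 33 is masked to 1 before the SIMD shift.
      return x << (33 & 31);  // same as x << 1
    }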
 
-bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+bool HLoopOptimization::TrySetVectorType(Primitive::Type type, uint64_t* restrictions) {
+  const InstructionSetFeatures* features = compiler_driver_->GetInstructionSetFeatures();
+  switch (compiler_driver_->GetInstructionSet()) {
+    case kArm:
+    case kThumb2:
+      return false;
+    case kArm64:
+      // Allow vectorization for all ARM64 devices, because Android assumes that
+      // ARMv8 AArch64 always supports Advanced SIMD. For now, only D registers
+      // (64-bit vectors) are used, not Q registers (128-bit vectors).
+      switch (type) {
+        case Primitive::kPrimBoolean:
+        case Primitive::kPrimByte:
+          *restrictions |= kNoDiv | kNoAbs;
+          return TrySetVectorLength(8);
+        case Primitive::kPrimChar:
+        case Primitive::kPrimShort:
+          *restrictions |= kNoDiv | kNoAbs;
+          return TrySetVectorLength(4);
+        case Primitive::kPrimInt:
+          *restrictions |= kNoDiv;
+          return TrySetVectorLength(2);
+        case Primitive::kPrimFloat:
+          return TrySetVectorLength(2);
+        default:
+          return false;
+      }
+    case kX86:
+    case kX86_64:
+      // Allow vectorization for SSE4.1-enabled X86 devices only (128-bit vectors).
+      if (features->AsX86InstructionSetFeatures()->HasSSE4_1()) {
+        switch (type) {
+          case Primitive::kPrimBoolean:
+          case Primitive::kPrimByte:
+            *restrictions |= kNoMul | kNoDiv | kNoShift | kNoAbs;
+            return TrySetVectorLength(16);
+          case Primitive::kPrimChar:
+          case Primitive::kPrimShort:
+            *restrictions |= kNoDiv | kNoAbs;
+            return TrySetVectorLength(8);
+          case Primitive::kPrimInt:
+            *restrictions |= kNoDiv;
+            return TrySetVectorLength(4);
+          case Primitive::kPrimLong:
+            *restrictions |= kNoMul | kNoDiv | kNoShr | kNoAbs;
+            return TrySetVectorLength(2);
+          case Primitive::kPrimFloat:
+            return TrySetVectorLength(4);
+          case Primitive::kPrimDouble:
+            return TrySetVectorLength(2);
+          default:
+            break;
+        }  // switch type
+      }
+      return false;
+    case kMips:
+    case kMips64:
+      // TODO: implement MIPS SIMD.
+      return false;
+    default:
+      return false;
+  }  // switch instruction set
+}
+
+bool HLoopOptimization::TrySetVectorLength(uint32_t length) {
+  DCHECK(IsPowerOfTwo(length) && length >= 2u);
+  // First time set?
+  if (vector_length_ == 0) {
+    vector_length_ = length;
+  }
+  // Different types are acceptable within a loop-body, as long as all the corresponding vector
+  // lengths match exactly to obtain a uniform traversal through the vector iteration space
+  // (idiomatic exceptions to this rule can be handled by further unrolling sub-expressions).
+  return vector_length_ == length;
+}
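(Aside: why a single vector length matters, sketched with a hypothetical loop this check rejects: with 128-bit vectors a byte operation covers 16 lanes per instruction but an int operation only 4, so a body mixing them has no single uniform step for the induction variable.)

    #include <cstdint>

    void MixedTypes(int8_t* bytes, int* ints, int n) {
      for (int i = 0; i < n; i++) {
        bytes[i] = static_cast<int8_t>(bytes[i] + 1);  // would be 16 lanes
        ints[i] += 1;                                  // would be 4 lanes
      }
    }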
+
+void HLoopOptimization::GenerateVecInv(HInstruction* org, Primitive::Type type) {
+  if (vector_map_->find(org) == vector_map_->end()) {
+    // In scalar code, just use a self pass-through for scalar invariants
+    // (viz. expression remains itself).
+    if (vector_mode_ == kSequential) {
+      vector_map_->Put(org, org);
+      return;
+    }
+    // In vector code, explicit scalar expansion is needed.
+    HInstruction* vector = new (global_allocator_) HVecReplicateScalar(
+        global_allocator_, org, type, vector_length_);
+    vector_map_->Put(org, Insert(vector_preheader_, vector));
+  }
+}
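(Aside: scalar expansion illustrated as a before/after sketch; vc and Replicate4 are invented stand-ins for the hoisted HVecReplicateScalar.)

    #include <array>

    // Before: c is loop-invariant in  for (i) a[i] = b[i] + c;
    // After:  vc = replicate(c) is hoisted to the preheader and each vector
    //         iteration adds vc; shown here with 4 lanes.
    std::array<int, 4> Replicate4(int c) { return {c, c, c, c}; }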
+
+void HLoopOptimization::GenerateVecSub(HInstruction* org, HInstruction* offset) {
+  if (vector_map_->find(org) == vector_map_->end()) {
+    HInstruction* subscript = vector_phi_;
+    if (offset != nullptr) {
+      subscript = new (global_allocator_) HAdd(Primitive::kPrimInt, subscript, offset);
+      if (org->IsPhi()) {
+        Insert(vector_body_, subscript);  // lacks layout placeholder
+      }
+    }
+    vector_map_->Put(org, subscript);
+  }
+}
+
+void HLoopOptimization::GenerateVecMem(HInstruction* org,
+                                       HInstruction* opa,
+                                       HInstruction* opb,
+                                       Primitive::Type type) {
+  HInstruction* vector = nullptr;
+  if (vector_mode_ == kVector) {
+    // Vector store or load.
+    if (opb != nullptr) {
+      vector = new (global_allocator_) HVecStore(
+          global_allocator_, org->InputAt(0), opa, opb, type, vector_length_);
+    } else {
+      vector = new (global_allocator_) HVecLoad(
+          global_allocator_, org->InputAt(0), opa, type, vector_length_);
+    }
+  } else {
+    // Scalar store or load.
+    DCHECK(vector_mode_ == kSequential);
+    if (opb != nullptr) {
+      vector = new (global_allocator_) HArraySet(org->InputAt(0), opa, opb, type, kNoDexPc);
+    } else {
+      vector = new (global_allocator_) HArrayGet(org->InputAt(0), opa, type, kNoDexPc);
+    }
+  }
+  vector_map_->Put(org, vector);
+}
+
+#define GENERATE_VEC(x, y) \
+  if (vector_mode_ == kVector) { \
+    vector = (x); \
+  } else { \
+    DCHECK(vector_mode_ == kSequential); \
+    vector = (y); \
+  } \
+  break;
+
+void HLoopOptimization::GenerateVecOp(HInstruction* org,
+                                      HInstruction* opa,
+                                      HInstruction* opb,
+                                      Primitive::Type type) {
+  if (vector_mode_ == kSequential) {
+    // Scalar code follows implicit integral promotion.
+    if (type == Primitive::kPrimBoolean ||
+        type == Primitive::kPrimByte ||
+        type == Primitive::kPrimChar ||
+        type == Primitive::kPrimShort) {
+      type = Primitive::kPrimInt;
+    }
+  }
+  HInstruction* vector = nullptr;
+  switch (org->GetKind()) {
+    case HInstruction::kNeg:
+      DCHECK(opb == nullptr);
+      GENERATE_VEC(
+          new (global_allocator_) HVecNeg(global_allocator_, opa, type, vector_length_),
+          new (global_allocator_) HNeg(type, opa));
+    case HInstruction::kNot:
+      DCHECK(opb == nullptr);
+      GENERATE_VEC(
+          new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
+          new (global_allocator_) HNot(type, opa));
+    case HInstruction::kBooleanNot:
+      DCHECK(opb == nullptr);
+      GENERATE_VEC(
+          new (global_allocator_) HVecNot(global_allocator_, opa, type, vector_length_),
+          new (global_allocator_) HBooleanNot(opa));
+    case HInstruction::kTypeConversion:
+      DCHECK(opb == nullptr);
+      GENERATE_VEC(
+          new (global_allocator_) HVecCnv(global_allocator_, opa, type, vector_length_),
+          new (global_allocator_) HTypeConversion(type, opa, kNoDexPc));
+    case HInstruction::kAdd:
+      GENERATE_VEC(
+          new (global_allocator_) HVecAdd(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HAdd(type, opa, opb));
+    case HInstruction::kSub:
+      GENERATE_VEC(
+          new (global_allocator_) HVecSub(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HSub(type, opa, opb));
+    case HInstruction::kMul:
+      GENERATE_VEC(
+          new (global_allocator_) HVecMul(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HMul(type, opa, opb));
+    case HInstruction::kDiv:
+      GENERATE_VEC(
+          new (global_allocator_) HVecDiv(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HDiv(type, opa, opb, kNoDexPc));
+    case HInstruction::kAnd:
+      GENERATE_VEC(
+          new (global_allocator_) HVecAnd(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HAnd(type, opa, opb));
+    case HInstruction::kOr:
+      GENERATE_VEC(
+          new (global_allocator_) HVecOr(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HOr(type, opa, opb));
+    case HInstruction::kXor:
+      GENERATE_VEC(
+          new (global_allocator_) HVecXor(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HXor(type, opa, opb));
+    case HInstruction::kShl:
+      GENERATE_VEC(
+          new (global_allocator_) HVecShl(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HShl(type, opa, opb));
+    case HInstruction::kShr:
+      GENERATE_VEC(
+          new (global_allocator_) HVecShr(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HShr(type, opa, opb));
+    case HInstruction::kUShr:
+      GENERATE_VEC(
+          new (global_allocator_) HVecUShr(global_allocator_, opa, opb, type, vector_length_),
+          new (global_allocator_) HUShr(type, opa, opb));
+    case HInstruction::kInvokeStaticOrDirect: {
+      HInvokeStaticOrDirect* invoke = org->AsInvokeStaticOrDirect();
+      if (vector_mode_ == kVector) {
+        switch (invoke->GetIntrinsic()) {
+          case Intrinsics::kMathAbsInt:
+          case Intrinsics::kMathAbsLong:
+          case Intrinsics::kMathAbsFloat:
+          case Intrinsics::kMathAbsDouble:
+            DCHECK(opb == nullptr);
+            vector = new (global_allocator_) HVecAbs(global_allocator_, opa, type, vector_length_);
+            break;
+          default:
+            LOG(FATAL) << "Unsupported SIMD intrinsic";
+            UNREACHABLE();
+        }  // switch invoke
+      } else {
+        // In scalar code, simply clone the method invoke, and replace its operands with the
+        // corresponding new scalar instructions in the loop. The instruction will get an
+        // environment while being inserted from the instruction map in original program order.
+        DCHECK(vector_mode_ == kSequential);
+        HInvokeStaticOrDirect* new_invoke = new (global_allocator_) HInvokeStaticOrDirect(
+            global_allocator_,
+            invoke->GetNumberOfArguments(),
+            invoke->GetType(),
+            invoke->GetDexPc(),
+            invoke->GetDexMethodIndex(),
+            invoke->GetResolvedMethod(),
+            invoke->GetDispatchInfo(),
+            invoke->GetInvokeType(),
+            invoke->GetTargetMethod(),
+            invoke->GetClinitCheckRequirement());
+        HInputsRef inputs = invoke->GetInputs();
+        for (size_t index = 0; index < inputs.size(); ++index) {
+          new_invoke->SetArgumentAt(index, vector_map_->Get(inputs[index]));
+        }
+        vector = new_invoke;
+      }
+      break;
+    }
+    default:
+      break;
+  }  // switch
+  CHECK(vector != nullptr) << "Unsupported SIMD operator";
+  vector_map_->Put(org, vector);
+}
+
+#undef GENERATE_VEC
+
+//
+// Helpers.
+//
+
+bool HLoopOptimization::TrySetPhiInduction(HPhi* phi, bool restrict_uses) {
+  DCHECK(iset_->empty());
   ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
   if (set != nullptr) {
-    DCHECK(iset_->empty());
     for (HInstruction* i : *set) {
       // Check that, other than instructions that are no longer in the graph (removed earlier),
-      // each instruction is removable and, other than the phi, uses are contained in the cycle.
+      // each instruction is removable and, when restricted uses are requested, all uses
+      // other than the phi are contained within the cycle.
       if (!i->IsInBlock()) {
         continue;
       } else if (!i->IsRemovable()) {
         return false;
-      } else if (i != phi) {
+      } else if (i != phi && restrict_uses) {
         for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
           if (set->find(use.GetUser()) == set->end()) {
             return false;
@@ -344,10 +1059,12 @@
 //       c:   Condition(phi, bound)
 //       i:   If(c)
 // TODO: Find a less pattern-matching approach?
-bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+bool HLoopOptimization::TrySetSimpleLoopHeader(HBasicBlock* block) {
   DCHECK(iset_->empty());
   HInstruction* phi = block->GetFirstPhi();
-  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+  if (phi != nullptr &&
+      phi->GetNext() == nullptr &&
+      TrySetPhiInduction(phi->AsPhi(), /*restrict_uses*/ false)) {
     HInstruction* s = block->GetFirstInstruction();
     if (s != nullptr && s->IsSuspendCheck()) {
       HInstruction* c = s->GetNext();
@@ -365,14 +1082,24 @@
 }
 
 bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
-  if (block->GetFirstPhi() == nullptr) {
-    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
-      HInstruction* instruction = it.Current();
-      if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
-        return false;
-      }
+  if (!block->GetPhis().IsEmpty()) {
+    return false;
+  }
+  for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+    HInstruction* instruction = it.Current();
+    if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+      return false;
     }
-    return true;
+  }
+  return true;
+}
+
+bool HLoopOptimization::IsUsedOutsideLoop(HLoopInformation* loop_info,
+                                          HInstruction* instruction) {
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    if (use.GetUser()->GetBlock()->GetLoopInformation() != loop_info) {
+      return true;
+    }
   }
   return false;
 }
@@ -434,6 +1161,19 @@
   return false;
 }
 
+bool HLoopOptimization::TryAssignLastValue(HLoopInformation* loop_info,
+                                           HInstruction* instruction,
+                                           HBasicBlock* block,
+                                           bool collect_loop_uses) {
+  // Assigning the last value is always successful if there are no uses.
+  // Otherwise, it succeeds in a loop with no early exits by generating
+  // the proper last-value assignment.
+  int32_t use_count = 0;
+  return IsOnlyUsedAfterLoop(loop_info, instruction, collect_loop_uses, &use_count) &&
+      (use_count == 0 ||
+       (!IsEarlyExit(loop_info) && TryReplaceWithLastValue(instruction, block)));
+}
+
 void HLoopOptimization::RemoveDeadInstructions(const HInstructionList& list) {
   for (HBackwardInstructionIterator i(list); !i.Done(); i.Advance()) {
     HInstruction* instruction = i.Current();
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 9ddab41..d8f50aa 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -23,13 +23,18 @@
 
 namespace art {
 
+class CompilerDriver;
+
 /**
  * Loop optimizations. Builds a loop hierarchy and applies optimizations to
- * the detected nested loops, such as removal of dead induction and empty loops.
+ * the detected nested loops, such as removal of dead induction and empty loops
+ * and inner loop vectorization.
  */
 class HLoopOptimization : public HOptimization {
  public:
-  HLoopOptimization(HGraph* graph, HInductionVarAnalysis* induction_analysis);
+  HLoopOptimization(HGraph* graph,
+                    CompilerDriver* compiler_driver,
+                    HInductionVarAnalysis* induction_analysis);
 
   void Run() OVERRIDE;
 
@@ -46,36 +51,111 @@
           inner(nullptr),
           previous(nullptr),
           next(nullptr) {}
-    HLoopInformation* const loop_info;
+    HLoopInformation* loop_info;
     LoopNode* outer;
     LoopNode* inner;
     LoopNode* previous;
     LoopNode* next;
   };
 
-  void LocalRun();
+  /*
+   * Vectorization restrictions (bit mask).
+   */
+  enum VectorRestrictions {
+    kNone     = 0,   // no restrictions
+    kNoMul    = 1,   // no multiplication
+    kNoDiv    = 2,   // no division
+    kNoShift  = 4,   // no shift
+    kNoShr    = 8,   // no arithmetic shift right
+    kNoHiBits = 16,  // "wider" operations cannot bring in higher order bits
+    kNoAbs    = 32,  // no absolute value
+  };
 
+  /*
+   * Vectorization mode during synthesis
+   * (sequential peeling/cleanup loop or vector loop).
+   */
+  enum VectorMode {
+    kSequential,
+    kVector
+  };
+
+  /*
+   * Representation of a unit-stride array reference.
+   */
+  struct ArrayReference {
+    ArrayReference(HInstruction* b, HInstruction* o, Primitive::Type t, bool l)
+        : base(b), offset(o), type(t), lhs(l) { }
+    bool operator<(const ArrayReference& other) const {
+      return
+          (base < other.base) ||
+          (base == other.base &&
+           (offset < other.offset || (offset == other.offset &&
+                                      (type < other.type ||
+                                       (type == other.type && lhs < other.lhs)))));
+    }
+    HInstruction* base;    // base address
+    HInstruction* offset;  // offset + i
+    Primitive::Type type;  // component type
+    bool lhs;              // def/use
+  };
+
+  // Loop setup and traversal.
+  void LocalRun();
   void AddLoop(HLoopInformation* loop_info);
   void RemoveLoop(LoopNode* node);
-
   void TraverseLoopsInnerToOuter(LoopNode* node);
 
-  // Simplification.
+  // Optimization.
   void SimplifyInduction(LoopNode* node);
   void SimplifyBlocks(LoopNode* node);
-  bool SimplifyInnerLoop(LoopNode* node);
+  void OptimizeInnerLoop(LoopNode* node);
+
+  // Vectorization analysis and synthesis.
+  bool CanVectorize(LoopNode* node, HBasicBlock* block, int64_t trip_count);
+  void Vectorize(LoopNode* node, HBasicBlock* block, HBasicBlock* exit, int64_t trip_count);
+  void GenerateNewLoop(LoopNode* node,
+                       HBasicBlock* block,
+                       HBasicBlock* new_preheader,
+                       HInstruction* lo,
+                       HInstruction* hi,
+                       HInstruction* step);
+  bool VectorizeDef(LoopNode* node, HInstruction* instruction, bool generate_code);
+  bool VectorizeUse(LoopNode* node,
+                    HInstruction* instruction,
+                    bool generate_code,
+                    Primitive::Type type,
+                    uint64_t restrictions);
+  bool TrySetVectorType(Primitive::Type type, /*out*/ uint64_t* restrictions);
+  bool TrySetVectorLength(uint32_t length);
+  void GenerateVecInv(HInstruction* org, Primitive::Type type);
+  void GenerateVecSub(HInstruction* org, HInstruction* off);
+  void GenerateVecMem(HInstruction* org,
+                      HInstruction* opa,
+                      HInstruction* opb,
+                      Primitive::Type type);
+  void GenerateVecOp(HInstruction* org, HInstruction* opa, HInstruction* opb, Primitive::Type type);
 
   // Helpers.
-  bool IsPhiInduction(HPhi* phi);
-  bool IsEmptyHeader(HBasicBlock* block);
+  bool TrySetPhiInduction(HPhi* phi, bool restrict_uses);
+  bool TrySetSimpleLoopHeader(HBasicBlock* block);
   bool IsEmptyBody(HBasicBlock* block);
   bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
                            HInstruction* instruction,
                            bool collect_loop_uses,
                            /*out*/ int32_t* use_count);
+  bool IsUsedOutsideLoop(HLoopInformation* loop_info,
+                         HInstruction* instruction);
   bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
+  bool TryAssignLastValue(HLoopInformation* loop_info,
+                          HInstruction* instruction,
+                          HBasicBlock* block,
+                          bool collect_loop_uses);
   void RemoveDeadInstructions(const HInstructionList& list);
 
+  // Compiler driver (to query ISA features).
+  const CompilerDriver* compiler_driver_;
+
   // Range information based on prior induction variable analysis.
   InductionVarRange induction_range_;
 
@@ -83,6 +163,9 @@
   // through this allocator is immediately released when the loop optimizer is done.
   ArenaAllocator* loop_allocator_;
 
+  // Global heap memory allocator. Used to build HIR.
+  ArenaAllocator* global_allocator_;
+
   // Entries into the loop hierarchy representation. The hierarchy resides
   // in phase-local heap memory.
   LoopNode* top_loop_;
@@ -95,11 +178,33 @@
   // Counter that tracks how many induction cycles have been simplified. Useful
   // to trigger incremental updates of induction variable analysis of outer loops
   // when the induction of inner loops has changed.
-  int32_t induction_simplication_count_;
+  uint32_t induction_simplication_count_;
 
   // Flag that tracks if any simplifications have occurred.
   bool simplified_;
 
+  // Number of "lanes" for selected packed type.
+  uint32_t vector_length_;
+
+  // Set of array references in the vector loop.
+  // Contents reside in phase-local heap memory.
+  ArenaSet<ArrayReference>* vector_refs_;
+
+  // Mapping used during vectorization synthesis for both the scalar peeling/cleanup
+  // loop (vector_mode_ is kSequential) and the actual vector loop (vector_mode_ is
+  // kVector). The data structure maps original instructions into the new instructions.
+  // Contents reside in phase-local heap memory.
+  ArenaSafeMap<HInstruction*, HInstruction*>* vector_map_;
+
+  // Temporary vectorization bookkeeping.
+  HBasicBlock* vector_preheader_;  // preheader of the new loop
+  HBasicBlock* vector_header_;  // header of the new loop
+  HBasicBlock* vector_body_;  // body of the new loop
+  HInstruction* vector_runtime_test_a_;  // paired with _b_ below to define
+  HInstruction* vector_runtime_test_b_;  // the a != b runtime test
+  HPhi* vector_phi_;  // the Phi representing the normalized loop index
+  VectorMode vector_mode_;  // selects synthesis mode
+
   friend class LoopOptimizationTest;
 
   DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/loop_optimization_test.cc b/compiler/optimizing/loop_optimization_test.cc
index 9a6b493..5b93506 100644
--- a/compiler/optimizing/loop_optimization_test.cc
+++ b/compiler/optimizing/loop_optimization_test.cc
@@ -31,7 +31,7 @@
         allocator_(&pool_),
         graph_(CreateGraph(&allocator_)),
         iva_(new (&allocator_) HInductionVarAnalysis(graph_)),
-        loop_opt_(new (&allocator_) HLoopOptimization(graph_, iva_)) {
+        loop_opt_(new (&allocator_) HLoopOptimization(graph_, nullptr, iva_)) {
     BuildGraph();
   }
 
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 62c8910..e71fea9 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -1088,6 +1088,19 @@
   DCHECK(env_uses_.empty());
 }
 
+void HInstruction::ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement) {
+  const HUseList<HInstruction*>& uses = GetUses();
+  for (auto it = uses.begin(), end = uses.end(); it != end; /* ++it below */) {
+    HInstruction* user = it->GetUser();
+    size_t index = it->GetIndex();
+    // Increment `it` now because `*it` may disappear thanks to user->ReplaceInput().
+    ++it;
+    if (dominator->StrictlyDominates(user)) {
+      user->ReplaceInput(replacement, index);
+    }
+  }
+}
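(Aside: a hypothetical call-site sketch for the new helper; anchor, instruction, and replacement are invented names, only the signature above is real.)

    // Uses of `instruction` that are strictly dominated by `anchor` are
    // redirected to `replacement`; uses before `anchor` keep the old value.
    instruction->ReplaceUsesDominatedBy(anchor, replacement);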
+
 void HInstruction::ReplaceInput(HInstruction* replacement, size_t index) {
   HUserRecord<HInstruction*> input_use = InputRecordAt(index);
   if (input_use.GetInstruction() == replacement) {
@@ -1323,6 +1336,18 @@
   }
 }
 
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs) {
+  switch (rhs) {
+    case HDeoptimize::Kind::kBCE:
+      return os << "bce";
+    case HDeoptimize::Kind::kInline:
+      return os << "inline";
+    default:
+      LOG(FATAL) << "Unknown Deoptimization kind: " << static_cast<int>(rhs);
+      UNREACHABLE();
+  }
+}
+
 bool HCondition::IsBeforeWhenDisregardMoves(HInstruction* instruction) const {
   return this == instruction->GetPreviousDisregardingMoves();
 }
@@ -2046,6 +2071,9 @@
   if (HasTryCatch()) {
     outer_graph->SetHasTryCatch(true);
   }
+  if (HasSIMD()) {
+    outer_graph->SetHasSIMD(true);
+  }
 
   HInstruction* return_value = nullptr;
   if (GetBlocks().size() == 3) {
@@ -2179,6 +2207,9 @@
       }
     }
     if (rerun_loop_analysis) {
+      DCHECK(!outer_graph->HasIrreducibleLoops())
+          << "Recomputing loop information in graphs with irreducible loops "
+          << "is unsupported, as it could lead to loop header changes";
       outer_graph->ClearLoopInformation();
       outer_graph->ClearDominanceInformation();
       outer_graph->BuildDominatorTree();
@@ -2309,6 +2340,68 @@
       new_pre_header, old_pre_header, /* replace_if_back_edge */ false);
 }
 
+HBasicBlock* HGraph::TransformLoopForVectorization(HBasicBlock* header,
+                                                   HBasicBlock* body,
+                                                   HBasicBlock* exit) {
+  DCHECK(header->IsLoopHeader());
+  HLoopInformation* loop = header->GetLoopInformation();
+
+  // Add new loop blocks.
+  HBasicBlock* new_pre_header = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* new_header = new (arena_) HBasicBlock(this, header->GetDexPc());
+  HBasicBlock* new_body = new (arena_) HBasicBlock(this, header->GetDexPc());
+  AddBlock(new_pre_header);
+  AddBlock(new_header);
+  AddBlock(new_body);
+
+  // Set up control flow.
+  header->ReplaceSuccessor(exit, new_pre_header);
+  new_pre_header->AddSuccessor(new_header);
+  new_header->AddSuccessor(exit);
+  new_header->AddSuccessor(new_body);
+  new_body->AddSuccessor(new_header);
+
+  // Set up dominators.
+  header->ReplaceDominatedBlock(exit, new_pre_header);
+  new_pre_header->SetDominator(header);
+  new_pre_header->dominated_blocks_.push_back(new_header);
+  new_header->SetDominator(new_pre_header);
+  new_header->dominated_blocks_.push_back(new_body);
+  new_body->SetDominator(new_header);
+  new_header->dominated_blocks_.push_back(exit);
+  exit->SetDominator(new_header);
+
+  // Fix reverse post order.
+  size_t index_of_header = IndexOfElement(reverse_post_order_, header);
+  MakeRoomFor(&reverse_post_order_, 2, index_of_header);
+  reverse_post_order_[++index_of_header] = new_pre_header;
+  reverse_post_order_[++index_of_header] = new_header;
+  size_t index_of_body = IndexOfElement(reverse_post_order_, body);
+  MakeRoomFor(&reverse_post_order_, 1, index_of_body - 1);
+  reverse_post_order_[index_of_body] = new_body;
+
+  // Add gotos and suspend check (client must add conditional in header).
+  new_pre_header->AddInstruction(new (arena_) HGoto());
+  HSuspendCheck* suspend_check = new (arena_) HSuspendCheck(header->GetDexPc());
+  new_header->AddInstruction(suspend_check);
+  new_body->AddInstruction(new (arena_) HGoto());
+  suspend_check->CopyEnvironmentFromWithLoopPhiAdjustment(
+      loop->GetSuspendCheck()->GetEnvironment(), header);
+
+  // Update loop information.
+  new_header->AddBackEdge(new_body);
+  new_header->GetLoopInformation()->SetSuspendCheck(suspend_check);
+  new_header->GetLoopInformation()->Populate();
+  new_pre_header->SetLoopInformation(loop->GetPreHeader()->GetLoopInformation());  // outward
+  HLoopInformationOutwardIterator it(*new_header);
+  for (it.Advance(); !it.Done(); it.Advance()) {
+    it.Current()->Add(new_pre_header);
+    it.Current()->Add(new_header);
+    it.Current()->Add(new_body);
+  }
+  return new_pre_header;
+}
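+
+// Control flow after the transformation above (sketch; the original loop
+// body is unchanged and still iterates back to `header`):
+//
+//   header --> new_pre_header --> new_header --> exit
+//                                   |      ^
+//                                   v      |
+//                                  new_body
+//
+// The caller must still add the loop condition to `new_header`, as noted in
+// the comment on the gotos above.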
+
 static void CheckAgainstUpperBound(ReferenceTypeInfo rti, ReferenceTypeInfo upper_bound_rti)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (rti.IsValid()) {
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 8a9e618..671f950 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -323,6 +323,7 @@
         temporaries_vreg_slots_(0),
         has_bounds_checks_(false),
         has_try_catch_(false),
+        has_simd_(false),
         has_loops_(false),
         has_irreducible_loops_(false),
         debuggable_(debuggable),
@@ -340,6 +341,7 @@
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_current_method_(nullptr),
+        art_method_(nullptr),
         inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
         osr_(osr),
         cha_single_implementation_list_(arena->Adapter(kArenaAllocCHA)) {
@@ -398,6 +400,12 @@
   // put deoptimization instructions, etc.
   void TransformLoopHeaderForBCE(HBasicBlock* header);
 
+  // Adds a new loop directly after the loop with the given header and exit.
+  // Returns the new preheader.
+  HBasicBlock* TransformLoopForVectorization(HBasicBlock* header,
+                                             HBasicBlock* body,
+                                             HBasicBlock* exit);
+
   // Removes `block` from the graph. Assumes `block` has been disconnected from
   // other blocks and has no instructions or phis.
   void DeleteDeadEmptyBlock(HBasicBlock* block);
@@ -560,6 +568,9 @@
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
+  bool HasSIMD() const { return has_simd_; }
+  void SetHasSIMD(bool value) { has_simd_ = value; }
+
   bool HasLoops() const { return has_loops_; }
   void SetHasLoops(bool value) { has_loops_ = value; }
 
@@ -652,6 +663,11 @@
   // false positives.
   bool has_try_catch_;
 
+  // Flag whether SIMD instructions appear in the graph. If true, the
+  // code generators may have to be more careful when spilling the wider
+  // contents of SIMD registers.
+  bool has_simd_;
+
   // Flag whether there are any loops in the graph. We can skip loop
   // optimization if it's false. It's only best effort to keep it up
   // to date in the presence of code elimination so there might be false
@@ -1353,6 +1369,26 @@
   M(TypeConversion, Instruction)                                        \
   M(UShr, BinaryOperation)                                              \
   M(Xor, BinaryOperation)                                               \
+  M(VecReplicateScalar, VecUnaryOperation)                              \
+  M(VecSetScalars, VecOperation)                                        \
+  M(VecSumReduce, VecUnaryOperation)                                    \
+  M(VecCnv, VecUnaryOperation)                                          \
+  M(VecNeg, VecUnaryOperation)                                          \
+  M(VecAbs, VecUnaryOperation)                                          \
+  M(VecNot, VecUnaryOperation)                                          \
+  M(VecAdd, VecBinaryOperation)                                         \
+  M(VecSub, VecBinaryOperation)                                         \
+  M(VecMul, VecBinaryOperation)                                         \
+  M(VecDiv, VecBinaryOperation)                                         \
+  M(VecAnd, VecBinaryOperation)                                         \
+  M(VecAndNot, VecBinaryOperation)                                      \
+  M(VecOr, VecBinaryOperation)                                          \
+  M(VecXor, VecBinaryOperation)                                         \
+  M(VecShl, VecBinaryOperation)                                         \
+  M(VecShr, VecBinaryOperation)                                         \
+  M(VecUShr, VecBinaryOperation)                                        \
+  M(VecLoad, VecMemoryOperation)                                        \
+  M(VecStore, VecMemoryOperation)                                       \
 
 /*
  * Instructions, shared across several (not all) architectures.
@@ -1414,7 +1450,11 @@
   M(Constant, Instruction)                                              \
   M(UnaryOperation, Instruction)                                        \
   M(BinaryOperation, Instruction)                                       \
-  M(Invoke, Instruction)
+  M(Invoke, Instruction)                                                \
+  M(VecOperation, Instruction)                                          \
+  M(VecUnaryOperation, VecOperation)                                    \
+  M(VecBinaryOperation, VecOperation)                                   \
+  M(VecMemoryOperation, VecOperation)
 
 #define FOR_EACH_INSTRUCTION(M)                                         \
   FOR_EACH_CONCRETE_INSTRUCTION(M)                                      \
@@ -1734,11 +1774,11 @@
 // A HEnvironment object contains the values of virtual registers at a given location.
 class HEnvironment : public ArenaObject<kArenaAllocEnvironment> {
  public:
-  HEnvironment(ArenaAllocator* arena,
-               size_t number_of_vregs,
-               ArtMethod* method,
-               uint32_t dex_pc,
-               HInstruction* holder)
+  ALWAYS_INLINE HEnvironment(ArenaAllocator* arena,
+                             size_t number_of_vregs,
+                             ArtMethod* method,
+                             uint32_t dex_pc,
+                             HInstruction* holder)
      : vregs_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentVRegs)),
        locations_(number_of_vregs, arena->Adapter(kArenaAllocEnvironmentLocations)),
        parent_(nullptr),
@@ -1747,7 +1787,7 @@
        holder_(holder) {
   }
 
-  HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder)
+  ALWAYS_INLINE HEnvironment(ArenaAllocator* arena, const HEnvironment& to_copy, HInstruction* holder)
       : HEnvironment(arena,
                      to_copy.Size(),
                      to_copy.GetMethod(),
@@ -1914,6 +1954,9 @@
 
   virtual bool IsControlFlow() const { return false; }
 
+  // Can the instruction throw?
+  // TODO: We should rename this to CanVisiblyThrow, as some instructions
+  // (like HNewInstance) could throw OOME but are still OK to remove when unused.
   virtual bool CanThrow() const { return false; }
   bool CanThrowIntoCatchBlock() const { return CanThrow() && block_->IsTryBlock(); }
 
@@ -2068,6 +2111,7 @@
   void SetLocations(LocationSummary* locations) { locations_ = locations; }
 
   void ReplaceWith(HInstruction* instruction);
+  void ReplaceUsesDominatedBy(HInstruction* dominator, HInstruction* replacement);
   void ReplaceInput(HInstruction* replacement, size_t index);
 
   // This is almost the same as doing `ReplaceWith()`. But in this helper, the
@@ -2931,28 +2975,97 @@
 };
 
 // Deoptimize to interpreter, upon checking a condition.
-class HDeoptimize FINAL : public HTemplateInstruction<1> {
+class HDeoptimize FINAL : public HVariableInputSizeInstruction {
  public:
-  // We set CanTriggerGC to prevent any intermediate address to be live
-  // at the point of the `HDeoptimize`.
-  HDeoptimize(HInstruction* cond, uint32_t dex_pc)
-      : HTemplateInstruction(SideEffects::CanTriggerGC(), dex_pc) {
+  enum class Kind {
+    kBCE,
+    kInline,
+    kLast = kInline
+  };
+
+  // Use this constructor when the `HDeoptimize` acts as a barrier, across which no
+  // code can move.
+  HDeoptimize(ArenaAllocator* arena, HInstruction* cond, Kind kind, uint32_t dex_pc)
+      : HVariableInputSizeInstruction(
+            SideEffects::All(),
+            dex_pc,
+            arena,
+            /* number_of_inputs */ 1,
+            kArenaAllocMisc) {
+    SetPackedFlag<kFieldCanBeMoved>(false);
+    SetPackedField<DeoptimizeKindField>(kind);
     SetRawInputAt(0, cond);
   }
 
-  bool CanBeMoved() const OVERRIDE { return true; }
-  bool InstructionDataEquals(const HInstruction* other ATTRIBUTE_UNUSED) const OVERRIDE {
-    return true;
+  // Use this constructor when the `HDeoptimize` guards an instruction; any user
+  // that relies on the deoptimization passing should then take the `HDeoptimize`
+  // as its input instead of `guard`.
+  // We set CanTriggerGC to prevent any intermediate address to be live
+  // at the point of the `HDeoptimize`.
+  HDeoptimize(ArenaAllocator* arena,
+              HInstruction* cond,
+              HInstruction* guard,
+              Kind kind,
+              uint32_t dex_pc)
+      : HVariableInputSizeInstruction(
+            SideEffects::CanTriggerGC(),
+            dex_pc,
+            arena,
+            /* number_of_inputs */ 2,
+            kArenaAllocMisc) {
+    SetPackedFlag<kFieldCanBeMoved>(true);
+    SetPackedField<DeoptimizeKindField>(kind);
+    SetRawInputAt(0, cond);
+    SetRawInputAt(1, guard);
   }
+
+  bool CanBeMoved() const OVERRIDE { return GetPackedFlag<kFieldCanBeMoved>(); }
+
+  bool InstructionDataEquals(const HInstruction* other) const OVERRIDE {
+    return (other->CanBeMoved() == CanBeMoved()) && (other->AsDeoptimize()->GetKind() == GetKind());
+  }
+
   bool NeedsEnvironment() const OVERRIDE { return true; }
+
   bool CanThrow() const OVERRIDE { return true; }
 
+  Kind GetKind() const { return GetPackedField<DeoptimizeKindField>(); }
+
+  Primitive::Type GetType() const OVERRIDE {
+    return GuardsAnInput() ? GuardedInput()->GetType() : Primitive::kPrimVoid;
+  }
+
+  bool GuardsAnInput() const {
+    return InputCount() == 2;
+  }
+
+  HInstruction* GuardedInput() const {
+    DCHECK(GuardsAnInput());
+    return InputAt(1);
+  }
+
+  void RemoveGuard() {
+    RemoveInputAt(1);
+  }
+
   DECLARE_INSTRUCTION(Deoptimize);
 
  private:
+  static constexpr size_t kFieldCanBeMoved = kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldDeoptimizeKind = kNumberOfGenericPackedBits + 1;
+  static constexpr size_t kFieldDeoptimizeKindSize =
+      MinimumBitsToStore(static_cast<size_t>(Kind::kLast));
+  static constexpr size_t kNumberOfDeoptimizePackedBits =
+      kFieldDeoptimizeKind + kFieldDeoptimizeKindSize;
+  static_assert(kNumberOfDeoptimizePackedBits <= kMaxNumberOfPackedBits,
+                "Too many packed fields.");
+  using DeoptimizeKindField = BitField<Kind, kFieldDeoptimizeKind, kFieldDeoptimizeKindSize>;
+
   DISALLOW_COPY_AND_ASSIGN(HDeoptimize);
 };
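+// Construction sketch for the two forms above (all variable names are
+// hypothetical; presumably BCE and the inliner, after which the kinds are
+// named, do this for real):
+//
+//   // Barrier form: no code may move across the deoptimization.
+//   HDeoptimize* barrier =
+//       new (arena) HDeoptimize(arena, cond, HDeoptimize::Kind::kBCE, dex_pc);
+//
+//   // Guarding form: users that rely on the guard holding take the
+//   // deoptimization itself as their input instead of `guard`.
+//   HDeoptimize* deopt =
+//       new (arena) HDeoptimize(arena, cond, guard, HDeoptimize::Kind::kInline, dex_pc);
+//   guard->ReplaceUsesDominatedBy(deopt, deopt);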
 
+std::ostream& operator<<(std::ostream& os, const HDeoptimize::Kind& rhs);
+
 // Represents a should_deoptimize flag. Currently used for CHA-based devirtualization.
 // The compiled code checks this flag value in a guard before devirtualized call and
 // if it's true, starts to do deoptimization.
@@ -3912,6 +4025,7 @@
   bool IsIntrinsic() const { return intrinsic_ != Intrinsics::kNone; }
 
   ArtMethod* GetResolvedMethod() const { return resolved_method_; }
+  void SetResolvedMethod(ArtMethod* method) { resolved_method_ = method; }
 
   DECLARE_ABSTRACT_INSTRUCTION(Invoke);
 
@@ -3954,7 +4068,7 @@
   }
 
   uint32_t number_of_arguments_;
-  ArtMethod* const resolved_method_;
+  ArtMethod* resolved_method_;
   const uint32_t dex_method_index_;
   Intrinsics intrinsic_;
 
@@ -4111,6 +4225,10 @@
     dispatch_info_ = dispatch_info;
   }
 
+  DispatchInfo GetDispatchInfo() const {
+    return dispatch_info_;
+  }
+
   void AddSpecialInput(HInstruction* input) {
     // We allow only one special input.
     DCHECK(!IsStringInit() && !HasCurrentMethodInput());
@@ -5541,8 +5659,6 @@
 
     // Use a known boot image Class* address, embedded in the code by the codegen.
     // Used for boot image classes referenced by apps in AOT- and JIT-compiled code.
-    // Note: codegen needs to emit a linker patch if indicated by compiler options'
-    // GetIncludePatchInformation().
     kBootImageAddress,
 
     // Load from an entry in the .bss section using a PC-relative load.
@@ -5746,8 +5862,6 @@
 
     // Use a known boot image String* address, embedded in the code by the codegen.
     // Used for boot image strings referenced by apps in AOT- and JIT-compiled code.
-    // Note: codegen needs to emit a linker patch if indicated by compiler options'
-    // GetIncludePatchInformation().
     kBootImageAddress,
 
     // Load from an entry in the .bss section using a PC-relative load.
@@ -6609,6 +6723,8 @@
 
 }  // namespace art
 
+#include "nodes_vector.h"
+
 #if defined(ART_ENABLE_CODEGEN_arm) || defined(ART_ENABLE_CODEGEN_arm64)
 #include "nodes_shared.h"
 #endif
diff --git a/compiler/optimizing/nodes_vector.h b/compiler/optimizing/nodes_vector.h
new file mode 100644
index 0000000..0cbbf2a
--- /dev/null
+++ b/compiler/optimizing/nodes_vector.h
@@ -0,0 +1,604 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
+#define ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
+
+// This #include is never strictly needed for compilation, because this header file
+// (nodes_vector.h) is itself included by nodes.h. However, it gives editing tools
+// better context.
+#include "nodes.h"
+
+namespace art {
+
+// Memory alignment, represented as an offset relative to a base, where 0 <= offset < base,
+// and base is a power of two. For example, the value Alignment(16, 0) means memory is
+// perfectly aligned at a 16-byte boundary, whereas the value Alignment(16, 4) means
+// memory is always exactly 4 bytes above such a boundary.
+class Alignment {
+ public:
+  Alignment(size_t base, size_t offset) : base_(base), offset_(offset) {
+    DCHECK_LT(offset, base);
+    DCHECK(IsPowerOfTwo(base));
+  }
+
+  // Returns true if memory is "at least" aligned at the given boundary.
+  // Assumes the requested base is a power of two.
+  bool IsAlignedAt(size_t base) const {
+    DCHECK_NE(0u, base);
+    DCHECK(IsPowerOfTwo(base));
+    return ((offset_ | base_) & (base - 1u)) == 0;
+  }
+
+  std::string ToString() const {
+    return "ALIGN(" + std::to_string(base_) + "," + std::to_string(offset_) + ")";
+  }
+
+ private:
+  size_t base_;
+  size_t offset_;
+};
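+
+// For example (illustrative): Alignment(16, 4) describes addresses of the
+// form 16 * k + 4, so:
+//
+//   Alignment(16, 4).IsAlignedAt(4);   // true:  ((4 | 16) & 3) == 0
+//   Alignment(16, 4).IsAlignedAt(8);   // false: ((4 | 16) & 7) == 4
+//   Alignment(16, 0).IsAlignedAt(16);  // true:  perfectly aligned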
+
+//
+// Definitions of abstract vector operations in HIR.
+//
+
+// Abstraction of a vector operation, i.e., an operation that performs
+// GetVectorLength() x GetPackedType() operations simultaneously.
+class HVecOperation : public HVariableInputSizeInstruction {
+ public:
+  HVecOperation(ArenaAllocator* arena,
+                Primitive::Type packed_type,
+                SideEffects side_effects,
+                size_t number_of_inputs,
+                size_t vector_length,
+                uint32_t dex_pc)
+      : HVariableInputSizeInstruction(side_effects,
+                                      dex_pc,
+                                      arena,
+                                      number_of_inputs,
+                                      kArenaAllocVectorNode),
+        vector_length_(vector_length) {
+    SetPackedField<TypeField>(packed_type);
+    DCHECK_LT(1u, vector_length);
+  }
+
+  // Returns the number of elements packed in a vector.
+  size_t GetVectorLength() const {
+    return vector_length_;
+  }
+
+  // Returns the number of bytes in a full vector.
+  size_t GetVectorNumberOfBytes() const {
+    return vector_length_ * Primitive::ComponentSize(GetPackedType());
+  }
+
+  // Returns the type of the vector operation: a SIMD operation looks like an FPU location.
+  // TODO: we could introduce SIMD types in HIR.
+  Primitive::Type GetType() const OVERRIDE {
+    return Primitive::kPrimDouble;
+  }
+
+  // Returns the true component type packed in a vector.
+  Primitive::Type GetPackedType() const {
+    return GetPackedField<TypeField>();
+  }
+
+  DECLARE_ABSTRACT_INSTRUCTION(VecOperation);
+
+ private:
+  // Additional packed bits.
+  static constexpr size_t kFieldType = HInstruction::kNumberOfGenericPackedBits;
+  static constexpr size_t kFieldTypeSize =
+      MinimumBitsToStore(static_cast<size_t>(Primitive::kPrimLast));
+  static constexpr size_t kNumberOfVectorOpPackedBits = kFieldType + kFieldTypeSize;
+  static_assert(kNumberOfVectorOpPackedBits <= kMaxNumberOfPackedBits, "Too many packed fields.");
+  using TypeField = BitField<Primitive::Type, kFieldType, kFieldTypeSize>;
+
+  const size_t vector_length_;
+
+  DISALLOW_COPY_AND_ASSIGN(HVecOperation);
+};
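+
+// As an illustration, a vector operation created with packed type kPrimInt
+// and vector length 4 packs four 32-bit lanes: GetVectorLength() == 4 and
+// GetVectorNumberOfBytes() == 16, while GetType() still reports the
+// kPrimDouble placeholder described above.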
+
+// Abstraction of a unary vector operation.
+class HVecUnaryOperation : public HVecOperation {
+ public:
+  HVecUnaryOperation(ArenaAllocator* arena,
+                     Primitive::Type packed_type,
+                     size_t vector_length,
+                     uint32_t dex_pc)
+      : HVecOperation(arena,
+                      packed_type,
+                      SideEffects::None(),
+                      /*number_of_inputs*/ 1,
+                      vector_length,
+                      dex_pc) { }
+  DECLARE_ABSTRACT_INSTRUCTION(VecUnaryOperation);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecUnaryOperation);
+};
+
+// Abstraction of a binary vector operation.
+class HVecBinaryOperation : public HVecOperation {
+ public:
+  HVecBinaryOperation(ArenaAllocator* arena,
+                      Primitive::Type packed_type,
+                      size_t vector_length,
+                      uint32_t dex_pc)
+      : HVecOperation(arena,
+                      packed_type,
+                      SideEffects::None(),
+                      /*number_of_inputs*/ 2,
+                      vector_length,
+                      dex_pc) { }
+  DECLARE_ABSTRACT_INSTRUCTION(VecBinaryOperation);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecBinaryOperation);
+};
+
+// Abstraction of a vector operation that references memory, with an alignment.
+// The Android runtime guarantees at least "component size" alignment for array
+// elements and, thus, vectors.
+class HVecMemoryOperation : public HVecOperation {
+ public:
+  HVecMemoryOperation(ArenaAllocator* arena,
+                      Primitive::Type packed_type,
+                      SideEffects side_effects,
+                      size_t number_of_inputs,
+                      size_t vector_length,
+                      uint32_t dex_pc)
+      : HVecOperation(arena, packed_type, side_effects, number_of_inputs, vector_length, dex_pc),
+        alignment_(Primitive::ComponentSize(packed_type), 0) { }
+
+  void SetAlignment(Alignment alignment) { alignment_ = alignment; }
+
+  Alignment GetAlignment() const { return alignment_; }
+
+  DECLARE_ABSTRACT_INSTRUCTION(VecMemoryOperation);
+
+ private:
+  Alignment alignment_;
+
+  DISALLOW_COPY_AND_ASSIGN(HVecMemoryOperation);
+};
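+
+// By default the constructor above only assumes "component size" alignment,
+// e.g. Alignment(4, 0) for a kPrimInt vector; a client that can prove more,
+// say a full 16-byte boundary, may promote it (illustrative; `vec_mem_op` is
+// a hypothetical HVecMemoryOperation*):
+//
+//   vec_mem_op->SetAlignment(Alignment(16, 0));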
+
+//
+// Definitions of concrete vector operations in HIR.
+//
+
+// Replicates the given scalar into a vector,
+// viz. replicate(x) = [ x, .. , x ].
+class HVecReplicateScalar FINAL : public HVecUnaryOperation {
+ public:
+  HVecReplicateScalar(ArenaAllocator* arena,
+                      HInstruction* scalar,
+                      Primitive::Type packed_type,
+                      size_t vector_length,
+                      uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    SetRawInputAt(0, scalar);
+  }
+  DECLARE_INSTRUCTION(VecReplicateScalar);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecReplicateScalar);
+};
+
+// Assigns the given scalar elements to a vector,
+// viz. set( array(x1, .., xn) ) = [ x1, .. , xn ].
+class HVecSetScalars FINAL : public HVecOperation {
+ public:
+  HVecSetScalars(ArenaAllocator* arena,
+                 HInstruction** scalars,  // array
+                 Primitive::Type packed_type,
+                 size_t vector_length,
+                 uint32_t dex_pc = kNoDexPc)
+      : HVecOperation(arena,
+                      packed_type,
+                      SideEffects::None(),
+                      /*number_of_inputs*/ vector_length,
+                      vector_length,
+                      dex_pc) {
+    // One input per packed element; a unary base class would only reserve
+    // room for a single input.
+    for (size_t i = 0; i < vector_length; i++) {
+      SetRawInputAt(i, scalars[i]);
+    }
+  }
+  DECLARE_INSTRUCTION(VecSetScalars);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecSetScalars);
+};
+
+// Sum-reduces the given vector into a shorter vector (m < n) or scalar (m = 1),
+// viz. sum-reduce[ x1, .. , xn ] = [ y1, .., ym ], where yi = sum_j x_j.
+class HVecSumReduce FINAL : public HVecUnaryOperation {
+ public:
+  HVecSumReduce(ArenaAllocator* arena,
+                HInstruction* input,
+                Primitive::Type packed_type,
+                size_t vector_length,
+                uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, input);
+  }
+
+  // TODO: the result type probably needs integral promotion.
+  Primitive::Type GetType() const OVERRIDE { return GetPackedType(); }
+
+  DECLARE_INSTRUCTION(VecSumReduce);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecSumReduce);
+};
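+
+// For example (illustrative), reducing all the way down to a scalar (m = 1):
+//   sum-reduce[ 1, 2, 3, 4 ] = [ 10 ].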
+
+// Converts every component in the vector,
+// viz. cnv[ x1, .. , xn ]  = [ cnv(x1), .. , cnv(xn) ].
+class HVecCnv FINAL : public HVecUnaryOperation {
+ public:
+  HVecCnv(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_NE(input->AsVecOperation()->GetPackedType(), packed_type);  // actual convert
+    SetRawInputAt(0, input);
+  }
+
+  Primitive::Type GetInputType() const { return InputAt(0)->AsVecOperation()->GetPackedType(); }
+  Primitive::Type GetResultType() const { return GetPackedType(); }
+
+  DECLARE_INSTRUCTION(VecCnv);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecCnv);
+};
+
+// Negates every component in the vector,
+// viz. neg[ x1, .. , xn ]  = [ -x1, .. , -xn ].
+class HVecNeg FINAL : public HVecUnaryOperation {
+ public:
+  HVecNeg(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, input);
+  }
+  DECLARE_INSTRUCTION(VecNeg);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecNeg);
+};
+
+// Takes absolute value of every component in the vector,
+// viz. abs[ x1, .. , xn ]  = [ |x1|, .. , |xn| ].
+class HVecAbs FINAL : public HVecUnaryOperation {
+ public:
+  HVecAbs(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    DCHECK_EQ(input->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, input);
+  }
+  DECLARE_INSTRUCTION(VecAbs);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecAbs);
+};
+
+// Bitwise- or boolean-nots every component in the vector,
+// viz. not[ x1, .. , xn ]  = [ ~x1, .. , ~xn ], or
+//      not[ x1, .. , xn ]  = [ !x1, .. , !xn ] for boolean.
+class HVecNot FINAL : public HVecUnaryOperation {
+ public:
+  HVecNot(ArenaAllocator* arena,
+          HInstruction* input,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecUnaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(input->IsVecOperation());
+    SetRawInputAt(0, input);
+  }
+  DECLARE_INSTRUCTION(VecNot);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecNot);
+};
+
+// Adds every component in the two vectors,
+// viz. [ x1, .. , xn ] + [ y1, .. , yn ] = [ x1 + y1, .. , xn + yn ].
+class HVecAdd FINAL : public HVecBinaryOperation {
+ public:
+  HVecAdd(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecAdd);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecAdd);
+};
+
+// Subtracts every component in the two vectors,
+// viz. [ x1, .. , xn ] - [ y1, .. , yn ] = [ x1 - y1, .. , xn - yn ].
+class HVecSub FINAL : public HVecBinaryOperation {
+ public:
+  HVecSub(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecSub);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecSub);
+};
+
+// Multiplies every component in the two vectors,
+// viz. [ x1, .. , xn ] * [ y1, .. , yn ] = [ x1 * y1, .. , xn * yn ].
+class HVecMul FINAL : public HVecBinaryOperation {
+ public:
+  HVecMul(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecMul);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecMul);
+};
+
+// Divides every component in the two vectors,
+// viz. [ x1, .. , xn ] / [ y1, .. , yn ] = [ x1 / y1, .. , xn / yn ].
+class HVecDiv FINAL : public HVecBinaryOperation {
+ public:
+  HVecDiv(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    DCHECK_EQ(right->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecDiv);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecDiv);
+};
+
+// Bitwise-ands every component in the two vectors,
+// viz. [ x1, .. , xn ] & [ y1, .. , yn ] = [ x1 & y1, .. , xn & yn ].
+class HVecAnd FINAL : public HVecBinaryOperation {
+ public:
+  HVecAnd(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecAnd);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecAnd);
+};
+
+// Bitwise-and-nots every component in the two vectors,
+// viz. [ x1, .. , xn ] and-not [ y1, .. , yn ] = [ ~x1 & y1, .. , ~xn & yn ].
+class HVecAndNot FINAL : public HVecBinaryOperation {
+ public:
+  HVecAndNot(ArenaAllocator* arena,
+             HInstruction* left,
+             HInstruction* right,
+             Primitive::Type packed_type,
+             size_t vector_length,
+             uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecAndNot);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecAndNot);
+};
+
+// Bitwise-ors every component in the two vectors,
+// viz. [ x1, .. , xn ] | [ y1, .. , yn ] = [ x1 | y1, .. , xn | yn ].
+class HVecOr FINAL : public HVecBinaryOperation {
+ public:
+  HVecOr(ArenaAllocator* arena,
+         HInstruction* left,
+         HInstruction* right,
+         Primitive::Type packed_type,
+         size_t vector_length,
+         uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecOr);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecOr);
+};
+
+// Bitwise-xors every component in the two vectors,
+// viz. [ x1, .. , xn ] ^ [ y1, .. , yn ] = [ x1 ^ y1, .. , xn ^ yn ].
+class HVecXor FINAL : public HVecBinaryOperation {
+ public:
+  HVecXor(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation() && right->IsVecOperation());
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecXor);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecXor);
+};
+
+// Logically shifts every component in the vector left by the given distance,
+// viz. [ x1, .. , xn ] << d = [ x1 << d, .. , xn << d ].
+class HVecShl FINAL : public HVecBinaryOperation {
+ public:
+  HVecShl(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecShl);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecShl);
+};
+
+// Arithmetically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >> d = [ x1 >> d, .. , xn >> d ].
+class HVecShr FINAL : public HVecBinaryOperation {
+ public:
+  HVecShr(ArenaAllocator* arena,
+          HInstruction* left,
+          HInstruction* right,
+          Primitive::Type packed_type,
+          size_t vector_length,
+          uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecShr);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecShr);
+};
+
+// Logically shifts every component in the vector right by the given distance,
+// viz. [ x1, .. , xn ] >>> d = [ x1 >>> d, .. , xn >>> d ].
+class HVecUShr FINAL : public HVecBinaryOperation {
+ public:
+  HVecUShr(ArenaAllocator* arena,
+           HInstruction* left,
+           HInstruction* right,
+           Primitive::Type packed_type,
+           size_t vector_length,
+           uint32_t dex_pc = kNoDexPc)
+      : HVecBinaryOperation(arena, packed_type, vector_length, dex_pc) {
+    DCHECK(left->IsVecOperation());
+    DCHECK_EQ(left->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, left);
+    SetRawInputAt(1, right);
+  }
+  DECLARE_INSTRUCTION(VecUShr);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecUShr);
+};
+
+// Loads a vector from memory, viz. load(mem, 1)
+// yields the vector [ mem(1), .. , mem(n) ].
+class HVecLoad FINAL : public HVecMemoryOperation {
+ public:
+  HVecLoad(ArenaAllocator* arena,
+           HInstruction* base,
+           HInstruction* index,
+           Primitive::Type packed_type,
+           size_t vector_length,
+           uint32_t dex_pc = kNoDexPc)
+      : HVecMemoryOperation(arena,
+                            packed_type,
+                            SideEffects::ArrayReadOfType(packed_type),
+                            /*number_of_inputs*/ 2,
+                            vector_length,
+                            dex_pc) {
+    SetRawInputAt(0, base);
+    SetRawInputAt(1, index);
+  }
+  DECLARE_INSTRUCTION(VecLoad);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecLoad);
+};
+
+// Stores a vector to memory, viz. store(mem, 1, [ x1, .. , xn ])
+// sets mem(1) = x1, .. , mem(n) = xn.
+class HVecStore FINAL : public HVecMemoryOperation {
+ public:
+  HVecStore(ArenaAllocator* arena,
+            HInstruction* base,
+            HInstruction* index,
+            HInstruction* value,
+            Primitive::Type packed_type,
+            size_t vector_length,
+            uint32_t dex_pc = kNoDexPc)
+      : HVecMemoryOperation(arena,
+                            packed_type,
+                            SideEffects::ArrayWriteOfType(packed_type),
+                            /*number_of_inputs*/ 3,
+                            vector_length,
+                            dex_pc) {
+    DCHECK(value->IsVecOperation());
+    DCHECK_EQ(value->AsVecOperation()->GetPackedType(), packed_type);
+    SetRawInputAt(0, base);
+    SetRawInputAt(1, index);
+    SetRawInputAt(2, value);
+  }
+  DECLARE_INSTRUCTION(VecStore);
+ private:
+  DISALLOW_COPY_AND_ASSIGN(HVecStore);
+};
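+
+// A minimal sketch of how these nodes can express one vectorized step of
+// a[i] = b[i] + c[i] over four int32 lanes (all names hypothetical; the
+// actual construction is left to a vectorizing pass):
+//
+//   HVecLoad* vb = new (arena) HVecLoad(arena, b, i, Primitive::kPrimInt, 4);
+//   HVecLoad* vc = new (arena) HVecLoad(arena, c, i, Primitive::kPrimInt, 4);
+//   HVecAdd*  va = new (arena) HVecAdd(arena, vb, vc, Primitive::kPrimInt, 4);
+//   HVecStore* vs =
+//       new (arena) HVecStore(arena, a, i, va, Primitive::kPrimInt, 4);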
+
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_NODES_VECTOR_H_
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index d84fe6c..60af2b4 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -174,53 +174,45 @@
 // 0x00000034: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64[] = {
-    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
-    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
-    0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67,
-    0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
-    0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
-    0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF,
+    0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7,
+    0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF,
+    0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67,
+    0x00, 0x00, 0x1F, 0xD8,
 };
-
 static constexpr uint8_t expected_cfi_kMips64[] = {
-    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44,
-    0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
-    0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44,
+    0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
-// 0x00000000: daddiu r29, r29, -40
-// 0x00000004: .cfi_def_cfa_offset: 40
-// 0x00000004: sd r31, +32(r29)
+// 0x00000000: daddiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sd r31, +56(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +24(r29)
+// 0x00000008: sd r17, +48(r29)
 // 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +16(r29)
+// 0x0000000c: sd r16, +40(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000010: sdc1 f25, +32(r29)
 // 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000014: sdc1 f24, +24(r29)
 // 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: daddiu r29, r29, -24
-// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: daddiu r29, r29, 24
-// 0x00000020: .cfi_def_cfa_offset: 40
-// 0x00000020: ldc1 f24, +0(r29)
-// 0x00000024: .cfi_restore: r56
-// 0x00000024: ldc1 f25, +8(r29)
+// 0x00000018: .cfi_remember_state
+// 0x00000018: ld r31, +56(r29)
+// 0x0000001c: .cfi_restore: r31
+// 0x0000001c: ld r17, +48(r29)
+// 0x00000020: .cfi_restore: r17
+// 0x00000020: ld r16, +40(r29)
+// 0x00000024: .cfi_restore: r16
+// 0x00000024: ldc1 f25, +32(r29)
 // 0x00000028: .cfi_restore: r57
-// 0x00000028: ld r16, +16(r29)
-// 0x0000002c: .cfi_restore: r16
-// 0x0000002c: ld r17, +24(r29)
-// 0x00000030: .cfi_restore: r17
-// 0x00000030: ld r31, +32(r29)
-// 0x00000034: .cfi_restore: r31
-// 0x00000034: daddiu r29, r29, 40
-// 0x00000038: .cfi_def_cfa_offset: 0
-// 0x00000038: jr r31
-// 0x0000003c: nop
-// 0x00000040: .cfi_restore_state
-// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000028: ldc1 f24, +24(r29)
+// 0x0000002c: .cfi_restore: r56
+// 0x0000002c: daddiu r29, r29, 64
+// 0x00000030: .cfi_def_cfa_offset: 0
+// 0x00000030: jic r31, 0
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
 #ifdef ART_USE_OLD_ARM_BACKEND
@@ -403,58 +395,52 @@
 // 0x00020060: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
-    0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
-    0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
-    0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60,
-    0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
+    0xC0, 0xFF, 0xBD, 0x67, 0x38, 0x00, 0xBF, 0xFF, 0x30, 0x00, 0xB1, 0xFF,
+    0x28, 0x00, 0xB0, 0xFF, 0x20, 0x00, 0xB9, 0xF7, 0x18, 0x00, 0xB8, 0xF7,
+    0x02, 0x00, 0xA6, 0x60, 0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
 };
 static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
-    0x18, 0x00, 0xBD, 0x67, 0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7,
-    0x10, 0x00, 0xB0, 0xDF, 0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF,
-    0x28, 0x00, 0xBD, 0x67, 0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
+    0x38, 0x00, 0xBF, 0xDF, 0x30, 0x00, 0xB1, 0xDF, 0x28, 0x00, 0xB0, 0xDF,
+    0x20, 0x00, 0xB9, 0xD7, 0x18, 0x00, 0xB8, 0xD7, 0x40, 0x00, 0xBD, 0x67,
+    0x00, 0x00, 0x1F, 0xD8,
 };
 static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
-    0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00,
-    0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
-    0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
+    0x44, 0x0E, 0x40, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x04, 0x10, 0x00, 0x02, 0x00, 0x0A,
+    0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x44, 0xF9, 0x44, 0xF8, 0x44, 0x0E,
+    0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
-// 0x00000000: daddiu r29, r29, -40
-// 0x00000004: .cfi_def_cfa_offset: 40
-// 0x00000004: sd r31, +32(r29)
+// 0x00000000: daddiu r29, r29, -64
+// 0x00000004: .cfi_def_cfa_offset: 64
+// 0x00000004: sd r31, +56(r29)
 // 0x00000008: .cfi_offset: r31 at cfa-8
-// 0x00000008: sd r17, +24(r29)
+// 0x00000008: sd r17, +48(r29)
 // 0x0000000c: .cfi_offset: r17 at cfa-16
-// 0x0000000c: sd r16, +16(r29)
+// 0x0000000c: sd r16, +40(r29)
 // 0x00000010: .cfi_offset: r16 at cfa-24
-// 0x00000010: sdc1 f25, +8(r29)
+// 0x00000010: sdc1 f25, +32(r29)
 // 0x00000014: .cfi_offset: r57 at cfa-32
-// 0x00000014: sdc1 f24, +0(r29)
+// 0x00000014: sdc1 f24, +24(r29)
 // 0x00000018: .cfi_offset: r56 at cfa-40
-// 0x00000018: daddiu r29, r29, -24
-// 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: bnec r5, r6, 0x0000002c ; +12
-// 0x00000020: auipc r1, 2
-// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080
-// 0x00000028: nop
+// 0x00000018: bnec r5, r6, 0x00000024 ; +12
+// 0x0000001c: auipc r1, 2
+// 0x00000020: jic r1, 12 ; bc 0x00020028 ; +131080
+// 0x00000024: nop
 //             ...
-// 0x00020028: nop
-// 0x0002002c: .cfi_remember_state
-// 0x0002002c: daddiu r29, r29, 24
-// 0x00020030: .cfi_def_cfa_offset: 40
-// 0x00020030: ldc1 f24, +0(r29)
-// 0x00020034: .cfi_restore: r56
-// 0x00020034: ldc1 f25, +8(r29)
+// 0x00020024: nop
+// 0x00020028: .cfi_remember_state
+// 0x00020028: ld r31, +56(r29)
+// 0x0002002c: .cfi_restore: r31
+// 0x0002002c: ld r17, +48(r29)
+// 0x00020030: .cfi_restore: r17
+// 0x00020030: ld r16, +40(r29)
+// 0x00020034: .cfi_restore: r16
+// 0x00020034: ldc1 f25, +32(r29)
 // 0x00020038: .cfi_restore: r57
-// 0x00020038: ld r16, +16(r29)
-// 0x0002003c: .cfi_restore: r16
-// 0x0002003c: ld r17, +24(r29)
-// 0x00020040: .cfi_restore: r17
-// 0x00020040: ld r31, +32(r29)
-// 0x00020044: .cfi_restore: r31
-// 0x00020044: daddiu r29, r29, 40
-// 0x00020047: .cfi_def_cfa_offset: 0
-// 0x00020048: jr r31
-// 0x0002004c: nop
-// 0x00020050: .cfi_restore_state
-// 0x00020050: .cfi_def_cfa_offset: 64
+// 0x00020038: ldc1 f24, +24(r29)
+// 0x0002003c: .cfi_restore: r56
+// 0x0002003c: daddiu r29, r29, 64
+// 0x00020040: .cfi_def_cfa_offset: 0
+// 0x00020040: jic r31, 0
+// 0x00020044: .cfi_restore_state
+// 0x00020044: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index f72bd6a..e542cbb 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -56,6 +56,7 @@
 #include "builder.h"
 #include "cha_guard_optimization.h"
 #include "code_generator.h"
+#include "code_sinking.h"
 #include "compiled_method.h"
 #include "compiler.h"
 #include "constant_folding.h"
@@ -448,15 +449,6 @@
       || instruction_set == kX86_64;
 }
 
-// Read barrier are supported on ARM, ARM64, x86 and x86-64 at the moment.
-// TODO: Add support for other architectures and remove this function
-static bool InstructionSetSupportsReadBarrier(InstructionSet instruction_set) {
-  return instruction_set == kArm64
-      || instruction_set == kThumb2
-      || instruction_set == kX86
-      || instruction_set == kX86_64;
-}
-
 // Strip pass name suffix to get optimization name.
 static std::string ConvertPassNameToOptimizationName(const std::string& pass_name) {
   size_t pos = pass_name.find(kPassNameSeparator);
@@ -498,7 +490,8 @@
                                 handles,
                                 stats,
                                 number_of_dex_registers,
-                                /* depth */ 0);
+                                /* total_number_of_instructions */ 0,
+                                /* parent */ nullptr);
   } else if (opt_name == HSharpening::kSharpeningPassName) {
     return new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver, handles);
   } else if (opt_name == HSelectGenerator::kSelectGeneratorPassName) {
@@ -506,7 +499,7 @@
   } else if (opt_name == HInductionVarAnalysis::kInductionPassName) {
     return new (arena) HInductionVarAnalysis(graph);
   } else if (opt_name == InstructionSimplifier::kInstructionSimplifierPassName) {
-    return new (arena) InstructionSimplifier(graph, stats, pass_name.c_str());
+    return new (arena) InstructionSimplifier(graph, codegen, stats, pass_name.c_str());
   } else if (opt_name == IntrinsicsRecognizer::kIntrinsicsRecognizerPassName) {
     return new (arena) IntrinsicsRecognizer(graph, stats);
   } else if (opt_name == LICM::kLoopInvariantCodeMotionPassName) {
@@ -518,9 +511,11 @@
   } else if (opt_name == SideEffectsAnalysis::kSideEffectsAnalysisPassName) {
     return new (arena) SideEffectsAnalysis(graph);
   } else if (opt_name == HLoopOptimization::kLoopOptimizationPassName) {
-    return new (arena) HLoopOptimization(graph, most_recent_induction);
+    return new (arena) HLoopOptimization(graph, driver, most_recent_induction);
   } else if (opt_name == CHAGuardOptimization::kCHAGuardOptimizationPassName) {
     return new (arena) CHAGuardOptimization(graph);
+  } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
+    return new (arena) CodeSinking(graph, stats);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::DexCacheArrayFixups::kDexCacheArrayFixupsArmPassName) {
     return new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
@@ -604,8 +599,7 @@
                                          VariableSizedHandleScope* handles) const {
   OptimizingCompilerStats* stats = compilation_stats_.get();
   const CompilerOptions& compiler_options = driver->GetCompilerOptions();
-  bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
-      && (compiler_options.GetInlineMaxCodeUnits() > 0);
+  bool should_inline = (compiler_options.GetInlineMaxCodeUnits() > 0);
   if (!should_inline) {
     return;
   }
@@ -620,7 +614,8 @@
       handles,
       stats,
       number_of_dex_registers,
-      /* depth */ 0);
+      /* total_number_of_instructions */ 0,
+      /* parent */ nullptr);
   HOptimization* optimizations[] = { inliner };
 
   RunOptimizations(optimizations, arraysize(optimizations), pass_observer);
@@ -765,28 +760,32 @@
   HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
   HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
-  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
+  InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, codegen, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
       graph, "constant_folding$after_inlining");
   HConstantFolding* fold3 = new (arena) HConstantFolding(graph, "constant_folding$after_bce");
-  SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
-  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects);
-  LICM* licm = new (arena) LICM(graph, *side_effects, stats);
-  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects);
+  SideEffectsAnalysis* side_effects1 = new (arena) SideEffectsAnalysis(
+      graph, "side_effects$before_gvn");
+  SideEffectsAnalysis* side_effects2 = new (arena) SideEffectsAnalysis(
+      graph, "side_effects$before_lse");
+  GVNOptimization* gvn = new (arena) GVNOptimization(graph, *side_effects1);
+  LICM* licm = new (arena) LICM(graph, *side_effects1, stats);
   HInductionVarAnalysis* induction = new (arena) HInductionVarAnalysis(graph);
-  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects, induction);
-  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
+  BoundsCheckElimination* bce = new (arena) BoundsCheckElimination(graph, *side_effects1, induction);
+  HLoopOptimization* loop = new (arena) HLoopOptimization(graph, driver, induction);
+  LoadStoreElimination* lse = new (arena) LoadStoreElimination(graph, *side_effects2);
   HSharpening* sharpening = new (arena) HSharpening(
       graph, codegen, dex_compilation_unit, driver, handles);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$after_inlining");
+      graph, codegen, stats, "instruction_simplifier$after_inlining");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$after_bce");
+      graph, codegen, stats, "instruction_simplifier$after_bce");
   InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$before_codegen");
+      graph, codegen, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
+  CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -806,7 +805,7 @@
     fold2,  // TODO: if we don't inline we can also skip fold2.
     simplify2,
     dce2,
-    side_effects,
+    side_effects1,
     gvn,
     licm,
     induction,
@@ -814,9 +813,11 @@
     loop,
     fold3,  // evaluates code generated by dynamic bce
     simplify3,
+    side_effects2,
     lse,
     cha_guard,
     dce3,
+    code_sinking,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
@@ -847,8 +848,15 @@
                                          const DexFile::CodeItem* code_item) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
-  stack_map.resize(codegen->ComputeStackMapsSize());
-  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
+  ArenaVector<uint8_t> method_info(arena->Adapter(kArenaAllocStackMaps));
+  size_t stack_map_size = 0;
+  size_t method_info_size = 0;
+  codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
+  stack_map.resize(stack_map_size);
+  method_info.resize(method_info_size);
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()),
+                          MemoryRegion(method_info.data(), method_info.size()),
+                          *code_item);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
@@ -860,7 +868,7 @@
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
-      ArrayRef<const SrcMapElem>(),
+      ArrayRef<const uint8_t>(method_info),
       ArrayRef<const uint8_t>(stack_map),
       ArrayRef<const uint8_t>(*codegen->GetAssembler()->cfi().data()),
       ArrayRef<const LinkerPatch>(linker_patches));
@@ -895,12 +903,6 @@
     return nullptr;
   }
 
-  // When read barriers are enabled, do not attempt to compile for
-  // instruction sets that have no read barrier support.
-  if (kEmitCompilerReadBarrier && !InstructionSetSupportsReadBarrier(instruction_set)) {
-    return nullptr;
-  }
-
   if (Compiler::IsPathologicalCase(*code_item, method_idx, dex_file)) {
     MaybeRecordStat(MethodCompilationStat::kNotCompiledPathological);
     return nullptr;
@@ -1091,13 +1093,10 @@
 
   if (kIsDebugBuild &&
       IsCompilingWithCoreImage() &&
-      IsInstructionSetSupported(compiler_driver->GetInstructionSet()) &&
-      (!kEmitCompilerReadBarrier ||
-       InstructionSetSupportsReadBarrier(compiler_driver->GetInstructionSet()))) {
+      IsInstructionSetSupported(compiler_driver->GetInstructionSet())) {
     // For testing purposes, we put a special marker on method names
-    // that should be compiled with this compiler (when the the
-    // instruction set is supported -- and has support for read
-    // barriers, if they are enabled). This makes sure we're not
+    // that should be compiled with this compiler (when the
+    // instruction set is supported). This makes sure we're not
     // regressing.
     std::string method_name = dex_file.PrettyMethod(method_idx);
     bool shouldCompile = method_name.find("$opt$") != std::string::npos;
@@ -1191,7 +1190,9 @@
     }
   }
 
-  size_t stack_map_size = codegen->ComputeStackMapsSize();
+  size_t stack_map_size = 0;
+  size_t method_info_size = 0;
+  codegen->ComputeStackMapAndMethodInfoSize(&stack_map_size, &method_info_size);
   size_t number_of_roots = codegen->GetNumberOfJitRoots();
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   // We allocate an object array to ensure the JIT roots that we will collect in EmitJitRoots
@@ -1207,20 +1208,30 @@
     return false;
   }
   uint8_t* stack_map_data = nullptr;
+  uint8_t* method_info_data = nullptr;
   uint8_t* roots_data = nullptr;
-  uint32_t data_size = code_cache->ReserveData(
-      self, stack_map_size, number_of_roots, method, &stack_map_data, &roots_data);
+  uint32_t data_size = code_cache->ReserveData(self,
+                                               stack_map_size,
+                                               method_info_size,
+                                               number_of_roots,
+                                               method,
+                                               &stack_map_data,
+                                               &method_info_data,
+                                               &roots_data);
   if (stack_map_data == nullptr || roots_data == nullptr) {
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
-  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size),
+                          MemoryRegion(method_info_data, method_info_size),
+                          *code_item);
   codegen->EmitJitRoots(code_allocator.GetData(), roots, roots_data);
 
   const void* code = code_cache->CommitCode(
       self,
       method,
       stack_map_data,
+      method_info_data,
       roots_data,
       codegen->HasEmptyFrame() ? 0 : codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
diff --git a/compiler/optimizing/optimizing_compiler_stats.h b/compiler/optimizing/optimizing_compiler_stats.h
index 7240d40..a211c54 100644
--- a/compiler/optimizing/optimizing_compiler_stats.h
+++ b/compiler/optimizing/optimizing_compiler_stats.h
@@ -68,6 +68,24 @@
   kImplicitNullCheckGenerated,
   kExplicitNullCheckGenerated,
   kSimplifyIf,
+  kInstructionSunk,
+  kNotInlinedUnresolvedEntrypoint,
+  kNotInlinedDexCache,
+  kNotInlinedStackMaps,
+  kNotInlinedEnvironmentBudget,
+  kNotInlinedInstructionBudget,
+  kNotInlinedLoopWithoutExit,
+  kNotInlinedIrreducibleLoop,
+  kNotInlinedAlwaysThrows,
+  kNotInlinedInfiniteLoop,
+  kNotInlinedTryCatch,
+  kNotInlinedRegisterAllocator,
+  kNotInlinedCannotBuild,
+  kNotInlinedNotVerified,
+  kNotInlinedCodeItem,
+  kNotInlinedWont,
+  kNotInlinedRecursiveBudget,
+  kNotInlinedProxy,
   kLastStat
 };
 
@@ -166,6 +184,24 @@
       case kImplicitNullCheckGenerated: name = "ImplicitNullCheckGenerated"; break;
       case kExplicitNullCheckGenerated: name = "ExplicitNullCheckGenerated"; break;
       case kSimplifyIf: name = "SimplifyIf"; break;
+      case kInstructionSunk: name = "InstructionSunk"; break;
+      case kNotInlinedUnresolvedEntrypoint: name = "NotInlinedUnresolvedEntrypoint"; break;
+      case kNotInlinedDexCache: name = "NotInlinedDexCache"; break;
+      case kNotInlinedStackMaps: name = "NotInlinedStackMaps"; break;
+      case kNotInlinedEnvironmentBudget: name = "NotInlinedEnvironmentBudget"; break;
+      case kNotInlinedInstructionBudget: name = "NotInlinedInstructionBudget"; break;
+      case kNotInlinedLoopWithoutExit: name = "NotInlinedLoopWithoutExit"; break;
+      case kNotInlinedIrreducibleLoop: name = "NotInlinedIrreducibleLoop"; break;
+      case kNotInlinedAlwaysThrows: name = "NotInlinedAlwaysThrows"; break;
+      case kNotInlinedInfiniteLoop: name = "NotInlinedInfiniteLoop"; break;
+      case kNotInlinedTryCatch: name = "NotInlinedTryCatch"; break;
+      case kNotInlinedRegisterAllocator: name = "NotInlinedRegisterAllocator"; break;
+      case kNotInlinedCannotBuild: name = "NotInlinedCannotBuild"; break;
+      case kNotInlinedNotVerified: name = "NotInlinedNotVerified"; break;
+      case kNotInlinedCodeItem: name = "NotInlinedCodeItem"; break;
+      case kNotInlinedWont: name = "NotInlinedWont"; break;
+      case kNotInlinedRecursiveBudget: name = "NotInlinedRecursiveBudget"; break;
+      case kNotInlinedProxy: name = "NotInlinedProxy"; break;
 
       case kLastStat:
         LOG(FATAL) << "invalid stat "
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index efbaf6c..66bfea9 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -40,6 +40,14 @@
   check->ReplaceWith(check->InputAt(0));
 }
 
+void PrepareForRegisterAllocation::VisitDeoptimize(HDeoptimize* deoptimize) {
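+  // A deoptimize that guards an input stands in for that input in the graph;
+  // redirect its uses to the input itself before register allocation.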
+  if (deoptimize->GuardsAnInput()) {
+    // Replace the uses with the actual guarded instruction.
+    deoptimize->ReplaceWith(deoptimize->GuardedInput());
+    deoptimize->RemoveGuard();
+  }
+}
+
 void PrepareForRegisterAllocation::VisitBoundsCheck(HBoundsCheck* check) {
   check->ReplaceWith(check->InputAt(0));
   if (check->IsStringCharAt()) {
diff --git a/compiler/optimizing/prepare_for_register_allocation.h b/compiler/optimizing/prepare_for_register_allocation.h
index c128227..7ffbe44 100644
--- a/compiler/optimizing/prepare_for_register_allocation.h
+++ b/compiler/optimizing/prepare_for_register_allocation.h
@@ -44,6 +44,7 @@
   void VisitClinitCheck(HClinitCheck* check) OVERRIDE;
   void VisitCondition(HCondition* condition) OVERRIDE;
   void VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) OVERRIDE;
+  void VisitDeoptimize(HDeoptimize* deoptimize) OVERRIDE;
 
   bool CanMoveClinitCheck(HInstruction* input, HInstruction* user) const;
   bool CanEmitConditionAt(HCondition* condition, HInstruction* user) const;
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 6e332ca..d5637b9 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -310,8 +310,8 @@
     BoundTypeIn(receiver, trueBlock, /* start_instruction */ nullptr, class_rti);
   } else {
     DCHECK(check->IsDeoptimize());
-    if (compare->IsEqual()) {
-      BoundTypeIn(receiver, check->GetBlock(), check, class_rti);
+    if (compare->IsEqual() && check->AsDeoptimize()->GuardsAnInput()) {
+      check->SetReferenceTypeInfo(class_rti);
     }
   }
 }
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 84a4bab..0b49ce1 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -29,7 +29,7 @@
  */
 class ReferenceTypePropagationTest : public CommonCompilerTest {
  public:
-  ReferenceTypePropagationTest() : pool_(), allocator_(&pool_) {
+  ReferenceTypePropagationTest() : pool_(), allocator_(&pool_), propagation_(nullptr) {
     graph_ = CreateGraph(&allocator_);
   }
 
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index 59523a9..c6a0b6a 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -299,14 +299,17 @@
      // Currently, we unconditionally spill the current method in the code generators.
       && !interval->GetDefinedBy()->IsCurrentMethod()) {
     // We spill eagerly, so move must be at definition.
-    InsertMoveAfter(interval->GetDefinedBy(),
-                    interval->ToLocation(),
-                    interval->NeedsTwoSpillSlots()
-                        ? Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot())
-                        : Location::StackSlot(interval->GetParent()->GetSpillSlot()));
+    Location loc;
+    switch (interval->NumberOfSpillSlotsNeeded()) {
+      case 1: loc = Location::StackSlot(interval->GetParent()->GetSpillSlot()); break;
+      case 2: loc = Location::DoubleStackSlot(interval->GetParent()->GetSpillSlot()); break;
+      case 4: loc = Location::SIMDStackSlot(interval->GetParent()->GetSpillSlot()); break;
+      default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+    }
+    InsertMoveAfter(interval->GetDefinedBy(), interval->ToLocation(), loc);
   }
   UsePosition* use = current->GetFirstUse();
-  UsePosition* env_use = current->GetFirstEnvironmentUse();
+  EnvUsePosition* env_use = current->GetFirstEnvironmentUse();
 
   // Walk over all siblings, updating locations of use positions, and
   // connecting them when they are adjacent.
@@ -323,7 +326,6 @@
         use = use->GetNext();
       }
       while (use != nullptr && use->GetPosition() <= range->GetEnd()) {
-        DCHECK(!use->GetIsEnvironment());
         DCHECK(current->CoversSlow(use->GetPosition()) || (use->GetPosition() == range->GetEnd()));
         if (!use->IsSynthesized()) {
           LocationSummary* locations = use->GetUser()->GetLocations();
@@ -460,9 +462,12 @@
       location_source = defined_by->GetLocations()->Out();
     } else {
       DCHECK(defined_by->IsCurrentMethod());
-      location_source = parent->NeedsTwoSpillSlots()
-          ? Location::DoubleStackSlot(parent->GetSpillSlot())
-          : Location::StackSlot(parent->GetSpillSlot());
+      switch (parent->NumberOfSpillSlotsNeeded()) {
+        case 1: location_source = Location::StackSlot(parent->GetSpillSlot()); break;
+        case 2: location_source = Location::DoubleStackSlot(parent->GetSpillSlot()); break;
+        case 4: location_source = Location::SIMDStackSlot(parent->GetSpillSlot()); break;
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
+      }
     }
   } else {
     DCHECK(source != nullptr);
@@ -493,7 +498,8 @@
       || destination.IsFpuRegister()
       || destination.IsFpuRegisterPair()
       || destination.IsStackSlot()
-      || destination.IsDoubleStackSlot();
+      || destination.IsDoubleStackSlot()
+      || destination.IsSIMDStackSlot();
 }
 
 void RegisterAllocationResolver::AddMove(HParallelMove* move,
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9064f86..87f709f 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -1029,7 +1029,7 @@
       interval->SetSpillSlot(previous_phi->GetLiveInterval()->GetSpillSlot());
     } else {
       interval->SetSpillSlot(catch_phi_spill_slot_counter_);
-      catch_phi_spill_slot_counter_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+      catch_phi_spill_slot_counter_ += interval->NumberOfSpillSlotsNeeded();
     }
   }
 }
@@ -1996,43 +1996,48 @@
     bool is_interval_beginning;
     size_t position;
     std::tie(position, is_interval_beginning, parent_interval) = *it;
-
-    bool needs_two_slots = parent_interval->NeedsTwoSpillSlots();
+    size_t number_of_spill_slots_needed = parent_interval->NumberOfSpillSlotsNeeded();
 
     if (is_interval_beginning) {
       DCHECK(!parent_interval->HasSpillSlot());
       DCHECK_EQ(position, parent_interval->GetStart());
 
-      // Find a free stack slot.
+      // Find the first contiguous run of free stack slots.
       size_t slot = 0;
-      for (; taken.IsBitSet(slot) || (needs_two_slots && taken.IsBitSet(slot + 1)); ++slot) {
-        // Skip taken slots.
+      for (; ; ++slot) {
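+        // Check whether the next number_of_spill_slots_needed slots are all free.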
+        bool found = true;
+        for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+          if (taken.IsBitSet(s)) {
+            found = false;
+            break;  // failure
+          }
+        }
+        if (found) {
+          break;  // success
+        }
       }
+
       parent_interval->SetSpillSlot(slot);
 
-      *num_stack_slots_used = std::max(*num_stack_slots_used,
-                                       needs_two_slots ? slot + 1 : slot + 2);
-      if (needs_two_slots && *num_stack_slots_used % 2 != 0) {
+      *num_stack_slots_used = std::max(*num_stack_slots_used, slot + number_of_spill_slots_needed);
+      if (number_of_spill_slots_needed > 1 && *num_stack_slots_used % 2 != 0) {
         // The parallel move resolver requires that there be an even number of spill slots
         // allocated for pair value types.
         ++(*num_stack_slots_used);
       }
 
-      taken.SetBit(slot);
-      if (needs_two_slots) {
-        taken.SetBit(slot + 1);
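+      // Mark all the newly allocated slots as taken.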
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        taken.SetBit(s);
       }
     } else {
       DCHECK_EQ(position, parent_interval->GetLastSibling()->GetEnd());
       DCHECK(parent_interval->HasSpillSlot());
 
-      // Free up the stack slot used by this interval.
+      // Free up the stack slot(s) used by this interval.
       size_t slot = parent_interval->GetSpillSlot();
-      DCHECK(taken.IsBitSet(slot));
-      DCHECK(!needs_two_slots || taken.IsBitSet(slot + 1));
-      taken.ClearBit(slot);
-      if (needs_two_slots) {
-        taken.ClearBit(slot + 1);
+      for (size_t s = slot, u = slot + number_of_spill_slots_needed; s < u; s++) {
+        DCHECK(taken.IsBitSet(s));
+        taken.ClearBit(s);
       }
     }
   }
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 1a391ce..ab8d540 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -629,21 +629,21 @@
     if (!locations->OutputCanOverlapWithInputs() && locations->Out().IsUnallocated()) {
       HInputsRef inputs = defined_by->GetInputs();
       for (size_t i = 0; i < inputs.size(); ++i) {
-        // Take the last interval of the input. It is the location of that interval
-        // that will be used at `defined_by`.
-        LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
-        // Note that interval may have not been processed yet.
-        // TODO: Handle non-split intervals last in the work list.
-        if (locations->InAt(i).IsValid()
-            && interval->HasRegister()
-            && interval->SameRegisterKind(*current)) {
-          // The input must be live until the end of `defined_by`, to comply to
-          // the linear scan algorithm. So we use `defined_by`'s end lifetime
-          // position to check whether the input is dead or is inactive after
-          // `defined_by`.
-          DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
-          size_t position = defined_by->GetLifetimePosition() + 1;
-          FreeIfNotCoverAt(interval, position, free_until);
+        if (locations->InAt(i).IsValid()) {
+          // Take the last interval of the input. It is the location of that interval
+          // that will be used at `defined_by`.
+          LiveInterval* interval = inputs[i]->GetLiveInterval()->GetLastSibling();
+          // Note that the interval may not have been processed yet.
+          // TODO: Handle non-split intervals last in the work list.
+          if (interval->HasRegister() && interval->SameRegisterKind(*current)) {
+            // The input must be live until the end of `defined_by`, to comply with
+            // the linear scan algorithm. So we use `defined_by`'s end lifetime
+            // position to check whether the input is dead or is inactive after
+            // `defined_by`.
+            DCHECK(interval->CoversSlow(defined_by->GetLifetimePosition()));
+            size_t position = defined_by->GetLifetimePosition() + 1;
+            FreeIfNotCoverAt(interval, position, free_until);
+          }
         }
       }
     }
@@ -1125,36 +1125,31 @@
       LOG(FATAL) << "Unexpected type for interval " << interval->GetType();
   }
 
-  // Find an available spill slot.
+  // Find the first contiguous run of available spill slots.
+  size_t number_of_spill_slots_needed = parent->NumberOfSpillSlotsNeeded();
   size_t slot = 0;
   for (size_t e = spill_slots->size(); slot < e; ++slot) {
-    if ((*spill_slots)[slot] <= parent->GetStart()) {
-      if (!parent->NeedsTwoSpillSlots()) {
-        // One spill slot is sufficient.
+    bool found = true;
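+    // A slot is free when the interval that last used it ends at or before our start.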
+    for (size_t s = slot, u = std::min(slot + number_of_spill_slots_needed, e); s < u; s++) {
+      if ((*spill_slots)[s] > parent->GetStart()) {
+        found = false;  // failure
         break;
       }
-      if (slot == e - 1 || (*spill_slots)[slot + 1] <= parent->GetStart()) {
-        // Two spill slots are available.
-        break;
-      }
+    }
+    if (found) {
+      break;  // success
     }
   }
 
+  // Need new spill slots?
+  size_t upper = slot + number_of_spill_slots_needed;
+  if (upper > spill_slots->size()) {
+    spill_slots->resize(upper);
+  }
+  // Mark the allocated slots as used until the interval's end.
   size_t end = interval->GetLastSibling()->GetEnd();
-  if (parent->NeedsTwoSpillSlots()) {
-    if (slot + 2u > spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->resize(slot + 2u, end);
-    }
-    (*spill_slots)[slot] = end;
-    (*spill_slots)[slot + 1] = end;
-  } else {
-    if (slot == spill_slots->size()) {
-      // We need a new spill slot.
-      spill_slots->push_back(end);
-    } else {
-      (*spill_slots)[slot] = end;
-    }
+  for (size_t s = slot; s < upper; s++) {
+    (*spill_slots)[s] = end;
   }
 
   // Note that the exact spill slot location will be computed when we resolve,
@@ -1180,7 +1175,7 @@
     // TODO: Reuse spill slots when intervals of phis from different catch
     //       blocks do not overlap.
     interval->SetSpillSlot(catch_phi_spill_slots_);
-    catch_phi_spill_slots_ += interval->NeedsTwoSpillSlots() ? 2 : 1;
+    catch_phi_spill_slots_ += interval->NumberOfSpillSlotsNeeded();
   }
 }
 
diff --git a/compiler/optimizing/register_allocator_test.cc b/compiler/optimizing/register_allocator_test.cc
index 2227872..667afb1 100644
--- a/compiler/optimizing/register_allocator_test.cc
+++ b/compiler/optimizing/register_allocator_test.cc
@@ -912,9 +912,9 @@
   // Create an interval with lifetime holes.
   static constexpr size_t ranges1[][2] = {{0, 2}, {4, 6}, {8, 10}};
   LiveInterval* first = BuildInterval(ranges1, arraysize(ranges1), &allocator, -1, one);
-  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, first->first_use_);
-  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 7, first->first_use_);
-  first->first_use_ = new(&allocator) UsePosition(user, 0, false, 6, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, false, 8, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, false, 7, first->first_use_);
+  first->first_use_ = new(&allocator) UsePosition(user, false, 6, first->first_use_);
 
   locations = new (&allocator) LocationSummary(first->GetDefinedBy(), LocationSummary::kNoCall);
   locations->SetOut(Location::RequiresRegister());
@@ -934,9 +934,9 @@
   // before lifetime position 6 yet.
   static constexpr size_t ranges3[][2] = {{2, 4}, {8, 10}};
   LiveInterval* third = BuildInterval(ranges3, arraysize(ranges3), &allocator, -1, three);
-  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 8, third->first_use_);
-  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 4, third->first_use_);
-  third->first_use_ = new(&allocator) UsePosition(user, 0, false, 3, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, false, 8, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, false, 4, third->first_use_);
+  third->first_use_ = new(&allocator) UsePosition(user, false, 3, third->first_use_);
   locations = new (&allocator) LocationSummary(third->GetDefinedBy(), LocationSummary::kNoCall);
   locations->SetOut(Location::RequiresRegister());
   third = third->SplitAt(3);
diff --git a/compiler/optimizing/scheduler.h b/compiler/optimizing/scheduler.h
index ab0dad4..9236a0e 100644
--- a/compiler/optimizing/scheduler.h
+++ b/compiler/optimizing/scheduler.h
@@ -315,7 +315,10 @@
   // This class and its sub-classes will never be used to drive a visit of an
   // `HGraph` but only to visit `HInstructions` one at a time, so we do not need
   // to pass a valid graph to `HGraphDelegateVisitor()`.
-  SchedulingLatencyVisitor() : HGraphDelegateVisitor(nullptr) {}
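+  // The latency fields are set by the Visit methods; initialize them defensively.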
+  SchedulingLatencyVisitor()
+      : HGraphDelegateVisitor(nullptr),
+        last_visited_latency_(0),
+        last_visited_internal_latency_(0) {}
 
   void VisitInstruction(HInstruction* instruction) OVERRIDE {
     LOG(FATAL) << "Error visiting " << instruction->DebugName() << ". "
@@ -413,6 +416,7 @@
         selector_(selector),
         only_optimize_loop_blocks_(true),
         scheduling_graph_(this, arena),
+        cursor_(nullptr),
         candidates_(arena_->Adapter(kArenaAllocScheduler)) {}
   virtual ~HScheduler() {}
 
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index be40092..eedaf6e 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -41,7 +41,7 @@
     for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* instruction = it.Current();
       if (instruction->IsInvokeStaticOrDirect()) {
-        ProcessInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect());
+        SharpenInvokeStaticOrDirect(instruction->AsInvokeStaticOrDirect(), codegen_);
       } else if (instruction->IsLoadString()) {
         ProcessLoadString(instruction->AsLoadString());
       }
@@ -65,12 +65,12 @@
 }
 
 static bool AOTCanEmbedMethod(ArtMethod* method, const CompilerOptions& options) {
-  // Including patch information means the AOT code will be patched, which we don't
-  // support in the compiler, and is anyways moving away b/33192586.
-  return IsInBootImage(method) && !options.GetCompilePic() && !options.GetIncludePatchInformation();
+  return IsInBootImage(method) && !options.GetCompilePic();
 }
 
-void HSharpening::ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+
+void HSharpening::SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke,
+                                              CodeGenerator* codegen) {
   if (invoke->IsStringInit()) {
     // Not using the dex cache arrays. But we could still try to use a better dispatch...
     // TODO: Use direct_method and direct_code for the appropriate StringFactory method.
@@ -97,12 +97,12 @@
 
   // We don't optimize for debuggable as it would prevent us from obsoleting the method in some
   // situations.
-  if (callee == codegen_->GetGraph()->GetArtMethod() && !codegen_->GetGraph()->IsDebuggable()) {
+  if (callee == codegen->GetGraph()->GetArtMethod() && !codegen->GetGraph()->IsDebuggable()) {
     // Recursive call.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kRecursive;
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallSelf;
   } else if (Runtime::Current()->UseJitCompilation() ||
-      AOTCanEmbedMethod(callee, codegen_->GetCompilerOptions())) {
+      AOTCanEmbedMethod(callee, codegen->GetCompilerOptions())) {
     // JIT or on-device AOT compilation referencing a boot image method.
     // Use the method address directly.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress;
@@ -111,13 +111,17 @@
   } else {
     // Use PC-relative access to the dex cache arrays.
     method_load_kind = HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative;
-    DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen_->GetInstructionSet()),
-                                &graph_->GetDexFile());
+    // Note: we use the invoke's graph instead of the codegen graph, which can
+    // differ when inlining (the codegen graph is the outermost graph). The
+    // invoke's dex method index is relative to the dex file from which the
+    // invoke's graph was built.
+    DexCacheArraysLayout layout(GetInstructionSetPointerSize(codegen->GetInstructionSet()),
+                                &invoke->GetBlock()->GetGraph()->GetDexFile());
     method_load_data = layout.MethodOffset(invoke->GetDexMethodIndex());
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
   }
 
-  if (graph_->IsDebuggable()) {
+  if (codegen->GetGraph()->IsDebuggable()) {
     // For debuggable apps always use the code pointer from ArtMethod
     // so that we don't circumvent instrumentation stubs if installed.
     code_ptr_location = HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod;
@@ -127,14 +131,14 @@
       method_load_kind, code_ptr_location, method_load_data
   };
   HInvokeStaticOrDirect::DispatchInfo dispatch_info =
-      codegen_->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
+      codegen->GetSupportedInvokeStaticOrDirectDispatch(desired_dispatch_info, invoke);
   invoke->SetDispatchInfo(dispatch_info);
 }
 
-HLoadClass::LoadKind HSharpening::SharpenClass(HLoadClass* load_class,
-                                               CodeGenerator* codegen,
-                                               CompilerDriver* compiler_driver,
-                                               const DexCompilationUnit& dex_compilation_unit) {
+HLoadClass::LoadKind HSharpening::ComputeLoadClassKind(HLoadClass* load_class,
+                                                       CodeGenerator* codegen,
+                                                       CompilerDriver* compiler_driver,
+                                                       const DexCompilationUnit& dex_compilation_unit) {
   Handle<mirror::Class> klass = load_class->GetClass();
   DCHECK(load_class->GetLoadKind() == HLoadClass::LoadKind::kDexCacheViaMethod ||
          load_class->GetLoadKind() == HLoadClass::LoadKind::kReferrersClass)
@@ -255,7 +259,7 @@
     } else if (runtime->UseJitCompilation()) {
       // TODO: Make sure we don't set the "compile PIC" flag for JIT as that's bogus.
       // DCHECK(!codegen_->GetCompilerOptions().GetCompilePic());
-      string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr) {
         if (runtime->GetHeap()->ObjectIsInBootImageSpace(string)) {
           desired_load_kind = HLoadString::LoadKind::kBootImageAddress;
@@ -267,7 +271,7 @@
       }
     } else {
       // AOT app compilation. Try to lookup the string without allocating if not found.
-      string = class_linker->LookupString(dex_file, string_index, dex_cache);
+      string = class_linker->LookupString(dex_file, string_index, dex_cache.Get());
       if (string != nullptr &&
           runtime->GetHeap()->ObjectIsInBootImageSpace(string) &&
           !codegen_->GetCompilerOptions().GetCompilePic()) {
diff --git a/compiler/optimizing/sharpening.h b/compiler/optimizing/sharpening.h
index 4240b2f..10707c7 100644
--- a/compiler/optimizing/sharpening.h
+++ b/compiler/optimizing/sharpening.h
@@ -48,14 +48,16 @@
   static constexpr const char* kSharpeningPassName = "sharpening";
 
   // Used by the builder and the inliner.
-  static HLoadClass::LoadKind SharpenClass(HLoadClass* load_class,
-                                           CodeGenerator* codegen,
-                                           CompilerDriver* compiler_driver,
-                                           const DexCompilationUnit& dex_compilation_unit)
+  static HLoadClass::LoadKind ComputeLoadClassKind(HLoadClass* load_class,
+                                                   CodeGenerator* codegen,
+                                                   CompilerDriver* compiler_driver,
+                                                   const DexCompilationUnit& dex_compilation_unit)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Used by Sharpening and InstructionSimplifier.
+  static void SharpenInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke, CodeGenerator* codegen);
+
  private:
-  void ProcessInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke);
   void ProcessLoadString(HLoadString* load_string);
 
   CodeGenerator* codegen_;
diff --git a/compiler/optimizing/side_effects_analysis.h b/compiler/optimizing/side_effects_analysis.h
index bac6088..fea47e6 100644
--- a/compiler/optimizing/side_effects_analysis.h
+++ b/compiler/optimizing/side_effects_analysis.h
@@ -25,8 +25,8 @@
 
 class SideEffectsAnalysis : public HOptimization {
  public:
-  explicit SideEffectsAnalysis(HGraph* graph)
-      : HOptimization(graph, kSideEffectsAnalysisPassName),
+  SideEffectsAnalysis(HGraph* graph, const char* pass_name = kSideEffectsAnalysisPassName)
+      : HOptimization(graph, pass_name),
         graph_(graph),
         block_effects_(graph->GetBlocks().size(),
                        graph->GetArena()->Adapter(kArenaAllocSideEffectsAnalysis)),
@@ -41,7 +41,7 @@
 
   bool HasRun() const { return has_run_; }
 
-  static constexpr const char* kSideEffectsAnalysisPassName = "SideEffects";
+  static constexpr const char* kSideEffectsAnalysisPassName = "side_effects";
 
  private:
   void UpdateLoopEffects(HLoopInformation* info, SideEffects effects);
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index e8e12e1..b538a89 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -469,8 +469,15 @@
   }
 }
 
-bool LiveInterval::NeedsTwoSpillSlots() const {
-  return type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble;
+size_t LiveInterval::NumberOfSpillSlotsNeeded() const {
+  // For a SIMD operation, compute the number of needed spill slots.
+  // TODO: derive this from the vector type instead?
+  HInstruction* definition = GetParent()->GetDefinedBy();
+  if (definition != nullptr && definition->IsVecOperation()) {
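+    // Each spill slot is kVRegSize bytes; e.g. a 16-byte SIMD value takes 4 slots.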
+    return definition->AsVecOperation()->GetVectorNumberOfBytes() / kVRegSize;
+  }
+  // Otherwise, return the number of spill slots needed based on the type.
+  return (type_ == Primitive::kPrimLong || type_ == Primitive::kPrimDouble) ? 2 : 1;
 }
 
 Location LiveInterval::ToLocation() const {
@@ -494,10 +501,11 @@
     if (defined_by->IsConstant()) {
       return defined_by->GetLocations()->Out();
     } else if (GetParent()->HasSpillSlot()) {
-      if (NeedsTwoSpillSlots()) {
-        return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
-      } else {
-        return Location::StackSlot(GetParent()->GetSpillSlot());
+      switch (NumberOfSpillSlotsNeeded()) {
+        case 1: return Location::StackSlot(GetParent()->GetSpillSlot());
+        case 2: return Location::DoubleStackSlot(GetParent()->GetSpillSlot());
+        case 4: return Location::SIMDStackSlot(GetParent()->GetSpillSlot());
+        default: LOG(FATAL) << "Unexpected number of spill slots"; UNREACHABLE();
       }
     } else {
       return Location();
diff --git a/compiler/optimizing/ssa_liveness_analysis.h b/compiler/optimizing/ssa_liveness_analysis.h
index b62bf4e..e9dffc1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.h
+++ b/compiler/optimizing/ssa_liveness_analysis.h
@@ -17,9 +17,10 @@
 #ifndef ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
 #define ART_COMPILER_OPTIMIZING_SSA_LIVENESS_ANALYSIS_H_
 
-#include "nodes.h"
 #include <iostream>
 
+#include "nodes.h"
+
 namespace art {
 
 class CodeGenerator;
@@ -103,21 +104,20 @@
  */
 class UsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
-  UsePosition(HInstruction* user,
-              HEnvironment* environment,
-              size_t input_index,
-              size_t position,
-              UsePosition* next)
+  UsePosition(HInstruction* user, size_t input_index, size_t position, UsePosition* next)
       : user_(user),
-        environment_(environment),
         input_index_(input_index),
         position_(position),
         next_(next) {
-    DCHECK(environment == nullptr || user == nullptr);
     DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
   }
 
-  static constexpr size_t kNoInput = -1;
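+  // Creates a synthesized use (no user instruction), e.g. for back edge uses.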
+  explicit UsePosition(size_t position)
+      : user_(nullptr),
+        input_index_(kNoInput),
+        position_(dchecked_integral_cast<uint32_t>(position)),
+        next_(nullptr) {
+  }
 
   size_t GetPosition() const { return position_; }
 
@@ -125,9 +125,7 @@
   void SetNext(UsePosition* next) { next_ = next; }
 
   HInstruction* GetUser() const { return user_; }
-  HEnvironment* GetEnvironment() const { return environment_; }
 
-  bool GetIsEnvironment() const { return environment_ != nullptr; }
   bool IsSynthesized() const { return user_ == nullptr; }
 
   size_t GetInputIndex() const { return input_index_; }
@@ -142,20 +140,20 @@
 
   UsePosition* Dup(ArenaAllocator* allocator) const {
     return new (allocator) UsePosition(
-        user_, environment_, input_index_, position_,
+        user_, input_index_, position_,
         next_ == nullptr ? nullptr : next_->Dup(allocator));
   }
 
   bool RequiresRegister() const {
-    if (GetIsEnvironment()) return false;
     if (IsSynthesized()) return false;
     Location location = GetUser()->GetLocations()->InAt(GetInputIndex());
     return location.IsUnallocated() && location.RequiresRegisterKind();
   }
 
  private:
+  static constexpr uint32_t kNoInput = static_cast<uint32_t>(-1);
+
   HInstruction* const user_;
-  HEnvironment* const environment_;
   const size_t input_index_;
   const size_t position_;
   UsePosition* next_;
@@ -163,6 +161,50 @@
   DISALLOW_COPY_AND_ASSIGN(UsePosition);
 };
 
+/**
+ * An environment use position represents a live interval for environment use at a given position.
+ */
+class EnvUsePosition : public ArenaObject<kArenaAllocSsaLiveness> {
+ public:
+  EnvUsePosition(HEnvironment* environment,
+                 size_t input_index,
+                 size_t position,
+                 EnvUsePosition* next)
+      : environment_(environment),
+        input_index_(input_index),
+        position_(position),
+        next_(next) {
+    DCHECK(environment != nullptr);
+    DCHECK(next_ == nullptr || next->GetPosition() >= GetPosition());
+  }
+
+  size_t GetPosition() const { return position_; }
+
+  EnvUsePosition* GetNext() const { return next_; }
+  void SetNext(EnvUsePosition* next) { next_ = next; }
+
+  HEnvironment* GetEnvironment() const { return environment_; }
+  size_t GetInputIndex() const { return input_index_; }
+
+  void Dump(std::ostream& stream) const {
+    stream << position_;
+  }
+
+  EnvUsePosition* Dup(ArenaAllocator* allocator) const {
+    return new (allocator) EnvUsePosition(
+        environment_, input_index_, position_,
+        next_ == nullptr ? nullptr : next_->Dup(allocator));
+  }
+
+ private:
+  HEnvironment* const environment_;
+  const size_t input_index_;
+  const size_t position_;
+  EnvUsePosition* next_;
+
+  DISALLOW_COPY_AND_ASSIGN(EnvUsePosition);
+};
+
 class SafepointPosition : public ArenaObject<kArenaAllocSsaLiveness> {
  public:
   explicit SafepointPosition(HInstruction* instruction)
@@ -227,7 +269,7 @@
     DCHECK(first_env_use_ == nullptr) << "A temporary cannot have an environment user";
     size_t position = instruction->GetLifetimePosition();
     first_use_ = new (allocator_) UsePosition(
-        instruction, /* environment */ nullptr, temp_index, position, first_use_);
+        instruction, temp_index, position, first_use_);
     AddRange(position, position + 1);
   }
 
@@ -276,7 +318,7 @@
       }
       DCHECK(first_use_->GetPosition() + 1 == position);
       UsePosition* new_use = new (allocator_) UsePosition(
-          instruction, nullptr /* environment */, input_index, position, cursor->GetNext());
+          instruction, input_index, position, cursor->GetNext());
       cursor->SetNext(new_use);
       if (first_range_->GetEnd() == first_use_->GetPosition()) {
         first_range_->end_ = position;
@@ -285,11 +327,11 @@
     }
 
     if (is_environment) {
-      first_env_use_ = new (allocator_) UsePosition(
-          nullptr /* instruction */, environment, input_index, position, first_env_use_);
+      first_env_use_ = new (allocator_) EnvUsePosition(
+          environment, input_index, position, first_env_use_);
     } else {
       first_use_ = new (allocator_) UsePosition(
-          instruction, nullptr /* environment */, input_index, position, first_use_);
+          instruction, input_index, position, first_use_);
     }
 
     if (is_environment && !keep_alive) {
@@ -328,10 +370,10 @@
       AddBackEdgeUses(*block);
     }
     first_use_ = new (allocator_) UsePosition(
-        instruction, /* environment */ nullptr, input_index, block->GetLifetimeEnd(), first_use_);
+        instruction, input_index, block->GetLifetimeEnd(), first_use_);
   }
 
-  void AddRange(size_t start, size_t end) {
+  ALWAYS_INLINE void AddRange(size_t start, size_t end) {
     if (first_range_ == nullptr) {
       first_range_ = last_range_ = range_search_start_ =
           new (allocator_) LiveRange(start, end, first_range_);
@@ -538,7 +580,7 @@
     return first_use_;
   }
 
-  UsePosition* GetFirstEnvironmentUse() const {
+  EnvUsePosition* GetFirstEnvironmentUse() const {
     return first_env_use_;
   }
 
@@ -676,7 +718,7 @@
       current = current->GetNext();
     }
     stream << "}, uses: { ";
-    UsePosition* use = first_use_;
+    const UsePosition* use = first_use_;
     if (use != nullptr) {
       do {
         use->Dump(stream);
@@ -684,12 +726,12 @@
       } while ((use = use->GetNext()) != nullptr);
     }
     stream << "}, { ";
-    use = first_env_use_;
-    if (use != nullptr) {
+    const EnvUsePosition* env_use = first_env_use_;
+    if (env_use != nullptr) {
       do {
-        use->Dump(stream);
+        env_use->Dump(stream);
         stream << " ";
-      } while ((use = use->GetNext()) != nullptr);
+      } while ((env_use = env_use->GetNext()) != nullptr);
     }
     stream << "}";
     stream << " is_fixed: " << is_fixed_ << ", is_split: " << IsSplit();
@@ -720,9 +762,9 @@
   // Returns kNoRegister otherwise.
   int FindHintAtDefinition() const;
 
-  // Returns whether the interval needs two (Dex virtual register size `kVRegSize`)
-  // slots for spilling.
-  bool NeedsTwoSpillSlots() const;
+  // Returns the number of required spilling slots (measured as a multiple of the
+  // Dex virtual register size `kVRegSize`).
+  size_t NumberOfSpillSlotsNeeded() const;
 
   bool IsFloatingPoint() const {
     return type_ == Primitive::kPrimFloat || type_ == Primitive::kPrimDouble;
@@ -1015,12 +1057,7 @@
       DCHECK(last_in_new_list == nullptr ||
              back_edge_use_position > last_in_new_list->GetPosition());
 
-      UsePosition* new_use = new (allocator_) UsePosition(
-          /* user */ nullptr,
-          /* environment */ nullptr,
-          UsePosition::kNoInput,
-          back_edge_use_position,
-          /* next */ nullptr);
+      UsePosition* new_use = new (allocator_) UsePosition(back_edge_use_position);
 
       if (last_in_new_list != nullptr) {
         // Going outward. The latest created use needs to point to the new use.
@@ -1056,7 +1093,7 @@
 
   // Uses of this interval. Note that this linked list is shared amongst siblings.
   UsePosition* first_use_;
-  UsePosition* first_env_use_;
+  EnvUsePosition* first_env_use_;
 
   // The instruction type this interval corresponds to.
   const Primitive::Type type_;
@@ -1210,8 +1247,7 @@
 
   // Returns whether `instruction` in an HEnvironment held by `env_holder`
   // should be kept live by the HEnvironment.
-  static bool ShouldBeLiveForEnvironment(HInstruction* env_holder,
-                                         HInstruction* instruction) {
+  static bool ShouldBeLiveForEnvironment(HInstruction* env_holder, HInstruction* instruction) {
     if (instruction == nullptr) return false;
     // A value that's not live in compiled code may still be needed in interpreter,
     // due to code motion, etc.
diff --git a/compiler/optimizing/ssa_liveness_analysis_test.cc b/compiler/optimizing/ssa_liveness_analysis_test.cc
new file mode 100644
index 0000000..a1016d1
--- /dev/null
+++ b/compiler/optimizing/ssa_liveness_analysis_test.cc
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arch/instruction_set.h"
+#include "arch/instruction_set_features.h"
+#include "base/arena_allocator.h"
+#include "base/arena_containers.h"
+#include "driver/compiler_options.h"
+#include "code_generator.h"
+#include "nodes.h"
+#include "optimizing_unit_test.h"
+#include "ssa_liveness_analysis.h"
+
+namespace art {
+
+class SsaLivenessAnalysisTest : public testing::Test {
+ public:
+  SsaLivenessAnalysisTest()
+      : pool_(),
+        allocator_(&pool_),
+        graph_(CreateGraph(&allocator_)),
+        compiler_options_(),
+        instruction_set_(kRuntimeISA) {
+    std::string error_msg;
+    instruction_set_features_ =
+        InstructionSetFeatures::FromVariant(instruction_set_, "default", &error_msg);
+    codegen_ = CodeGenerator::Create(graph_,
+                                     instruction_set_,
+                                     *instruction_set_features_,
+                                     compiler_options_);
+    CHECK(codegen_ != nullptr) << instruction_set_ << " is not a supported target architecture.";
+    // Create entry block.
+    entry_ = new (&allocator_) HBasicBlock(graph_);
+    graph_->AddBlock(entry_);
+    graph_->SetEntryBlock(entry_);
+  }
+
+ protected:
+  HBasicBlock* CreateSuccessor(HBasicBlock* block) {
+    HGraph* graph = block->GetGraph();
+    HBasicBlock* successor = new (&allocator_) HBasicBlock(graph);
+    graph->AddBlock(successor);
+    block->AddSuccessor(successor);
+    return successor;
+  }
+
+  ArenaPool pool_;
+  ArenaAllocator allocator_;
+  HGraph* graph_;
+  CompilerOptions compiler_options_;
+  InstructionSet instruction_set_;
+  std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
+  std::unique_ptr<CodeGenerator> codegen_;
+  HBasicBlock* entry_;
+};
+
+TEST_F(SsaLivenessAnalysisTest, TestReturnArg) {
+  HInstruction* arg = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimInt);
+  entry_->AddInstruction(arg);
+
+  HBasicBlock* block = CreateSuccessor(entry_);
+  HInstruction* ret = new (&allocator_) HReturn(arg);
+  block->AddInstruction(ret);
+  block->AddInstruction(new (&allocator_) HExit());
+
+  graph_->BuildDominatorTree();
+  SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+  ssa_analysis.Analyze();
+
+  std::ostringstream arg_dump;
+  arg->GetLiveInterval()->Dump(arg_dump);
+  EXPECT_STREQ("ranges: { [2,6) }, uses: { 6 }, { } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+               arg_dump.str().c_str());
+}
+
+TEST_F(SsaLivenessAnalysisTest, TestAput) {
+  HInstruction* array = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+  HInstruction* index = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+  HInstruction* value = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt);
+  HInstruction* extra_arg1 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt);
+  HInstruction* extra_arg2 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot);
+  ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 },
+                                  allocator_.Adapter());
+  for (HInstruction* insn : args) {
+    entry_->AddInstruction(insn);
+  }
+
+  HBasicBlock* block = CreateSuccessor(entry_);
+  HInstruction* null_check = new (&allocator_) HNullCheck(array, 0);
+  block->AddInstruction(null_check);
+  HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                null_check);
+  null_check_env->CopyFrom(args);
+  null_check->SetRawEnvironment(null_check_env);
+  HInstruction* length = new (&allocator_) HArrayLength(array, 0);
+  block->AddInstruction(length);
+  HInstruction* bounds_check = new (&allocator_) HBoundsCheck(index, length, /* dex_pc */ 0u);
+  block->AddInstruction(bounds_check);
+  HEnvironment* bounds_check_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                  /* number_of_vregs */ 5,
+                                                                  /* method */ nullptr,
+                                                                  /* dex_pc */ 0u,
+                                                                  bounds_check);
+  bounds_check_env->CopyFrom(args);
+  bounds_check->SetRawEnvironment(bounds_check_env);
+  HInstruction* array_set =
+      new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0);
+  block->AddInstruction(array_set);
+
+  graph_->BuildDominatorTree();
+  SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+  ssa_analysis.Analyze();
+
+  EXPECT_FALSE(graph_->IsDebuggable());
+  EXPECT_EQ(18u, bounds_check->GetLifetimePosition());
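+  // Input uses are recorded one position past their user (e.g. 19 for the
+  // bounds check at 18); the second { } group holds environment uses.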
+  static const char* const expected[] = {
+      "ranges: { [2,21) }, uses: { 15 17 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [4,21) }, uses: { 19 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [6,21) }, uses: { 21 }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      // Environment uses do not keep the non-reference argument alive.
+      "ranges: { [8,10) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+      // Environment uses keep the reference argument alive.
+      "ranges: { [10,19) }, uses: { }, { 15 19 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+  };
+  ASSERT_EQ(arraysize(expected), args.size());
+  size_t arg_index = 0u;
+  for (HInstruction* arg : args) {
+    std::ostringstream arg_dump;
+    arg->GetLiveInterval()->Dump(arg_dump);
+    EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index;
+    ++arg_index;
+  }
+}
+
+TEST_F(SsaLivenessAnalysisTest, TestDeoptimize) {
+  HInstruction* array = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(0), 0, Primitive::kPrimNot);
+  HInstruction* index = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(1), 1, Primitive::kPrimInt);
+  HInstruction* value = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(2), 2, Primitive::kPrimInt);
+  HInstruction* extra_arg1 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(3), 3, Primitive::kPrimInt);
+  HInstruction* extra_arg2 = new (&allocator_) HParameterValue(
+      graph_->GetDexFile(), dex::TypeIndex(4), 4, Primitive::kPrimNot);
+  ArenaVector<HInstruction*> args({ array, index, value, extra_arg1, extra_arg2 },
+                                  allocator_.Adapter());
+  for (HInstruction* insn : args) {
+    entry_->AddInstruction(insn);
+  }
+
+  HBasicBlock* block = CreateSuccessor(entry_);
+  HInstruction* null_check = new (&allocator_) HNullCheck(array, 0);
+  block->AddInstruction(null_check);
+  HEnvironment* null_check_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                null_check);
+  null_check_env->CopyFrom(args);
+  null_check->SetRawEnvironment(null_check_env);
+  HInstruction* length = new (&allocator_) HArrayLength(array, 0);
+  block->AddInstruction(length);
+  // Use HAboveOrEqual+HDeoptimize as the bounds check.
+  HInstruction* ae = new (&allocator_) HAboveOrEqual(index, length);
+  block->AddInstruction(ae);
+  HInstruction* deoptimize =
+      new (&allocator_) HDeoptimize(&allocator_, ae, HDeoptimize::Kind::kBCE, /* dex_pc */ 0u);
+  block->AddInstruction(deoptimize);
+  HEnvironment* deoptimize_env = new (&allocator_) HEnvironment(&allocator_,
+                                                                /* number_of_vregs */ 5,
+                                                                /* method */ nullptr,
+                                                                /* dex_pc */ 0u,
+                                                                deoptimize);
+  deoptimize_env->CopyFrom(args);
+  deoptimize->SetRawEnvironment(deoptimize_env);
+  HInstruction* array_set =
+      new (&allocator_) HArraySet(array, index, value, Primitive::kPrimInt, /* dex_pc */ 0);
+  block->AddInstruction(array_set);
+
+  graph_->BuildDominatorTree();
+  SsaLivenessAnalysis ssa_analysis(graph_, codegen_.get());
+  ssa_analysis.Analyze();
+
+  EXPECT_FALSE(graph_->IsDebuggable());
+  EXPECT_EQ(20u, deoptimize->GetLifetimePosition());
+  static const char* const expected[] = {
+      "ranges: { [2,23) }, uses: { 15 17 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [4,23) }, uses: { 19 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 "
+          "is_high: 0",
+      "ranges: { [6,23) }, uses: { 23 }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+      // Environment use in HDeoptimize keeps even the non-reference argument alive.
+      "ranges: { [8,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+      // Environment uses keep the reference argument alive.
+      "ranges: { [10,21) }, uses: { }, { 15 21 } is_fixed: 0, is_split: 0 is_low: 0 is_high: 0",
+  };
+  ASSERT_EQ(arraysize(expected), args.size());
+  size_t arg_index = 0u;
+  for (HInstruction* arg : args) {
+    std::ostringstream arg_dump;
+    arg->GetLiveInterval()->Dump(arg_dump);
+    EXPECT_STREQ(expected[arg_index], arg_dump.str().c_str()) << arg_index;
+    ++arg_index;
+  }
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 4d12ad6..b7840d7 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -152,6 +152,9 @@
   encoding.location_catalog.num_entries = location_catalog_entries_.size();
   encoding.location_catalog.num_bytes = ComputeDexRegisterLocationCatalogSize();
   encoding.inline_info.num_entries = inline_infos_.size();
+  // Must be done before calling ComputeInlineInfoEncoding, which requires the
+  // dex_method_index_idx fields to be filled in.
+  PrepareMethodIndices();
   ComputeInlineInfoEncoding(&encoding.inline_info.encoding,
                             encoding.dex_register_map.num_bytes);
   CodeOffset max_native_pc_offset = ComputeMaxNativePcCodeOffset();
@@ -245,7 +248,7 @@
     for (size_t j = 0; j < entry.inlining_depth; ++j) {
       InlineInfoEntry inline_entry = inline_infos_[inline_info_index++];
       if (inline_entry.method == nullptr) {
-        method_index_max = std::max(method_index_max, inline_entry.method_index);
+        method_index_max = std::max(method_index_max, inline_entry.dex_method_index_idx);
         extra_data_max = std::max(extra_data_max, 1u);
       } else {
         method_index_max = std::max(
@@ -288,7 +291,25 @@
   return entry.offset;
 }
 
-void StackMapStream::FillIn(MemoryRegion region) {
+void StackMapStream::FillInMethodInfo(MemoryRegion region) {
+  {
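+    // Write the deduplicated dex method indices into the region.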
+    MethodInfo info(region.begin(), method_indices_.size());
+    for (size_t i = 0; i < method_indices_.size(); ++i) {
+      info.SetMethodIndex(i, method_indices_[i]);
+    }
+  }
+  if (kIsDebugBuild) {
+    // Check the data matches.
+    MethodInfo info(region.begin());
+    const size_t count = info.NumMethodIndices();
+    DCHECK_EQ(count, method_indices_.size());
+    for (size_t i = 0; i < count; ++i) {
+      DCHECK_EQ(info.GetMethodIndex(i), method_indices_[i]);
+    }
+  }
+}
+
+void StackMapStream::FillInCodeInfo(MemoryRegion region) {
   DCHECK_EQ(0u, current_entry_.dex_pc) << "EndStackMapEntry not called after BeginStackMapEntry";
   DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before FillIn";
 
@@ -345,7 +366,7 @@
       InvokeInfo invoke_info(code_info.GetInvokeInfo(encoding, invoke_info_idx));
       invoke_info.SetNativePcCodeOffset(encoding.invoke_info.encoding, entry.native_pc_code_offset);
       invoke_info.SetInvokeType(encoding.invoke_info.encoding, entry.invoke_type);
-      invoke_info.SetMethodIndex(encoding.invoke_info.encoding, entry.dex_method_index);
+      invoke_info.SetMethodIndexIdx(encoding.invoke_info.encoding, entry.dex_method_index_idx);
       ++invoke_info_idx;
     }
 
@@ -364,7 +385,7 @@
       for (size_t depth = 0; depth < entry.inlining_depth; ++depth) {
         InlineInfoEntry inline_entry = inline_infos_[depth + entry.inline_infos_start_index];
         if (inline_entry.method != nullptr) {
-          inline_info.SetMethodIndexAtDepth(
+          inline_info.SetMethodIndexIdxAtDepth(
               encoding.inline_info.encoding,
               depth,
               High32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
@@ -373,9 +394,9 @@
               depth,
               Low32Bits(reinterpret_cast<uintptr_t>(inline_entry.method)));
         } else {
-          inline_info.SetMethodIndexAtDepth(encoding.inline_info.encoding,
-                                            depth,
-                                            inline_entry.method_index);
+          inline_info.SetMethodIndexIdxAtDepth(encoding.inline_info.encoding,
+                                               depth,
+                                               inline_entry.dex_method_index_idx);
           inline_info.SetExtraDataAtDepth(encoding.inline_info.encoding, depth, 1);
         }
         inline_info.SetDexPcAtDepth(encoding.inline_info.encoding, depth, inline_entry.dex_pc);
@@ -533,6 +554,29 @@
   return dedupe.size();
 }
 
+void StackMapStream::PrepareMethodIndices() {
+  CHECK(method_indices_.empty());
+  method_indices_.resize(stack_maps_.size() + inline_infos_.size());
+  ArenaUnorderedMap<uint32_t, size_t> dedupe(allocator_->Adapter(kArenaAllocStackMapStream));
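+  // Equal method indices share a single entry in method_indices_; each entry's
+  // position becomes the corresponding dex_method_index_idx.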
+  for (StackMapEntry& stack_map : stack_maps_) {
+    const size_t index = dedupe.size();
+    const uint32_t method_index = stack_map.dex_method_index;
+    if (method_index != DexFile::kDexNoIndex) {
+      stack_map.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
+      method_indices_[index] = method_index;
+    }
+  }
+  for (InlineInfoEntry& inline_info : inline_infos_) {
+    const size_t index = dedupe.size();
+    const uint32_t method_index = inline_info.method_index;
+    CHECK_NE(method_index, DexFile::kDexNoIndex);
+    inline_info.dex_method_index_idx = dedupe.emplace(method_index, index).first->second;
+    method_indices_[index] = method_index;
+  }
+  method_indices_.resize(dedupe.size());
+}
+
+
 size_t StackMapStream::PrepareStackMasks(size_t entry_size_in_bits) {
   // Preallocate memory since we do not want it to move (the dedup map will point into it).
   const size_t byte_entry_size = RoundUp(entry_size_in_bits, kBitsPerByte) / kBitsPerByte;
@@ -590,7 +634,8 @@
       DCHECK_EQ(invoke_info.GetNativePcOffset(encoding.invoke_info.encoding, instruction_set_),
                 entry.native_pc_code_offset.Uint32Value(instruction_set_));
       DCHECK_EQ(invoke_info.GetInvokeType(encoding.invoke_info.encoding), entry.invoke_type);
-      DCHECK_EQ(invoke_info.GetMethodIndex(encoding.invoke_info.encoding), entry.dex_method_index);
+      DCHECK_EQ(invoke_info.GetMethodIndexIdx(encoding.invoke_info.encoding),
+                entry.dex_method_index_idx);
       invoke_info_index++;
     }
     CheckDexRegisterMap(code_info,
@@ -615,8 +660,10 @@
           DCHECK_EQ(inline_info.GetArtMethodAtDepth(encoding.inline_info.encoding, d),
                     inline_entry.method);
         } else {
-          DCHECK_EQ(inline_info.GetMethodIndexAtDepth(encoding.inline_info.encoding, d),
-                    inline_entry.method_index);
+          const size_t method_index_idx =
+              inline_info.GetMethodIndexIdxAtDepth(encoding.inline_info.encoding, d);
+          DCHECK_EQ(method_index_idx, inline_entry.dex_method_index_idx);
+          DCHECK_EQ(method_indices_[method_index_idx], inline_entry.method_index);
         }
 
         CheckDexRegisterMap(code_info,
@@ -633,4 +680,9 @@
   }
 }
 
+size_t StackMapStream::ComputeMethodInfoSize() const {
+  DCHECK_NE(0u, needed_size_) << "PrepareForFillIn not called before " << __FUNCTION__;
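+  // method_indices_ is finalized by PrepareMethodIndices, called from PrepareForFillIn.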
+  return MethodInfo::ComputeSize(method_indices_.size());
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index 4225a87..e6471e1 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -22,6 +22,7 @@
 #include "base/hash_map.h"
 #include "base/value_object.h"
 #include "memory_region.h"
+#include "method_info.h"
 #include "nodes.h"
 #include "stack_map.h"
 
@@ -70,6 +71,7 @@
         inline_infos_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
         register_masks_(allocator->Adapter(kArenaAllocStackMapStream)),
+        method_indices_(allocator->Adapter(kArenaAllocStackMapStream)),
         dex_register_entries_(allocator->Adapter(kArenaAllocStackMapStream)),
         stack_mask_max_(-1),
         dex_pc_max_(0),
@@ -120,6 +122,7 @@
     size_t dex_register_map_index;
     InvokeType invoke_type;
     uint32_t dex_method_index;
+    uint32_t dex_method_index_idx;  // Index into dex method index table.
   };
 
   struct InlineInfoEntry {
@@ -128,6 +131,7 @@
     uint32_t method_index;
     DexRegisterMapEntry dex_register_entry;
     size_t dex_register_map_index;
+    uint32_t dex_method_index_idx;  // Index into the dex method index table.
   };
 
   void BeginStackMapEntry(uint32_t dex_pc,
@@ -164,7 +168,10 @@
   // Prepares the stream to fill in a memory region. Must be called before FillIn.
   // Returns the size (in bytes) needed to store this stream.
   size_t PrepareForFillIn();
-  void FillIn(MemoryRegion region);
+  void FillInCodeInfo(MemoryRegion region);
+  void FillInMethodInfo(MemoryRegion region);
+
+  size_t ComputeMethodInfoSize() const;
 
  private:
   size_t ComputeDexRegisterLocationCatalogSize() const;
@@ -180,6 +187,9 @@
   // Returns the number of unique register masks.
   size_t PrepareRegisterMasks();
 
+  // Prepare and deduplicate method indices.
+  void PrepareMethodIndices();
+
   // Deduplicate entry if possible and return the corresponding index into dex_register_entries_
   // array. If entry is not a duplicate, a new entry is added to dex_register_entries_.
   size_t AddDexRegisterMapEntry(const DexRegisterMapEntry& entry);
@@ -232,6 +242,7 @@
   ArenaVector<InlineInfoEntry> inline_infos_;
   ArenaVector<uint8_t> stack_masks_;
   ArenaVector<uint32_t> register_masks_;
+  ArenaVector<uint32_t> method_indices_;
   ArenaVector<DexRegisterMapEntry> dex_register_entries_;
   int stack_mask_max_;
   uint32_t dex_pc_max_;
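The header now commits to a two-region fill protocol; a condensed sketch of the expected call order (code_ptr and method_ptr are placeholder buffers; the real sequence appears in the test update below):

    // Assumed call sequence after this change:
    size_t code_info_size = stream.PrepareForFillIn();
    stream.FillInCodeInfo(MemoryRegion(code_ptr, code_info_size));
    size_t method_info_size = stream.ComputeMethodInfoSize();  // valid only after PrepareForFillIn
    stream.FillInMethodInfo(MemoryRegion(method_ptr, method_info_size));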
diff --git a/compiler/optimizing/stack_map_test.cc b/compiler/optimizing/stack_map_test.cc
index 330f7f2..a842c6e 100644
--- a/compiler/optimizing/stack_map_test.cc
+++ b/compiler/optimizing/stack_map_test.cc
@@ -60,7 +60,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -173,7 +173,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -433,7 +433,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -519,7 +519,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -611,7 +611,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -672,7 +672,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo ci(region);
   CodeInfoEncoding encoding = ci.ExtractEncoding();
@@ -721,7 +721,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -823,7 +823,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo ci(region);
   CodeInfoEncoding encoding = ci.ExtractEncoding();
@@ -950,7 +950,7 @@
   size_t size = stream.PrepareForFillIn();
   void* memory = arena.Alloc(size, kArenaAllocMisc);
   MemoryRegion region(memory, size);
-  stream.FillIn(region);
+  stream.FillInCodeInfo(region);
 
   CodeInfo code_info(region);
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
@@ -979,11 +979,16 @@
   stream.AddInvoke(kDirect, 65535);
   stream.EndStackMapEntry();
 
-  const size_t size = stream.PrepareForFillIn();
-  MemoryRegion region(arena.Alloc(size, kArenaAllocMisc), size);
-  stream.FillIn(region);
+  const size_t code_info_size = stream.PrepareForFillIn();
+  MemoryRegion code_info_region(arena.Alloc(code_info_size, kArenaAllocMisc), code_info_size);
+  stream.FillInCodeInfo(code_info_region);
 
-  CodeInfo code_info(region);
+  const size_t method_info_size = stream.ComputeMethodInfoSize();
+  MemoryRegion method_info_region(arena.Alloc(method_info_size, kArenaAllocMisc), method_info_size);
+  stream.FillInMethodInfo(method_info_region);
+
+  CodeInfo code_info(code_info_region);
+  MethodInfo method_info(method_info_region.begin());
   CodeInfoEncoding encoding = code_info.ExtractEncoding();
   ASSERT_EQ(3u, code_info.GetNumberOfStackMaps(encoding));
 
@@ -996,13 +1001,13 @@
   EXPECT_TRUE(invoke2.IsValid());
   EXPECT_TRUE(invoke3.IsValid());
   EXPECT_EQ(invoke1.GetInvokeType(encoding.invoke_info.encoding), kSuper);
-  EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding), 1u);
+  EXPECT_EQ(invoke1.GetMethodIndex(encoding.invoke_info.encoding, method_info), 1u);
   EXPECT_EQ(invoke1.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 4u);
   EXPECT_EQ(invoke2.GetInvokeType(encoding.invoke_info.encoding), kStatic);
-  EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding), 3u);
+  EXPECT_EQ(invoke2.GetMethodIndex(encoding.invoke_info.encoding, method_info), 3u);
   EXPECT_EQ(invoke2.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 8u);
   EXPECT_EQ(invoke3.GetInvokeType(encoding.invoke_info.encoding), kDirect);
-  EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding), 65535u);
+  EXPECT_EQ(invoke3.GetMethodIndex(encoding.invoke_info.encoding, method_info), 65535u);
   EXPECT_EQ(invoke3.GetNativePcOffset(encoding.invoke_info.encoding, kRuntimeISA), 16u);
 }
 
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index e5eef37..6afc3dd 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -230,6 +230,7 @@
   if (!CanHoldStoreOffsetThumb(type, offset)) {
     CHECK_NE(base.GetCode(), kIpCode);
     if ((reg.GetCode() != kIpCode) &&
+        (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) &&
         ((type != kStoreWordPair) || (reg.GetCode() + 1 != kIpCode))) {
       tmp_reg = temps.Acquire();
     } else {
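The extra IsEmpty() guard matters because VIXL's scratch pool may already be drained by an enclosing scope, and Acquire() on an empty pool aborts. A minimal sketch of the assumed vixl::aarch32 API (not code from this patch):

    // Only acquire a temp when the macro assembler still has a scratch
    // register to hand out; otherwise take the fallback path, exactly as
    // the new condition above does.
    UseScratchRegisterScope temps(&vixl_masm_);
    if (!vixl_masm_.GetScratchRegisterList()->IsEmpty()) {
      vixl32::Register tmp_reg = temps.Acquire();  // safe: pool known non-empty
    }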
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index 322f6c4..e81e767 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -135,6 +135,16 @@
  // jumping within a 2KB range. For B(cond, label), where the supported branch range is only 256
  // bytes, we use the far_target hint to try the 16-bit T1 encoding for short-range jumps.
   void B(vixl32::Condition cond, vixl32::Label* label, bool is_far_target = true);
+
+  // Materialize a double constant via a literal pool load if it doesn't fit the VMOV encoding.
+  void Vmov(vixl32::DRegister rd, double imm) {
+    if (vixl::VFP::IsImmFP64(imm)) {
+      MacroAssembler::Vmov(rd, imm);
+    } else {
+      MacroAssembler::Vldr(rd, imm);
+    }
+  }
+  using MacroAssembler::Vmov;
 };
 
 class ArmVIXLAssembler FINAL : public Assembler {
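vixl::VFP::IsImmFP64 implements the standard VFP modified-immediate test; a self-contained sketch of that predicate, assuming the ARM ARM constraint that the value must equal +/- n/16 * 2^r with n in [16, 31] and r in [-3, 4]:

    #include <cmath>

    // Hedged model of the check the Vmov helper above relies on.
    bool FitsVmovImmFP64(double imm) {
      if (imm == 0.0 || std::isnan(imm) || std::isinf(imm)) {
        return false;  // +/-0.0, NaN and infinities are not encodable
      }
      int exp = 0;
      double mant = std::frexp(std::fabs(imm), &exp);  // |imm| == mant * 2^exp, mant in [0.5, 1)
      double n = mant * 32.0;                          // candidate n, since |imm| == n/16 * 2^(exp-1)
      return n == std::floor(n) && 16.0 <= n && n <= 31.0 && -2 <= exp && exp <= 5;
    }

Values that fail this test (e.g. 1.1) fall through to Vldr, which loads the constant from a literal pool instead.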
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 5c48759..f655994 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -42,7 +42,10 @@
   kUseQuaternaryName,
 };
 
-template<typename Ass, typename Reg, typename FPReg, typename Imm>
+// Default type for the template parameter below, used by architectures without vector registers.
+struct NoVectorRegs {};
+
+template<typename Ass, typename Reg, typename FPReg, typename Imm, typename VecReg = NoVectorRegs>
 class AssemblerTest : public testing::Test {
  public:
   Ass* GetAssembler() {
@@ -146,7 +149,8 @@
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               const std::string& fmt,
-                                              int bias = 0) {
+                                              int bias = 0,
+                                              int multiplier = 1) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -154,7 +158,7 @@
       for (auto reg2 : reg2_registers) {
         for (int64_t imm : imms) {
           ImmType new_imm = CreateImmediate(imm);
-          (assembler_.get()->*f)(*reg1, *reg2, new_imm + bias);
+          (assembler_.get()->*f)(*reg1, *reg2, new_imm * multiplier + bias);
           std::string base = fmt;
 
           std::string reg1_string = (this->*GetName1)(*reg1);
@@ -172,7 +176,7 @@
           size_t imm_index = base.find(IMM_TOKEN);
           if (imm_index != std::string::npos) {
             std::ostringstream sreg;
-            sreg << imm + bias;
+            sreg << imm * multiplier + bias;
             std::string imm_string = sreg.str();
             base.replace(imm_index, ConstexprStrLen(IMM_TOKEN), imm_string);
           }
@@ -305,7 +309,7 @@
   template <typename RegType, typename ImmType>
   std::string RepeatTemplatedRegisterImmBits(void (Ass::*f)(RegType, ImmType),
                                              int imm_bits,
-                                             const std::vector<Reg*> registers,
+                                             const std::vector<RegType*> registers,
                                              std::string (AssemblerTest::*GetName)(const RegType&),
                                              const std::string& fmt,
                                              int bias) {
@@ -538,6 +542,82 @@
     return str;
   }
 
+  std::string RepeatVV(void (Ass::*f)(VecReg, VecReg), const std::string& fmt) {
+    return RepeatTemplatedRegisters<VecReg, VecReg>(f,
+                                                    GetVectorRegisters(),
+                                                    GetVectorRegisters(),
+                                                    &AssemblerTest::GetVecRegName,
+                                                    &AssemblerTest::GetVecRegName,
+                                                    fmt);
+  }
+
+  std::string RepeatVVV(void (Ass::*f)(VecReg, VecReg, VecReg), const std::string& fmt) {
+    return RepeatTemplatedRegisters<VecReg, VecReg, VecReg>(f,
+                                                            GetVectorRegisters(),
+                                                            GetVectorRegisters(),
+                                                            GetVectorRegisters(),
+                                                            &AssemblerTest::GetVecRegName,
+                                                            &AssemblerTest::GetVecRegName,
+                                                            &AssemblerTest::GetVecRegName,
+                                                            fmt);
+  }
+
+  std::string RepeatVR(void (Ass::*f)(VecReg, Reg), const std::string& fmt) {
+    return RepeatTemplatedRegisters<VecReg, Reg>(
+        f,
+        GetVectorRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetVecRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
+  template <typename ImmType>
+  std::string RepeatVIb(void (Ass::*f)(VecReg, ImmType),
+                        int imm_bits,
+                        std::string fmt,
+                        int bias = 0) {
+    return RepeatTemplatedRegisterImmBits<VecReg, ImmType>(f,
+                                                           imm_bits,
+                                                           GetVectorRegisters(),
+                                                           &AssemblerTest::GetVecRegName,
+                                                           fmt,
+                                                           bias);
+  }
+
+  template <typename ImmType>
+  std::string RepeatVRIb(void (Ass::*f)(VecReg, Reg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt,
+                         int bias = 0,
+                         int multiplier = 1) {
+    return RepeatTemplatedRegistersImmBits<VecReg, Reg, ImmType>(
+        f,
+        imm_bits,
+        GetVectorRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetVecRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt,
+        bias,
+        multiplier);
+  }
+
+  template <typename ImmType>
+  std::string RepeatVVIb(void (Ass::*f)(VecReg, VecReg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt,
+                         int bias = 0) {
+    return RepeatTemplatedRegistersImmBits<VecReg, VecReg, ImmType>(f,
+                                                                    imm_bits,
+                                                                    GetVectorRegisters(),
+                                                                    GetVectorRegisters(),
+                                                                    &AssemblerTest::GetVecRegName,
+                                                                    &AssemblerTest::GetVecRegName,
+                                                                    fmt,
+                                                                    bias);
+  }
+
   // This is intended to be run as a test.
   bool CheckTools() {
     return test_helper_->CheckTools();
@@ -552,6 +632,11 @@
     UNREACHABLE();
   }
 
+  virtual std::vector<VecReg*> GetVectorRegisters() {
+    UNIMPLEMENTED(FATAL) << "Architecture does not support vector registers";
+    UNREACHABLE();
+  }
+
   // Secondary register names are the secondary view on registers, e.g., 32b on 64b systems.
   virtual std::string GetSecondaryRegisterName(const Reg& reg ATTRIBUTE_UNUSED) {
     UNIMPLEMENTED(FATAL) << "Architecture does not support secondary registers";
@@ -971,6 +1056,12 @@
     return sreg.str();
   }
 
+  std::string GetVecRegName(const VecReg& reg) {
+    std::ostringstream sreg;
+    sreg << reg;
+    return sreg.str();
+  }
+
   // If the assembly file needs a header, return it in a sub-class.
   virtual const char* GetAssemblyHeader() {
     return nullptr;
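With the VecReg parameter in place, a target-specific test binds its vector register type and overrides GetVectorRegisters(). The sketch below shows the intended shape as a hypothetical MIPS64 fixture (names and wiring are illustrative, not part of this patch):

    // Assumed wiring: the fifth template argument selects the vector register
    // type, replacing the NoVectorRegs default.
    class AssemblerMIPS64VectorTest
        : public AssemblerTest<mips64::Mips64Assembler,
                               mips64::GpuRegister,
                               mips64::FpuRegister,
                               uint32_t,
                               mips64::VectorRegister> {
     protected:
      std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE {
        return vec_registers_;  // W0..W31, allocated in SetUp()
      }
      std::vector<mips64::VectorRegister*> vec_registers_;
    };

    TEST_F(AssemblerMIPS64VectorTest, AddvB) {
      DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvB,
                          "addv.b ${reg1}, ${reg2}, ${reg3}"),
                "addv.b");
    }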
diff --git a/compiler/utils/atomic_method_ref_map-inl.h b/compiler/utils/atomic_method_ref_map-inl.h
index d71c2fe..ad3a099 100644
--- a/compiler/utils/atomic_method_ref_map-inl.h
+++ b/compiler/utils/atomic_method_ref_map-inl.h
@@ -42,7 +42,7 @@
 inline bool AtomicMethodRefMap<T>::Get(MethodReference ref, T* out) const {
   const ElementArray* const array = GetArray(ref.dex_file);
   if (array == nullptr) {
-    return kInsertResultInvalidDexFile;
+    return false;
   }
   *out = (*array)[ref.dex_method_index].LoadRelaxed();
   return true;
diff --git a/compiler/utils/jni_macro_assembler.cc b/compiler/utils/jni_macro_assembler.cc
index 2f154fb..3ac6c3c 100644
--- a/compiler/utils/jni_macro_assembler.cc
+++ b/compiler/utils/jni_macro_assembler.cc
@@ -84,7 +84,11 @@
 MacroAsm64UniquePtr JNIMacroAssembler<PointerSize::k64>::Create(
     ArenaAllocator* arena,
     InstructionSet instruction_set,
-    const InstructionSetFeatures* instruction_set_features ATTRIBUTE_UNUSED) {
+    const InstructionSetFeatures* instruction_set_features) {
+#ifndef ART_ENABLE_CODEGEN_mips64
+  UNUSED(instruction_set_features);
+#endif
+
   switch (instruction_set) {
 #ifdef ART_ENABLE_CODEGEN_arm64
     case kArm64:
@@ -92,7 +96,11 @@
 #endif
 #ifdef ART_ENABLE_CODEGEN_mips64
     case kMips64:
-      return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(arena));
+      return MacroAsm64UniquePtr(new (arena) mips64::Mips64Assembler(
+          arena,
+          instruction_set_features != nullptr
+              ? instruction_set_features->AsMips64InstructionSetFeatures()
+              : nullptr));
 #endif
 #ifdef ART_ENABLE_CODEGEN_x86_64
     case kX86_64:
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index 5e83e82..2e2231b 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -3475,8 +3475,8 @@
   CHECK(dest.IsCoreRegister() && base.AsMips().IsCoreRegister());
   LoadFromOffset(kLoadWord, dest.AsCoreRegister(),
                  base.AsMips().AsCoreRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && unpoison_reference) {
-    Subu(dest.AsCoreRegister(), ZERO, dest.AsCoreRegister());
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsCoreRegister());
   }
 }
 
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index 2fca185..1a5a23d 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -501,8 +501,10 @@
                            bool is_float = false);
 
  private:
+  // This will be used as an argument for loads/stores
+  // when there is no need for implicit null checks.
   struct NoImplicitNullChecker {
-    void operator()() {}
+    void operator()() const {}
   };
 
  public:
@@ -727,6 +729,38 @@
   void Pop(Register rd);
   void PopAndReturn(Register rd, Register rt);
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `src` and store it in `dst`.
+  void PoisonHeapReference(Register dst, Register src) {
+    // dst = -src.
+    Subu(dst, ZERO, src);
+  }
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(Register reg) {
+    // reg = -reg.
+    PoisonHeapReference(reg, reg);
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(Register reg) {
+    // reg = -reg.
+    Subu(reg, ZERO, reg);
+  }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(Register reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
+
   void Bind(Label* label) OVERRIDE {
     Bind(down_cast<MipsLabel*>(label));
   }
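Poisoning on mips32 is plain two's-complement negation, which is its own inverse, so poison followed by unpoison is the identity on every 32-bit reference:

    #include <cstdint>

    // Model of Subu(reg, ZERO, reg) on a 32-bit reference (illustration only).
    uint32_t Negate32(uint32_t ref) { return 0u - ref; }
    // For all ref: Negate32(Negate32(ref)) == ref, so the Maybe* helpers
    // round-trip cleanly whenever kPoisonHeapReferences is enabled.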
diff --git a/compiler/utils/mips64/assembler_mips64.cc b/compiler/utils/mips64/assembler_mips64.cc
index 998f2c7..0cff44d 100644
--- a/compiler/utils/mips64/assembler_mips64.cc
+++ b/compiler/utils/mips64/assembler_mips64.cc
@@ -184,6 +184,122 @@
   Emit(encoding);
 }
 
+void Mips64Assembler::EmitMsa3R(int operation,
+                                int df,
+                                VectorRegister wt,
+                                VectorRegister ws,
+                                VectorRegister wd,
+                                int minor_opcode) {
+  CHECK_NE(wt, kNoVectorRegister);
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaOperationShift |
+                      df << kDfShift |
+                      static_cast<uint32_t>(wt) << kWtShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitMsaBIT(int operation,
+                                 int df_m,
+                                 VectorRegister ws,
+                                 VectorRegister wd,
+                                 int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaOperationShift |
+                      df_m << kDfMShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitMsaELM(int operation,
+                                 int df_n,
+                                 VectorRegister ws,
+                                 VectorRegister wd,
+                                 int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaELMOperationShift |
+                      df_n << kDfNShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitMsaMI10(int s10,
+                                  GpuRegister rs,
+                                  VectorRegister wd,
+                                  int minor_opcode,
+                                  int df) {
+  CHECK_NE(rs, kNoGpuRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  CHECK(IsUint<10>(s10)) << s10;
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      s10 << kS10Shift |
+                      static_cast<uint32_t>(rs) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode << kS10MinorShift |
+                      df;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitMsaI10(int operation,
+                                 int df,
+                                 int i10,
+                                 VectorRegister wd,
+                                 int minor_opcode) {
+  CHECK_NE(wd, kNoVectorRegister);
+  CHECK(IsUint<10>(i10)) << i10;
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsaOperationShift |
+                      df << kDfShift |
+                      i10 << kI10Shift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitMsa2R(int operation,
+                                int df,
+                                VectorRegister ws,
+                                VectorRegister wd,
+                                int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsa2ROperationShift |
+                      df << kDf2RShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+}
+
+void Mips64Assembler::EmitMsa2RF(int operation,
+                                 int df,
+                                 VectorRegister ws,
+                                 VectorRegister wd,
+                                 int minor_opcode) {
+  CHECK_NE(ws, kNoVectorRegister);
+  CHECK_NE(wd, kNoVectorRegister);
+  uint32_t encoding = static_cast<uint32_t>(kMsaMajorOpcode) << kOpcodeShift |
+                      operation << kMsa2RFOperationShift |
+                      df << kDf2RShift |
+                      static_cast<uint32_t>(ws) << kWsShift |
+                      static_cast<uint32_t>(wd) << kWdShift |
+                      minor_opcode;
+  Emit(encoding);
+}
+
 void Mips64Assembler::Addu(GpuRegister rd, GpuRegister rs, GpuRegister rt) {
   EmitR(0, rs, rt, rd, 0, 0x21);
 }
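For a concrete instance of the 3R packing, assume the usual MSA field positions (kOpcodeShift = 26, kMsaOperationShift = 23, kDfShift = 21, kWtShift = 16, kWsShift = 11, kWdShift = 6 — these constants are defined outside this hunk). AddvW(W0, W1, W2) calls EmitMsa3R(0x0, 0x2, W2, W1, W0, 0xe) and packs to:

    0x1e << 26 | 0x0 << 23 | 0x2 << 21 | 2 << 16 | 1 << 11 | 0 << 6 | 0xe
      = 0x78000000 | 0x00400000 | 0x00020000 | 0x00000800 | 0x0000000e
      = 0x7842080e   // addv.w w0, w1, w2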
@@ -488,6 +604,11 @@
   EmitI(0xf, rs, rt, imm16);
 }
 
+void Mips64Assembler::Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16) {
+  CHECK_NE(rs, ZERO);
+  EmitI(0x1d, rs, rt, imm16);
+}
+
 void Mips64Assembler::Dahi(GpuRegister rs, uint16_t imm16) {
   EmitI(1, rs, static_cast<GpuRegister>(6), imm16);
 }
@@ -1075,6 +1196,485 @@
   Nor(rd, rs, ZERO);
 }
 
+void Mips64Assembler::AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xe);
+}
+
+void Mips64Assembler::MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x0, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x1, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x2, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x4, 0x3, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x0, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x1, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x2, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x5, 0x3, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x0, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x1, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x2, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x6, 0x3, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x0, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x1, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x2, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x7, 0x3, wt, ws, wd, 0x12);
+}
+
+void Mips64Assembler::FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x0, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x1, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x2, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x3, wt, ws, wd, 0x1b);
+}
+
+void Mips64Assembler::Ffint_sW(VectorRegister wd, VectorRegister ws) {
+  CHECK(HasMsa());
+  EmitMsa2RF(0x19e, 0x0, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::Ffint_sD(VectorRegister wd, VectorRegister ws) {
+  CHECK(HasMsa());
+  EmitMsa2RF(0x19e, 0x1, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::Ftint_sW(VectorRegister wd, VectorRegister ws) {
+  CHECK(HasMsa());
+  EmitMsa2RF(0x19c, 0x0, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::Ftint_sD(VectorRegister wd, VectorRegister ws) {
+  CHECK(HasMsa());
+  EmitMsa2RF(0x19c, 0x1, ws, wd, 0x1e);
+}
+
+void Mips64Assembler::SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x0, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x1, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x2, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x0, 0x3, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x0, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x1, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x2, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x1, 0x3, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x0, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x1, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x2, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt) {
+  CHECK(HasMsa());
+  EmitMsa3R(0x2, 0x3, wt, ws, wd, 0xd);
+}
+
+void Mips64Assembler::SlliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;
+  EmitMsaBIT(0x0, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SlliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;
+  EmitMsaBIT(0x0, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SlliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;
+  EmitMsaBIT(0x0, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SlliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;
+  EmitMsaBIT(0x0, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SraiB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;
+  EmitMsaBIT(0x1, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SraiH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;
+  EmitMsaBIT(0x1, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SraiW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;
+  EmitMsaBIT(0x1, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SraiD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;
+  EmitMsaBIT(0x1, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SrliB(VectorRegister wd, VectorRegister ws, int shamt3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(shamt3)) << shamt3;
+  EmitMsaBIT(0x2, shamt3 | kMsaDfMByteMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SrliH(VectorRegister wd, VectorRegister ws, int shamt4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(shamt4)) << shamt4;
+  EmitMsaBIT(0x2, shamt4 | kMsaDfMHalfwordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SrliW(VectorRegister wd, VectorRegister ws, int shamt5) {
+  CHECK(HasMsa());
+  CHECK(IsUint<5>(shamt5)) << shamt5;
+  EmitMsaBIT(0x2, shamt5 | kMsaDfMWordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::SrliD(VectorRegister wd, VectorRegister ws, int shamt6) {
+  CHECK(HasMsa());
+  CHECK(IsUint<6>(shamt6)) << shamt6;
+  EmitMsaBIT(0x2, shamt6 | kMsaDfMDoublewordMask, ws, wd, 0x9);
+}
+
+void Mips64Assembler::MoveV(VectorRegister wd, VectorRegister ws) {
+  CHECK(HasMsa());
+  EmitMsaBIT(0x1, 0x3e, ws, wd, 0x19);
+}
+
+void Mips64Assembler::SplatiB(VectorRegister wd, VectorRegister ws, int n4) {
+  CHECK(HasMsa());
+  CHECK(IsUint<4>(n4)) << n4;
+  EmitMsaELM(0x1, n4 | kMsaDfNByteMask, ws, wd, 0x19);
+}
+
+void Mips64Assembler::SplatiH(VectorRegister wd, VectorRegister ws, int n3) {
+  CHECK(HasMsa());
+  CHECK(IsUint<3>(n3)) << n3;
+  EmitMsaELM(0x1, n3 | kMsaDfNHalfwordMask, ws, wd, 0x19);
+}
+
+void Mips64Assembler::SplatiW(VectorRegister wd, VectorRegister ws, int n2) {
+  CHECK(HasMsa());
+  CHECK(IsUint<2>(n2)) << n2;
+  EmitMsaELM(0x1, n2 | kMsaDfNWordMask, ws, wd, 0x19);
+}
+
+void Mips64Assembler::SplatiD(VectorRegister wd, VectorRegister ws, int n1) {
+  CHECK(HasMsa());
+  CHECK(IsUint<1>(n1)) << n1;
+  EmitMsaELM(0x1, n1 | kMsaDfNDoublewordMask, ws, wd, 0x19);
+}
+
+void Mips64Assembler::FillB(VectorRegister wd, GpuRegister rs) {
+  CHECK(HasMsa());
+  EmitMsa2R(0xc0, 0x0, static_cast<VectorRegister>(rs), wd, 0x1e);
+}
+
+void Mips64Assembler::FillH(VectorRegister wd, GpuRegister rs) {
+  CHECK(HasMsa());
+  EmitMsa2R(0xc0, 0x1, static_cast<VectorRegister>(rs), wd, 0x1e);
+}
+
+void Mips64Assembler::FillW(VectorRegister wd, GpuRegister rs) {
+  CHECK(HasMsa());
+  EmitMsa2R(0xc0, 0x2, static_cast<VectorRegister>(rs), wd, 0x1e);
+}
+
+void Mips64Assembler::FillD(VectorRegister wd, GpuRegister rs) {
+  CHECK(HasMsa());
+  EmitMsa2R(0xc0, 0x3, static_cast<VectorRegister>(rs), wd, 0x1e);
+}
+
+void Mips64Assembler::LdiB(VectorRegister wd, int imm8) {
+  CHECK(HasMsa());
+  CHECK(IsInt<8>(imm8)) << imm8;
+  EmitMsaI10(0x6, 0x0, imm8 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdiH(VectorRegister wd, int imm10) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;
+  EmitMsaI10(0x6, 0x1, imm10 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdiW(VectorRegister wd, int imm10) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;
+  EmitMsaI10(0x6, 0x2, imm10 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdiD(VectorRegister wd, int imm10) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(imm10)) << imm10;
+  EmitMsaI10(0x6, 0x3, imm10 & kMsaS10Mask, wd, 0x7);
+}
+
+void Mips64Assembler::LdB(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(offset)) << offset;
+  EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x8, 0x0);
+}
+
+void Mips64Assembler::LdH(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<11>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMips64HalfwordSize);
+  EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x8, 0x1);
+}
+
+void Mips64Assembler::LdW(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<12>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMips64WordSize);
+  EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x8, 0x2);
+}
+
+void Mips64Assembler::LdD(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<13>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMips64DoublewordSize);
+  EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x8, 0x3);
+}
+
+void Mips64Assembler::StB(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<10>(offset)) << offset;
+  EmitMsaMI10(offset & kMsaS10Mask, rs, wd, 0x9, 0x0);
+}
+
+void Mips64Assembler::StH(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<11>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMips64HalfwordSize);
+  EmitMsaMI10((offset >> TIMES_2) & kMsaS10Mask, rs, wd, 0x9, 0x1);
+}
+
+void Mips64Assembler::StW(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<12>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMips64WordSize);
+  EmitMsaMI10((offset >> TIMES_4) & kMsaS10Mask, rs, wd, 0x9, 0x2);
+}
+
+void Mips64Assembler::StD(VectorRegister wd, GpuRegister rs, int offset) {
+  CHECK(HasMsa());
+  CHECK(IsInt<13>(offset)) << offset;
+  CHECK_ALIGNED(offset, kMips64DoublewordSize);
+  EmitMsaMI10((offset >> TIMES_8) & kMsaS10Mask, rs, wd, 0x9, 0x3);
+}
+
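The MI10 loads/stores encode a signed 10-bit offset in units of the element size, which is why each Ld/St helper first checks alignment and range and then shifts the byte offset right; e.g. for ld.w:

    // Byte offsets must be 4-aligned and fit in 12 signed bits, i.e. lie in
    // [-2048, 2044]; the encoded field is offset / 4 in [-512, 511].
    // LdW(W0, SP, 256)  ->  s10 field == 256 >> TIMES_4 == 64
    // LdD(W0, SP, -8)   ->  s10 field == -8 >> TIMES_8 == -1 (masked to 10 bits)

(TIMES_2/TIMES_4/TIMES_8 are taken to be log2 scale factors, consistent with the shifts above.)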
 void Mips64Assembler::LoadConst32(GpuRegister rd, int32_t value) {
   TemplateLoadConst32(this, rd, value);
 }
@@ -1101,6 +1701,7 @@
   }
 }
 
+// TODO: don't use rtmp, use daui, dahi, dati.
 void Mips64Assembler::Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp) {
   if (IsInt<16>(value)) {
     Daddiu(rt, rs, value);
@@ -2015,80 +2616,103 @@
   Bcond(label, kCondT, static_cast<GpuRegister>(ft), ZERO);
 }
 
-void Mips64Assembler::LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base,
-                                     int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
+void Mips64Assembler::AdjustBaseAndOffset(GpuRegister& base,
+                                          int32_t& offset,
+                                          bool is_doubleword) {
+  // This method is used to adjust the base register and offset pair
+  // for a load/store when the offset doesn't fit into int16_t.
+  // It is assumed that `base + offset` is sufficiently aligned for memory
+  // operands that are machine word in size or smaller. For doubleword-sized
+  // operands it's assumed that `base` is a multiple of 8, while `offset`
+  // may be a multiple of 4 (e.g. 4-byte-aligned long and double arguments
+  // and spilled variables on the stack accessed relative to the stack
+  // pointer register).
+  // We preserve the "alignment" of `offset` by adjusting it by a multiple of 8.
+  CHECK_NE(base, AT);  // Must not overwrite the register `base` while loading `offset`.
+
+  bool doubleword_aligned = IsAligned<kMips64DoublewordSize>(offset);
+  bool two_accesses = is_doubleword && !doubleword_aligned;
+
+  // IsInt<16> must be passed a signed value, hence the static cast below.
+  if (IsInt<16>(offset) &&
+      (!two_accesses || IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
+    // Nothing to do: `offset` (and, if needed, `offset + 4`) fits into int16_t.
+    return;
   }
 
-  switch (type) {
-    case kLoadSignedByte:
-      Lb(reg, base, offset);
-      break;
-    case kLoadUnsignedByte:
-      Lbu(reg, base, offset);
-      break;
-    case kLoadSignedHalfword:
-      Lh(reg, base, offset);
-      break;
-    case kLoadUnsignedHalfword:
-      Lhu(reg, base, offset);
-      break;
-    case kLoadWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Lw(reg, base, offset);
-      break;
-    case kLoadUnsignedWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Lwu(reg, base, offset);
-      break;
-    case kLoadDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Lwu(reg, base, offset);
-        Lwu(TMP2, base, offset + kMips64WordSize);
-        Dinsu(reg, TMP2, 32, 32);
-      } else {
-        Ld(reg, base, offset);
-      }
-      break;
+  // Remember the "(mis)alignment" of `offset`, it will be checked at the end.
+  uint32_t misalignment = offset & (kMips64DoublewordSize - 1);
+
+  // First, see if `offset` can be represented as a sum of two 16-bit signed
+  // offsets. This can save an instruction.
+  // To simplify matters, only do this for a symmetric range of offsets from
+  // about -64KB to about +64KB, allowing further addition of 4 when accessing
+  // 64-bit variables with two 32-bit accesses.
+  constexpr int32_t kMinOffsetForSimpleAdjustment = 0x7ff8;  // Max int16_t that's a multiple of 8.
+  constexpr int32_t kMaxOffsetForSimpleAdjustment = 2 * kMinOffsetForSimpleAdjustment;
+
+  if (0 <= offset && offset <= kMaxOffsetForSimpleAdjustment) {
+    Daddiu(AT, base, kMinOffsetForSimpleAdjustment);
+    offset -= kMinOffsetForSimpleAdjustment;
+  } else if (-kMaxOffsetForSimpleAdjustment <= offset && offset < 0) {
+    Daddiu(AT, base, -kMinOffsetForSimpleAdjustment);
+    offset += kMinOffsetForSimpleAdjustment;
+  } else {
+    // In more complex cases take advantage of the daui instruction, e.g.:
+    //    daui   AT, base, offset_high
+    //   [dahi   AT, 1]                       // When `offset` is close to +2GB.
+    //    lw     reg_lo, offset_low(AT)
+    //   [lw     reg_hi, (offset_low+4)(AT)]  // If misaligned 64-bit load.
+    // or when offset_low+4 overflows int16_t:
+    //    daui   AT, base, offset_high
+    //    daddiu AT, AT, 8
+    //    lw     reg_lo, (offset_low-8)(AT)
+    //    lw     reg_hi, (offset_low-4)(AT)
+    int16_t offset_low = Low16Bits(offset);
+    int32_t offset_low32 = offset_low;
+    int16_t offset_high = High16Bits(offset);
+    bool increment_hi16 = offset_low < 0;
+    bool overflow_hi16 = false;
+
+    if (increment_hi16) {
+      offset_high++;
+      overflow_hi16 = (offset_high == -32768);
+    }
+    Daui(AT, base, offset_high);
+
+    if (overflow_hi16) {
+      Dahi(AT, 1);
+    }
+
+    if (two_accesses && !IsInt<16>(static_cast<int32_t>(offset_low32 + kMips64WordSize))) {
+      // Avoid overflow in the 16-bit offset of the load/store instruction when adding 4.
+      Daddiu(AT, AT, kMips64DoublewordSize);
+      offset_low32 -= kMips64DoublewordSize;
+    }
+
+    offset = offset_low32;
   }
+  base = AT;
+
+  CHECK(IsInt<16>(offset));
+  if (two_accesses) {
+    CHECK(IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)));
+  }
+  CHECK_EQ(misalignment, offset & (kMips64DoublewordSize - 1));
 }
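Two concrete runs of this adjustment, using kMinOffsetForSimpleAdjustment = 0x7ff8 as defined above:

    // offset = 0x9000 (fits the simple range 0..0xfff0):
    //   daddiu AT, base, 0x7ff8   ; offset becomes 0x9000 - 0x7ff8 = 0x1008
    //   lw     reg, 0x1008(AT)    ; one extra instruction, no lui/daui needed
    //
    // offset = 0x12348 (outside the simple range):
    //   offset_low = 0x2348 (non-negative, so offset_high stays 0x0001)
    //   daui   AT, base, 0x0001   ; AT = base + 0x10000
    //   lw     reg, 0x2348(AT)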
 
-void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base,
-                                        int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kLoadDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
+void Mips64Assembler::LoadFromOffset(LoadOperandType type,
+                                     GpuRegister reg,
+                                     GpuRegister base,
+                                     int32_t offset) {
+  LoadFromOffset<>(type, reg, base, offset);
+}
 
-  switch (type) {
-    case kLoadWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Lwc1(reg, base, offset);
-      break;
-    case kLoadDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Lwc1(reg, base, offset);
-        Lw(TMP2, base, offset + kMips64WordSize);
-        Mthc1(TMP2, reg);
-      } else {
-        Ldc1(reg, base, offset);
-      }
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
+void Mips64Assembler::LoadFpuFromOffset(LoadOperandType type,
+                                        FpuRegister reg,
+                                        GpuRegister base,
+                                        int32_t offset) {
+  LoadFpuFromOffset<>(type, reg, base, offset);
 }
 
 void Mips64Assembler::EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset,
@@ -2118,72 +2742,18 @@
   }
 }
 
-void Mips64Assembler::StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base,
+void Mips64Assembler::StoreToOffset(StoreOperandType type,
+                                    GpuRegister reg,
+                                    GpuRegister base,
                                     int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
-
-  switch (type) {
-    case kStoreByte:
-      Sb(reg, base, offset);
-      break;
-    case kStoreHalfword:
-      Sh(reg, base, offset);
-      break;
-    case kStoreWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Sw(reg, base, offset);
-      break;
-    case kStoreDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Sw(reg, base, offset);
-        Dsrl32(TMP2, reg, 0);
-        Sw(TMP2, base, offset + kMips64WordSize);
-      } else {
-        Sd(reg, base, offset);
-      }
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
+  StoreToOffset<>(type, reg, base, offset);
 }
 
-void Mips64Assembler::StoreFpuToOffset(StoreOperandType type, FpuRegister reg, GpuRegister base,
+void Mips64Assembler::StoreFpuToOffset(StoreOperandType type,
+                                       FpuRegister reg,
+                                       GpuRegister base,
                                        int32_t offset) {
-  if (!IsInt<16>(offset) ||
-      (type == kStoreDoubleword && !IsAligned<kMips64DoublewordSize>(offset) &&
-       !IsInt<16>(static_cast<int32_t>(offset + kMips64WordSize)))) {
-    LoadConst32(AT, offset & ~(kMips64DoublewordSize - 1));
-    Daddu(AT, AT, base);
-    base = AT;
-    offset &= (kMips64DoublewordSize - 1);
-  }
-
-  switch (type) {
-    case kStoreWord:
-      CHECK_ALIGNED(offset, kMips64WordSize);
-      Swc1(reg, base, offset);
-      break;
-    case kStoreDoubleword:
-      if (!IsAligned<kMips64DoublewordSize>(offset)) {
-        CHECK_ALIGNED(offset, kMips64WordSize);
-        Mfhc1(TMP2, reg);
-        Swc1(reg, base, offset);
-        Sw(TMP2, base, offset + kMips64WordSize);
-      } else {
-        Sdc1(reg, base, offset);
-      }
-      break;
-    default:
-      LOG(FATAL) << "UNREACHABLE";
-  }
+  StoreFpuToOffset<>(type, reg, base, offset);
 }
 
 static dwarf::Reg DWARFReg(GpuRegister reg) {
@@ -2367,12 +2937,8 @@
   CHECK(dest.IsGpuRegister() && base.AsMips64().IsGpuRegister());
   LoadFromOffset(kLoadUnsignedWord, dest.AsGpuRegister(),
                  base.AsMips64().AsGpuRegister(), offs.Int32Value());
-  if (kPoisonHeapReferences && unpoison_reference) {
-    // TODO: review
-    // Negate the 32-bit ref
-    Dsubu(dest.AsGpuRegister(), ZERO, dest.AsGpuRegister());
-    // And constrain it to 32 bits (zero-extend into bits 32 through 63) as on Arm64 and x86/64
-    Dext(dest.AsGpuRegister(), dest.AsGpuRegister(), 0, 32);
+  if (unpoison_reference) {
+    MaybeUnpoisonHeapReference(dest.AsGpuRegister());
   }
 }
 
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index a0a1db6..666c693 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -21,6 +21,7 @@
 #include <utility>
 #include <vector>
 
+#include "arch/mips64/instruction_set_features_mips64.h"
 #include "base/arena_containers.h"
 #include "base/enums.h"
 #include "base/macros.h"
@@ -266,6 +267,7 @@
   }
 }
 
+static constexpr size_t kMips64HalfwordSize = 2;
 static constexpr size_t kMips64WordSize = 4;
 static constexpr size_t kMips64DoublewordSize = 8;
 
@@ -412,7 +414,8 @@
  public:
   using JNIBase = JNIMacroAssembler<PointerSize::k64>;
 
-  explicit Mips64Assembler(ArenaAllocator* arena)
+  explicit Mips64Assembler(ArenaAllocator* arena,
+                           const Mips64InstructionSetFeatures* instruction_set_features = nullptr)
       : Assembler(arena),
         overwriting_(false),
         overwrite_location_(0),
@@ -421,7 +424,8 @@
         jump_tables_(arena->Adapter(kArenaAllocAssembler)),
         last_position_adjustment_(0),
         last_old_position_(0),
-        last_branch_id_(0) {
+        last_branch_id_(0),
+        has_msa_(instruction_set_features != nullptr ? instruction_set_features->HasMsa() : false) {
     cfi().DelayEmittingAdvancePCs();
   }
 
@@ -512,6 +516,7 @@
   void Ldpc(GpuRegister rs, uint32_t imm18);  // MIPS64
   void Lui(GpuRegister rt, uint16_t imm16);
   void Aui(GpuRegister rt, GpuRegister rs, uint16_t imm16);
+  void Daui(GpuRegister rt, GpuRegister rs, uint16_t imm16);  // MIPS64
   void Dahi(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Dati(GpuRegister rs, uint16_t imm16);  // MIPS64
   void Sync(uint32_t stype);
@@ -643,6 +648,105 @@
   void Clear(GpuRegister rd);
   void Not(GpuRegister rd, GpuRegister rs);
 
+  // MSA instructions.
+  void AndV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void OrV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void NorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void XorV(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void AddvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void AddvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SubvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void MulvD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Div_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_sD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void Mod_uD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void FaddW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FaddD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FsubD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FmulD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void FdivD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  void Ffint_sW(VectorRegister wd, VectorRegister ws);
+  void Ffint_sD(VectorRegister wd, VectorRegister ws);
+  void Ftint_sW(VectorRegister wd, VectorRegister ws);
+  void Ftint_sD(VectorRegister wd, VectorRegister ws);
+
+  void SllB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SllD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SraD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlB(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlH(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlW(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+  void SrlD(VectorRegister wd, VectorRegister ws, VectorRegister wt);
+
+  // Immediate shift instructions, where shamtN denotes shift amount (must be between 0 and 2^N-1).
+  void SlliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SlliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SlliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SlliD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SraiB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SraiH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SraiW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SraiD(VectorRegister wd, VectorRegister ws, int shamt6);
+  void SrliB(VectorRegister wd, VectorRegister ws, int shamt3);
+  void SrliH(VectorRegister wd, VectorRegister ws, int shamt4);
+  void SrliW(VectorRegister wd, VectorRegister ws, int shamt5);
+  void SrliD(VectorRegister wd, VectorRegister ws, int shamt6);
+
+  void MoveV(VectorRegister wd, VectorRegister ws);
+  void SplatiB(VectorRegister wd, VectorRegister ws, int n4);
+  void SplatiH(VectorRegister wd, VectorRegister ws, int n3);
+  void SplatiW(VectorRegister wd, VectorRegister ws, int n2);
+  void SplatiD(VectorRegister wd, VectorRegister ws, int n1);
+  void FillB(VectorRegister wd, GpuRegister rs);
+  void FillH(VectorRegister wd, GpuRegister rs);
+  void FillW(VectorRegister wd, GpuRegister rs);
+  void FillD(VectorRegister wd, GpuRegister rs);
+
+  void LdiB(VectorRegister wd, int imm8);
+  void LdiH(VectorRegister wd, int imm10);
+  void LdiW(VectorRegister wd, int imm10);
+  void LdiD(VectorRegister wd, int imm10);
+  void LdB(VectorRegister wd, GpuRegister rs, int offset);
+  void LdH(VectorRegister wd, GpuRegister rs, int offset);
+  void LdW(VectorRegister wd, GpuRegister rs, int offset);
+  void LdD(VectorRegister wd, GpuRegister rs, int offset);
+  void StB(VectorRegister wd, GpuRegister rs, int offset);
+  void StH(VectorRegister wd, GpuRegister rs, int offset);
+  void StW(VectorRegister wd, GpuRegister rs, int offset);
+  void StD(VectorRegister wd, GpuRegister rs, int offset);
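+  // Note on Ld*/St*: these use the MSA MI10 format, whose signed 10-bit offset
+  // field is scaled by the element size (presumably in EmitMsaMI10), so byte
+  // offsets passed here must be multiples of 2/4/8 for .h/.w/.d; the tests
+  // below restrict their immediates accordingly.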
+
   // Higher level composite instructions.
   int InstrCountForLoadReplicatedConst32(int64_t);
   void LoadConst32(GpuRegister rd, int32_t value);
@@ -654,6 +758,44 @@
   void Addiu32(GpuRegister rt, GpuRegister rs, int32_t value);
   void Daddiu64(GpuRegister rt, GpuRegister rs, int64_t value, GpuRegister rtmp = AT);  // MIPS64
 
+  //
+  // Heap poisoning.
+  //
+
+  // Poison a heap reference contained in `src` and store it in `dst`.
+  void PoisonHeapReference(GpuRegister dst, GpuRegister src) {
+    // dst = -src.
+    // Negate the 32-bit ref.
+    Dsubu(dst, ZERO, src);
+    // And constrain it to 32 bits (clear bits 32 through 63) as on Arm64 and x86/64.
+    Dext(dst, dst, 0, 32);
+  }
+  // Poison a heap reference contained in `reg`.
+  void PoisonHeapReference(GpuRegister reg) {
+    // reg = -reg.
+    PoisonHeapReference(reg, reg);
+  }
+  // Unpoison a heap reference contained in `reg`.
+  void UnpoisonHeapReference(GpuRegister reg) {
+    // reg = -reg.
+    // Negate the 32-bit ref.
+    Dsubu(reg, ZERO, reg);
+    // And constrain it to 32 bits (clear bits 32 through 63) as on Arm64 and x86/64.
+    Dext(reg, reg, 0, 32);
+  }
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(GpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      PoisonHeapReference(reg);
+    }
+  }
+  // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybeUnpoisonHeapReference(GpuRegister reg) {
+    if (kPoisonHeapReferences) {
+      UnpoisonHeapReference(reg);
+    }
+  }
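+  // Illustrative note: Dsubu+Dext computes uint32_t(0 - ref), so poisoning is
+  // an involution and unpoisoning emits the same instruction pair. A minimal
+  // host-side sketch of the arithmetic:
+  //   uint32_t Poison32(uint32_t ref) { return static_cast<uint32_t>(0u - ref); }
+  //   // Poison32(Poison32(x)) == x for every 32-bit x.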
+
   void Bind(Label* label) OVERRIDE {
     Bind(down_cast<Mips64Label*>(label));
   }
@@ -733,6 +875,240 @@
   void Bc1nez(FpuRegister ft, Mips64Label* label);
 
   void EmitLoad(ManagedRegister m_dst, GpuRegister src_register, int32_t src_offset, size_t size);
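+  // Adjusts `base` and `offset` in place so that `offset` fits in the 16-bit
+  // immediate of a load/store, materializing the adjusted base in AT when
+  // needed (see the expected sequences in assembler_mips64_test.cc below).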
+  void AdjustBaseAndOffset(GpuRegister& base, int32_t& offset, bool is_doubleword);
+
+ private:
+  // Used as the default functor argument for loads/stores
+  // when no implicit null check is needed.
+  struct NoImplicitNullChecker {
+    void operator()() const {}
+  };
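+  // The checker functor is invoked immediately after the memory instruction
+  // that may fault, so a SIGSEGV raised there can be attributed to the
+  // implicit null check on `base`.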
+
+ public:
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void StoreConstToOffset(StoreOperandType type,
+                          int64_t value,
+                          GpuRegister base,
+                          int32_t offset,
+                          GpuRegister temp,
+                          ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    // We permit `base` and `temp` to coincide, in which case the `base` register
+    // may be overwritten in the process; however, neither register may be AT.
+    CHECK_NE(temp, AT);  // Must not use AT as temp, so as not to overwrite the adjusted base.
+    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+    GpuRegister reg;
+    // If the adjustment left `base` unchanged and equal to `temp`, we can't use `temp`
+    // to load and hold the value but we can use AT instead as AT hasn't been used yet.
+    // Otherwise, `temp` can be used for the value. And if `temp` is the same as the
+    // original `base` (that is, `base` prior to the adjustment), the original `base`
+    // register will be overwritten.
+    if (base == temp) {
+      temp = AT;
+    }
+
+    if (type == kStoreDoubleword && IsAligned<kMips64DoublewordSize>(offset)) {
+      if (value == 0) {
+        reg = ZERO;
+      } else {
+        reg = temp;
+        LoadConst64(reg, value);
+      }
+      Sd(reg, base, offset);
+      null_checker();
+    } else {
+      uint32_t low = Low32Bits(value);
+      uint32_t high = High32Bits(value);
+      if (low == 0) {
+        reg = ZERO;
+      } else {
+        reg = temp;
+        LoadConst32(reg, low);
+      }
+      switch (type) {
+        case kStoreByte:
+          Sb(reg, base, offset);
+          break;
+        case kStoreHalfword:
+          Sh(reg, base, offset);
+          break;
+        case kStoreWord:
+          Sw(reg, base, offset);
+          break;
+        case kStoreDoubleword:
+          // The offset is not aligned to kMips64DoublewordSize; store as two words.
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Sw(reg, base, offset);
+          null_checker();
+          if (high == 0) {
+            reg = ZERO;
+          } else {
+            reg = temp;
+            if (high != low) {
+              LoadConst32(reg, high);
+            }
+          }
+          Sw(reg, base, offset + kMips64WordSize);
+          break;
+        default:
+          LOG(FATAL) << "UNREACHABLE";
+      }
+      if (type != kStoreDoubleword) {
+        null_checker();
+      }
+    }
+  }
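+  // Hypothetical usage (the lambda and its callee are illustrative only, not
+  // part of this patch):
+  //   __ StoreConstToOffset(kStoreWord, /* value */ 0, obj, field_offset, TMP,
+  //                         [&]() { RecordImplicitNullCheck(); });
+  // With the default NoImplicitNullChecker argument, existing call sites need
+  // no changes.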
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void LoadFromOffset(LoadOperandType type,
+                      GpuRegister reg,
+                      GpuRegister base,
+                      int32_t offset,
+                      ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+
+    switch (type) {
+      case kLoadSignedByte:
+        Lb(reg, base, offset);
+        break;
+      case kLoadUnsignedByte:
+        Lbu(reg, base, offset);
+        break;
+      case kLoadSignedHalfword:
+        Lh(reg, base, offset);
+        break;
+      case kLoadUnsignedHalfword:
+        Lhu(reg, base, offset);
+        break;
+      case kLoadWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lw(reg, base, offset);
+        break;
+      case kLoadUnsignedWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwu(reg, base, offset);
+        break;
+      case kLoadDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Lwu(reg, base, offset);
+          null_checker();
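+          // Load the upper word and merge it into bits 32..63 of `reg`.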
+          Lwu(TMP2, base, offset + kMips64WordSize);
+          Dinsu(reg, TMP2, 32, 32);
+        } else {
+          Ld(reg, base, offset);
+          null_checker();
+        }
+        break;
+    }
+    if (type != kLoadDoubleword) {
+      null_checker();
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void LoadFpuFromOffset(LoadOperandType type,
+                         FpuRegister reg,
+                         GpuRegister base,
+                         int32_t offset,
+                         ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kLoadDoubleword));
+
+    switch (type) {
+      case kLoadWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Lwc1(reg, base, offset);
+        null_checker();
+        break;
+      case kLoadDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Lwc1(reg, base, offset);
+          null_checker();
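+          // Load the upper word into TMP2 and move it to the high 32 bits
+          // of the FPU register.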
+          Lw(TMP2, base, offset + kMips64WordSize);
+          Mthc1(TMP2, reg);
+        } else {
+          Ldc1(reg, base, offset);
+          null_checker();
+        }
+        break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void StoreToOffset(StoreOperandType type,
+                     GpuRegister reg,
+                     GpuRegister base,
+                     int32_t offset,
+                     ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    // Must not use AT as `reg`, so as not to overwrite the value being stored
+    // with the adjusted `base`.
+    CHECK_NE(reg, AT);
+    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+
+    switch (type) {
+      case kStoreByte:
+        Sb(reg, base, offset);
+        break;
+      case kStoreHalfword:
+        Sh(reg, base, offset);
+        break;
+      case kStoreWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Sw(reg, base, offset);
+        break;
+      case kStoreDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
+          Sw(reg, base, offset);
+          null_checker();
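+          // Shift the high 32 bits down and store them as the second word.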
+          Dsrl32(TMP2, reg, 0);
+          Sw(TMP2, base, offset + kMips64WordSize);
+        } else {
+          Sd(reg, base, offset);
+          null_checker();
+        }
+        break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
+    }
+    if (type != kStoreDoubleword) {
+      null_checker();
+    }
+  }
+
+  template <typename ImplicitNullChecker = NoImplicitNullChecker>
+  void StoreFpuToOffset(StoreOperandType type,
+                        FpuRegister reg,
+                        GpuRegister base,
+                        int32_t offset,
+                        ImplicitNullChecker null_checker = NoImplicitNullChecker()) {
+    AdjustBaseAndOffset(base, offset, /* is_doubleword */ (type == kStoreDoubleword));
+
+    switch (type) {
+      case kStoreWord:
+        CHECK_ALIGNED(offset, kMips64WordSize);
+        Swc1(reg, base, offset);
+        null_checker();
+        break;
+      case kStoreDoubleword:
+        if (!IsAligned<kMips64DoublewordSize>(offset)) {
+          CHECK_ALIGNED(offset, kMips64WordSize);
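+          // Copy the high 32 bits of the FPU register to TMP2; they are
+          // stored after the null-checked low-word store.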
+          Mfhc1(TMP2, reg);
+          Swc1(reg, base, offset);
+          null_checker();
+          Sw(TMP2, base, offset + kMips64WordSize);
+        } else {
+          Sdc1(reg, base, offset);
+          null_checker();
+        }
+        break;
+      default:
+        LOG(FATAL) << "UNREACHABLE";
+    }
+  }
+
   void LoadFromOffset(LoadOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
   void LoadFpuFromOffset(LoadOperandType type, FpuRegister reg, GpuRegister base, int32_t offset);
   void StoreToOffset(StoreOperandType type, GpuRegister reg, GpuRegister base, int32_t offset);
@@ -1076,6 +1452,18 @@
   void EmitFR(int opcode, int fmt, FpuRegister ft, FpuRegister fs, FpuRegister fd, int funct);
   void EmitFI(int opcode, int fmt, FpuRegister rt, uint16_t imm);
   void EmitBcondc(BranchCondition cond, GpuRegister rs, GpuRegister rt, uint32_t imm16_21);
+  void EmitMsa3R(int operation,
+                 int df,
+                 VectorRegister wt,
+                 VectorRegister ws,
+                 VectorRegister wd,
+                 int minor_opcode);
+  void EmitMsaBIT(int operation, int df_m, VectorRegister ws, VectorRegister wd, int minor_opcode);
+  void EmitMsaELM(int operation, int df_n, VectorRegister ws, VectorRegister wd, int minor_opcode);
+  void EmitMsaMI10(int s10, GpuRegister rs, VectorRegister wd, int minor_opcode, int df);
+  void EmitMsaI10(int operation, int df, int i10, VectorRegister wd, int minor_opcode);
+  void EmitMsa2R(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
+  void EmitMsa2RF(int operation, int df, VectorRegister ws, VectorRegister wd, int minor_opcode);
 
   void Buncond(Mips64Label* label);
   void Bcond(Mips64Label* label,
@@ -1099,6 +1487,10 @@
   // Emits exception block.
   void EmitExceptionPoll(Mips64ExceptionSlowPath* exception);
 
+  bool HasMsa() const {
+    return has_msa_;
+  }
+
   // List of exception blocks to generate at the end of the code cache.
   std::vector<Mips64ExceptionSlowPath> exception_blocks_;
 
@@ -1122,6 +1514,8 @@
   uint32_t last_old_position_;
   uint32_t last_branch_id_;
 
+  const bool has_msa_;
+
   DISALLOW_COPY_AND_ASSIGN(Mips64Assembler);
 };
 
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 74b8f06..f2e3b16 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -37,12 +37,17 @@
 class AssemblerMIPS64Test : public AssemblerTest<mips64::Mips64Assembler,
                                                  mips64::GpuRegister,
                                                  mips64::FpuRegister,
-                                                 uint32_t> {
+                                                 uint32_t,
+                                                 mips64::VectorRegister> {
  public:
   typedef AssemblerTest<mips64::Mips64Assembler,
                         mips64::GpuRegister,
                         mips64::FpuRegister,
-                        uint32_t> Base;
+                        uint32_t,
+                        mips64::VectorRegister> Base;
+
+  AssemblerMIPS64Test()
+      : instruction_set_features_(Mips64InstructionSetFeatures::FromVariant("default", nullptr)) {}
 
  protected:
   // Get the typically used name for this architecture, e.g., aarch64, x86-64, ...
@@ -60,7 +65,7 @@
     // (and MIPS32R6) with the GNU assembler don't have correct final offsets in PC-relative
     // branches in the .text section and so they require a relocation pass (there's a relocation
     // section, .rela.text, that has the needed info to fix up the branches).
-    return " -march=mips64r6 -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
+    return " -march=mips64r6 -mmsa -Wa,--no-warn -Wl,-Ttext=0 -Wl,-e0 -nostdlib";
   }
 
   void Pad(std::vector<uint8_t>& data) OVERRIDE {
@@ -76,6 +81,10 @@
     return " -D -bbinary -mmips:isa64r6";
   }
 
+  mips64::Mips64Assembler* CreateAssembler(ArenaAllocator* arena) OVERRIDE {
+    return new (arena) mips64::Mips64Assembler(arena, instruction_set_features_.get());
+  }
+
   void SetUpHelpers() OVERRIDE {
     if (registers_.size() == 0) {
       registers_.push_back(new mips64::GpuRegister(mips64::ZERO));
@@ -176,6 +185,39 @@
       fp_registers_.push_back(new mips64::FpuRegister(mips64::F29));
       fp_registers_.push_back(new mips64::FpuRegister(mips64::F30));
       fp_registers_.push_back(new mips64::FpuRegister(mips64::F31));
+
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W0));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W1));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W2));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W3));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W4));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W5));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W6));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W7));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W8));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W9));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W10));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W11));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W12));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W13));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W14));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W15));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W16));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W17));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W18));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W19));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W20));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W21));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W22));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W23));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W24));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W25));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W26));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W27));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W28));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W29));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W30));
+      vec_registers_.push_back(new mips64::VectorRegister(mips64::W31));
     }
   }
 
@@ -183,6 +225,7 @@
     AssemblerTest::TearDown();
     STLDeleteElements(&registers_);
     STLDeleteElements(&fp_registers_);
+    STLDeleteElements(&vec_registers_);
   }
 
   std::vector<mips64::GpuRegister*> GetRegisters() OVERRIDE {
@@ -193,6 +236,10 @@
     return fp_registers_;
   }
 
+  std::vector<mips64::VectorRegister*> GetVectorRegisters() OVERRIDE {
+    return vec_registers_;
+  }
+
   uint32_t CreateImmediate(int64_t imm_value) OVERRIDE {
     return imm_value;
   }
@@ -272,8 +319,10 @@
   std::map<mips64::GpuRegister, std::string, MIPS64CpuRegisterCompare> secondary_register_names_;
 
   std::vector<mips64::FpuRegister*> fp_registers_;
-};
+  std::vector<mips64::VectorRegister*> vec_registers_;
 
+  std::unique_ptr<const Mips64InstructionSetFeatures> instruction_set_features_;
+};
 
 TEST_F(AssemblerMIPS64Test, Toolchain) {
   EXPECT_TRUE(CheckTools());
@@ -1269,6 +1318,24 @@
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Lui, 16, "lui ${reg}, {imm}"), "lui");
 }
 
+TEST_F(AssemblerMIPS64Test, Daui) {
+  std::vector<mips64::GpuRegister*> reg1_registers = GetRegisters();
+  std::vector<mips64::GpuRegister*> reg2_registers = GetRegisters();
+  reg2_registers.erase(reg2_registers.begin());  // reg2 can't be ZERO; remove it.
+  std::vector<int64_t> imms = CreateImmediateValuesBits(/* imm_bits */ 16, /* as_uint */ true);
+  WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
+  std::ostringstream expected;
+  for (mips64::GpuRegister* reg1 : reg1_registers) {
+    for (mips64::GpuRegister* reg2 : reg2_registers) {
+      for (int64_t imm : imms) {
+        __ Daui(*reg1, *reg2, imm);
+        expected << "daui $" << *reg1 << ", $" << *reg2 << ", " << imm << "\n";
+      }
+    }
+  }
+  DriverStr(expected.str(), "daui");
+}
+
 TEST_F(AssemblerMIPS64Test, Dahi) {
   DriverStr(RepeatRIb(&mips64::Mips64Assembler::Dahi, 16, "dahi ${reg}, ${reg}, {imm}"), "dahi");
 }
@@ -1542,6 +1609,10 @@
   __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x7FFFFFFF);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadSignedByte, mips64::A0, mips64::A1, 0x80000001);
 
   __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A0, 0);
   __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0);
@@ -1556,6 +1627,10 @@
   __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x7FFFFFFF);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadUnsignedByte, mips64::A0, mips64::A1, 0x80000001);
 
   __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A0, 0);
   __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0);
@@ -1570,6 +1645,10 @@
   __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadSignedHalfword, mips64::A0, mips64::A1, 0x80000002);
 
   __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A0, 0);
   __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0);
@@ -1584,6 +1663,10 @@
   __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x7FFFFFFE);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadUnsignedHalfword, mips64::A0, mips64::A1, 0x80000002);
 
   __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A0, 0);
   __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0);
@@ -1598,6 +1681,10 @@
   __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadWord, mips64::A0, mips64::A1, 0x80000004);
 
   __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A0, 0);
   __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0);
@@ -1612,6 +1699,10 @@
   __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadUnsignedWord, mips64::A0, mips64::A1, 0x80000004);
 
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A0, 0);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0);
@@ -1622,10 +1713,15 @@
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8000);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x8004);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x10000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x27FFC);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x12345678);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -256);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, -32768);
   __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x80000000);
+  __ LoadFromOffset(mips64::kLoadDoubleword, mips64::A0, mips64::A1, 0x80000004);
 
   const char* expected =
       "lb $a0, 0($a0)\n"
@@ -1634,25 +1730,28 @@
       "lb $a0, 256($a1)\n"
       "lb $a0, 1000($a1)\n"
       "lb $a0, 0x7FFF($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "lb $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lb $a0, 9($at)\n"
+      "daui $at, $a1, 1\n"
       "lb $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lb $a0, 1($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "lb $a0, 0($at)\n"
+      "daui $at, $a1, 0x1234\n"
+      "lb $a0, 0x5678($at)\n"
       "lb $a0, -256($a1)\n"
       "lb $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
+      "daui $at, $a1, 0xABCE\n"
+      "lb $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lb $a0, -2($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lb $a0, -1($at)\n"
+      "daui $at, $a1, 32768\n"
       "lb $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lb $a0, 1($at)\n"
 
       "lbu $a0, 0($a0)\n"
       "lbu $a0, 0($a1)\n"
@@ -1660,25 +1759,28 @@
       "lbu $a0, 256($a1)\n"
       "lbu $a0, 1000($a1)\n"
       "lbu $a0, 0x7FFF($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "lbu $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lbu $a0, 9($at)\n"
+      "daui $at, $a1, 1\n"
       "lbu $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lbu $a0, 1($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "lbu $a0, 0($at)\n"
+      "daui $at, $a1, 0x1234\n"
+      "lbu $a0, 0x5678($at)\n"
       "lbu $a0, -256($a1)\n"
       "lbu $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
+      "daui $at, $a1, 0xABCE\n"
+      "lbu $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lbu $a0, -2($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lbu $a0, -1($at)\n"
+      "daui $at, $a1, 32768\n"
       "lbu $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lbu $a0, 1($at)\n"
 
       "lh $a0, 0($a0)\n"
       "lh $a0, 0($a1)\n"
@@ -1686,25 +1788,28 @@
       "lh $a0, 256($a1)\n"
       "lh $a0, 1000($a1)\n"
       "lh $a0, 0x7FFE($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "lh $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lh $a0, 10($at)\n"
+      "daui $at, $a1, 1\n"
       "lh $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lh $a0, 2($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "lh $a0, 0($at)\n"
+      "daui $at, $a1, 0x1234\n"
+      "lh $a0, 0x5678($at)\n"
       "lh $a0, -256($a1)\n"
       "lh $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
+      "daui $at, $a1, 0xABCE\n"
+      "lh $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lh $a0, -4($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lh $a0, -2($at)\n"
+      "daui $at, $a1, 32768\n"
       "lh $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lh $a0, 2($at)\n"
 
       "lhu $a0, 0($a0)\n"
       "lhu $a0, 0($a1)\n"
@@ -1712,25 +1817,28 @@
       "lhu $a0, 256($a1)\n"
       "lhu $a0, 1000($a1)\n"
       "lhu $a0, 0x7FFE($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "lhu $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lhu $a0, 10($at)\n"
+      "daui $at, $a1, 1\n"
       "lhu $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lhu $a0, 2($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "lhu $a0, 0($at)\n"
+      "daui $at, $a1, 0x1234\n"
+      "lhu $a0, 0x5678($at)\n"
       "lhu $a0, -256($a1)\n"
       "lhu $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
+      "daui $at, $a1, 0xABCE\n"
+      "lhu $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lhu $a0, -4($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lhu $a0, -2($at)\n"
+      "daui $at, $a1, 32768\n"
       "lhu $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lhu $a0, 2($at)\n"
 
       "lw $a0, 0($a0)\n"
       "lw $a0, 0($a1)\n"
@@ -1738,25 +1846,28 @@
       "lw $a0, 256($a1)\n"
       "lw $a0, 1000($a1)\n"
       "lw $a0, 0x7FFC($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "lw $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lw $a0, 12($at)\n"
+      "daui $at, $a1, 1\n"
       "lw $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lw $a0, 4($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "lw $a0, 0($at)\n"
+      "daui $at, $a1, 0x1234\n"
+      "lw $a0, 0x5678($at)\n"
       "lw $a0, -256($a1)\n"
       "lw $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
+      "daui $at, $a1, 0xABCE\n"
+      "lw $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lw $a0, -8($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lw $a0, -4($at)\n"
+      "daui $at, $a1, 32768\n"
       "lw $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lw $a0, 4($at)\n"
 
       "lwu $a0, 0($a0)\n"
       "lwu $a0, 0($a1)\n"
@@ -1764,59 +1875,73 @@
       "lwu $a0, 256($a1)\n"
       "lwu $a0, 1000($a1)\n"
       "lwu $a0, 0x7FFC($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "lwu $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lwu $a0, 12($at)\n"
+      "daui $at, $a1, 1\n"
       "lwu $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lwu $a0, 4($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "lwu $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "lwu $a0, 0($at)\n"
+      "daui $at, $a1, 0x1234\n"
+      "lwu $a0, 0x5678($at)\n"
       "lwu $a0, -256($a1)\n"
       "lwu $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
+      "daui $at, $a1, 0xABCE\n"
+      "lwu $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lwu $a0, -8($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lwu $a0, -4($at)\n"
+      "daui $at, $a1, 32768\n"
       "lwu $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lwu $a0, 4($at)\n"
 
       "ld $a0, 0($a0)\n"
       "ld $a0, 0($a1)\n"
       "lwu $a0, 4($a1)\n"
       "lwu $t3, 8($a1)\n"
-      "dins $a0, $t3, 32, 32\n"
+      "dinsu $a0, $t3, 32, 32\n"
       "ld $a0, 256($a1)\n"
       "ld $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x7FF8\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 32760\n"
       "lwu $a0, 4($at)\n"
       "lwu $t3, 8($at)\n"
-      "dins $a0, $t3, 32, 32\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "dinsu $a0, $t3, 32, 32\n"
+      "daddiu $at, $a1, 32760\n"
+      "ld $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760\n"
+      "lwu $a0, 12($at)\n"
+      "lwu $t3, 16($at)\n"
+      "dinsu $a0, $t3, 32, 32\n"
+      "daui $at, $a1, 1\n"
       "ld $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "lwu $a0, 4($at)\n"
-      "lwu $t3, 8($at)\n"
-      "dins $a0, $t3, 32, 32\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "ld $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "ld $a0, 0($at)\n"
+      "daui $at, $a1, 2\n"
+      "daddiu $at, $at, 8\n"
+      "lwu $a0, 0x7ff4($at)\n"
+      "lwu $t3, 0x7ff8($at)\n"
+      "dinsu $a0, $t3, 32, 32\n"
+      "daui $at, $a1, 0x1234\n"
+      "ld $a0, 0x5678($at)\n"
       "ld $a0, -256($a1)\n"
       "ld $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
-      "ld $a0, 0($at)\n";
+      "daui $at, $a1, 0xABCE\n"
+      "ld $a0, -4352($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "ld $a0, -8($at)\n"
+      "daui $at, $a1, 32768\n"
+      "dahi $at, $at, 1\n"
+      "lwu $a0, -4($at)\n"
+      "lwu $t3, 0($at)\n"
+      "dinsu $a0, $t3, 32, 32\n"
+      "daui $at, $a1, 32768\n"
+      "ld $a0, 0($at)\n"
+      "daui $at, $a1, 32768\n"
+      "lwu $a0, 4($at)\n"
+      "lwu $t3, 8($at)\n"
+      "dinsu $a0, $t3, 32, 32\n";
   DriverStr(expected, "LoadFromOffset");
 }
 
@@ -1850,57 +1975,42 @@
       "lwc1 $f0, 4($a0)\n"
       "lwc1 $f0, 256($a0)\n"
       "lwc1 $f0, 0x7FFC($a0)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "lwc1 $f0, 8($at)\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "lwc1 $f0, 12($at)\n"
+      "daui $at, $a0, 1\n"
       "lwc1 $f0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
-      "lwc1 $f0, 4($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
+      "daui $at, $a0, 4660 # 0x1234\n"
+      "lwc1 $f0, 22136($at) # 0x5678\n"
       "lwc1 $f0, -256($a0)\n"
       "lwc1 $f0, -32768($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a0\n"
-      "lwc1 $f0, 0($at)\n"
+      "daui $at, $a0, 0xABCE\n"
+      "lwc1 $f0, -0x1100($at) # 0xEF00\n"
 
       "ldc1 $f0, 0($a0)\n"
       "lwc1 $f0, 4($a0)\n"
       "lw $t3, 8($a0)\n"
       "mthc1 $t3, $f0\n"
       "ldc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x7FF8\n"
-      "daddu $at, $at, $a0\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
       "lwc1 $f0, 4($at)\n"
       "lw $t3, 8($at)\n"
       "mthc1 $t3, $f0\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
-      "ldc1 $f0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
-      "lwc1 $f0, 4($at)\n"
-      "lw $t3, 8($at)\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "ldc1 $f0, 8($at)\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "lwc1 $f0, 12($at)\n"
+      "lw $t3, 16($at)\n"
       "mthc1 $t3, $f0\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a0\n"
+      "daui $at, $a0, 1\n"
       "ldc1 $f0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a0\n"
-      "ldc1 $f0, 0($at)\n"
+      "daui $at, $a0, 4660 # 0x1234\n"
+      "ldc1 $f0, 22136($at) # 0x5678\n"
       "ldc1 $f0, -256($a0)\n"
       "ldc1 $f0, -32768($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a0\n"
-      "ldc1 $f0, 0($at)\n";
+      "daui $at, $a0, 0xABCE\n"
+      "ldc1 $f0, -0x1100($at) # 0xEF00\n";
   DriverStr(expected, "LoadFpuFromOffset");
 }
 
@@ -1960,6 +2070,10 @@
   __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -256);
   __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, -32768);
   __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0xABCDEF00);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFFFFF8);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x7FFFFFFC);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x80000000);
+  __ StoreToOffset(mips64::kStoreDoubleword, mips64::A0, mips64::A1, 0x80000004);
 
   const char* expected =
       "sb $a0, 0($a0)\n"
@@ -1968,25 +2082,18 @@
       "sb $a0, 256($a1)\n"
       "sb $a0, 1000($a1)\n"
       "sb $a0, 0x7FFF($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "sb $a0, 8($at)\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "sb $a0, 9($at)\n"
+      "daui $at, $a1, 1\n"
       "sb $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "sb $a0, 1($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
+      "daui $at, $a1, 4660 # 0x1234\n"
+      "sb $a0, 22136($at) # 0x5678\n"
       "sb $a0, -256($a1)\n"
       "sb $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
-      "sb $a0, 0($at)\n"
+      "daui $at, $a1, 43982 # 0xABCE\n"
+      "sb $a0, -4352($at) # 0xEF00\n"
 
       "sh $a0, 0($a0)\n"
       "sh $a0, 0($a1)\n"
@@ -1994,25 +2101,18 @@
       "sh $a0, 256($a1)\n"
       "sh $a0, 1000($a1)\n"
       "sh $a0, 0x7FFE($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "sh $a0, 8($at)\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "sh $a0, 10($at)\n"
+      "daui $at, $a1, 1\n"
       "sh $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "sh $a0, 2($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
+      "daui $at, $a1, 4660 # 0x1234\n"
+      "sh $a0, 22136($at) # 0x5678\n"
       "sh $a0, -256($a1)\n"
       "sh $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
-      "sh $a0, 0($at)\n"
+      "daui $at, $a1, 43982 # 0xABCE\n"
+      "sh $a0, -4352($at) # 0xEF00\n"
 
       "sw $a0, 0($a0)\n"
       "sw $a0, 0($a1)\n"
@@ -2020,25 +2120,18 @@
       "sw $a0, 256($a1)\n"
       "sw $a0, 1000($a1)\n"
       "sw $a0, 0x7FFC($a1)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "sw $a0, 8($at)\n"
+      "daddiu $at, $a1, 0x7FF8\n"
+      "sw $a0, 12($at)\n"
+      "daui $at, $a1, 1\n"
       "sw $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "sw $a0, 4($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
+      "daui $at, $a1, 4660 # 0x1234\n"
+      "sw $a0, 22136($at) # 0x5678\n"
       "sw $a0, -256($a1)\n"
       "sw $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
-      "sw $a0, 0($at)\n"
+      "daui $at, $a1, 43982 # 0xABCE\n"
+      "sw $a0, -4352($at) # 0xEF00\n"
 
       "sd $a0, 0($a0)\n"
       "sd $a0, 0($a1)\n"
@@ -2047,32 +2140,38 @@
       "sw $t3, 8($a1)\n"
       "sd $a0, 256($a1)\n"
       "sd $a0, 1000($a1)\n"
-      "ori $at, $zero, 0x7FF8\n"
-      "daddu $at, $at, $a1\n"
+      "daddiu $at, $a1, 0x7FF8\n"
       "sw $a0, 4($at)\n"
       "dsrl32 $t3, $a0, 0\n"
       "sw $t3, 8($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "sd $a0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a1\n"
-      "sw $a0, 4($at)\n"
+      "daddiu $at, $a1, 32760 # 0x7FF8\n"
+      "sd $a0, 8($at)\n"
+      "daddiu $at, $a1, 32760 # 0x7FF8\n"
+      "sw $a0, 12($at)\n"
       "dsrl32 $t3, $a0, 0\n"
-      "sw $t3, 8($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a1\n"
+      "sw $t3, 16($at)\n"
+      "daui $at, $a1, 1\n"
       "sd $a0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a1\n"
-      "sd $a0, 0($at)\n"
+      "daui $at, $a1, 4660 # 0x1234\n"
+      "sd $a0, 22136($at) # 0x5678\n"
       "sd $a0, -256($a1)\n"
       "sd $a0, -32768($a1)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a1\n"
-      "sd $a0, 0($at)\n";
+      "daui $at, $a1, 0xABCE\n"
+      "sd $a0, -0x1100($at)\n"
+      "daui $at, $a1, 0x8000\n"
+      "dahi $at, $at, 1\n"
+      "sd $a0, -8($at)\n"
+      "daui $at, $a1, 0x8000\n"
+      "dahi $at, $at, 1\n"
+      "sw $a0, -4($at) # 0xFFFC\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 0($at) # 0x0\n"
+      "daui $at, $a1, 0x8000\n"
+      "sd $a0, 0($at) # 0x0\n"
+      "daui $at, $a1, 0x8000\n"
+      "sw $a0, 4($at) # 0x4\n"
+      "dsrl32 $t3, $a0, 0\n"
+      "sw $t3, 8($at) # 0x8\n";
   DriverStr(expected, "StoreToOffset");
 }
 
@@ -2106,60 +2205,115 @@
       "swc1 $f0, 4($a0)\n"
       "swc1 $f0, 256($a0)\n"
       "swc1 $f0, 0x7FFC($a0)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "swc1 $f0, 8($at)\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "swc1 $f0, 12($at)\n"
+      "daui $at, $a0, 1\n"
       "swc1 $f0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
-      "swc1 $f0, 4($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
+      "daui $at, $a0, 4660 # 0x1234\n"
+      "swc1 $f0, 22136($at) # 0x5678\n"
       "swc1 $f0, -256($a0)\n"
       "swc1 $f0, -32768($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a0\n"
-      "swc1 $f0, 0($at)\n"
+      "daui $at, $a0, 0xABCE\n"
+      "swc1 $f0, -0x1100($at)\n"
 
       "sdc1 $f0, 0($a0)\n"
       "mfhc1 $t3, $f0\n"
       "swc1 $f0, 4($a0)\n"
       "sw $t3, 8($a0)\n"
       "sdc1 $f0, 256($a0)\n"
-      "ori $at, $zero, 0x7FF8\n"
-      "daddu $at, $at, $a0\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
       "mfhc1 $t3, $f0\n"
       "swc1 $f0, 4($at)\n"
       "sw $t3, 8($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
-      "sdc1 $f0, 0($at)\n"
-      "ori $at, $zero, 0x8000\n"
-      "daddu $at, $at, $a0\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
+      "sdc1 $f0, 8($at)\n"
+      "daddiu $at, $a0, 32760 # 0x7FF8\n"
       "mfhc1 $t3, $f0\n"
-      "swc1 $f0, 4($at)\n"
-      "sw $t3, 8($at)\n"
-      "lui $at, 1\n"
-      "daddu $at, $at, $a0\n"
+      "swc1 $f0, 12($at)\n"
+      "sw $t3, 16($at)\n"
+      "daui $at, $a0, 1\n"
       "sdc1 $f0, 0($at)\n"
-      "lui $at, 0x1234\n"
-      "ori $at, 0x5678\n"
-      "daddu $at, $at, $a0\n"
-      "sdc1 $f0, 0($at)\n"
+      "daui $at, $a0, 4660 # 0x1234\n"
+      "sdc1 $f0, 22136($at) # 0x5678\n"
       "sdc1 $f0, -256($a0)\n"
       "sdc1 $f0, -32768($a0)\n"
-      "lui $at, 0xABCD\n"
-      "ori $at, 0xEF00\n"
-      "daddu $at, $at, $a0\n"
-      "sdc1 $f0, 0($at)\n";
+      "daui $at, $a0, 0xABCE\n"
+      "sdc1 $f0, -0x1100($at)\n";
   DriverStr(expected, "StoreFpuToOffset");
 }
 
+TEST_F(AssemblerMIPS64Test, StoreConstToOffset) {
+  __ StoreConstToOffset(mips64::kStoreByte, 0xFF, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreHalfword, 0xFFFF, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x123456789ABCDEF0, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreByte, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreHalfword, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567812345678, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x1234567800000000, mips64::A1, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreDoubleword, 0x0000000012345678, mips64::A1, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, +0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::A1, -0xFFF0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::A1, +0xFFF0, mips64::T8);
+
+  __ StoreConstToOffset(mips64::kStoreWord, 0, mips64::T8, -0xFFF0, mips64::T8);
+  __ StoreConstToOffset(mips64::kStoreWord, 0x12345678, mips64::T8, +0xFFF0, mips64::T8);
+
+  const char* expected =
+      "ori $t8, $zero, 0xFF\n"
+      "sb $t8, 0($a1)\n"
+      "ori $t8, $zero, 0xFFFF\n"
+      "sh $t8, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8,0x5678\n"
+      "sw $t8, 0($a1)\n"
+      "lui $t8, 0x9abc\n"
+      "ori $t8, $t8,0xdef0\n"
+      "dahi $t8, $t8, 0x5679\n"
+      "dati $t8, $t8, 0x1234\n"
+      "sd $t8, 0($a1)\n"
+      "sb $zero, 0($a1)\n"
+      "sh $zero, 0($a1)\n"
+      "sw $zero, 0($a1)\n"
+      "sd $zero, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8,0x5678\n"
+      "dins $t8, $t8, 0x20, 0x20\n"
+      "sd $t8, 0($a1)\n"
+      "lui $t8, 0x246\n"
+      "ori $t8, $t8, 0x8acf\n"
+      "dsll32 $t8, $t8, 0x3\n"
+      "sd $t8, 0($a1)\n"
+      "lui $t8, 0x1234\n"
+      "ori $t8, $t8, 0x5678\n"
+      "sd $t8, 0($a1)\n"
+      "sw $zero, 0($t8)\n"
+      "lui $at,0x1234\n"
+      "ori $at, $at, 0x5678\n"
+      "sw  $at, 0($t8)\n"
+      "daddiu $at, $a1, -32760 # 0x8008\n"
+      "sw $zero, -32760($at) # 0x8008\n"
+      "daddiu $at, $a1, 32760 # 0x7FF8\n"
+      "lui $t8, 4660 # 0x1234\n"
+      "ori $t8, $t8, 22136 # 0x5678\n"
+      "sw $t8, 32760($at) # 0x7FF8\n"
+      "daddiu $at, $t8, -32760 # 0x8008\n"
+      "sw $zero, -32760($at) # 0x8008\n"
+      "daddiu $at, $t8, 32760 # 0x7FF8\n"
+      "lui $t8, 4660 # 0x1234\n"
+      "ori $t8, $t8, 22136 # 0x5678\n"
+      "sw $t8, 32760($at) # 0x7FF8\n";
+  DriverStr(expected, "StoreConstToOffset");
+}
+
 //////////////////////////////
 // Loading/adding Constants //
 //////////////////////////////
@@ -2356,6 +2510,386 @@
   EXPECT_EQ(tester.GetPathsCovered(), art::mips64::kLoadConst64PathAllPaths);
 }
 
+// MSA instructions.
+
+TEST_F(AssemblerMIPS64Test, AndV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AndV, "and.v ${reg1}, ${reg2}, ${reg3}"), "and.v");
+}
+
+TEST_F(AssemblerMIPS64Test, OrV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::OrV, "or.v ${reg1}, ${reg2}, ${reg3}"), "or.v");
+}
+
+TEST_F(AssemblerMIPS64Test, NorV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::NorV, "nor.v ${reg1}, ${reg2}, ${reg3}"), "nor.v");
+}
+
+TEST_F(AssemblerMIPS64Test, XorV) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::XorV, "xor.v ${reg1}, ${reg2}, ${reg3}"), "xor.v");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvB, "addv.b ${reg1}, ${reg2}, ${reg3}"),
+            "addv.b");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvH, "addv.h ${reg1}, ${reg2}, ${reg3}"),
+            "addv.h");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvW, "addv.w ${reg1}, ${reg2}, ${reg3}"),
+            "addv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, AddvD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::AddvD, "addv.d ${reg1}, ${reg2}, ${reg3}"),
+            "addv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvB, "subv.b ${reg1}, ${reg2}, ${reg3}"),
+            "subv.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvH, "subv.h ${reg1}, ${reg2}, ${reg3}"),
+            "subv.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvW, "subv.w ${reg1}, ${reg2}, ${reg3}"),
+            "subv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SubvD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SubvD, "subv.d ${reg1}, ${reg2}, ${reg3}"),
+            "subv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvB, "mulv.b ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.b");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvH, "mulv.h ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.h");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvW, "mulv.w ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, MulvD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::MulvD, "mulv.d ${reg1}, ${reg2}, ${reg3}"),
+            "mulv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sB, "div_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sH, "div_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sW, "div_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_sD, "div_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uB, "div_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uH, "div_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uW, "div_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Div_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Div_uD, "div_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "div_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sB, "mod_s.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sH, "mod_s.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sW, "mod_s.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_sD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_sD, "mod_s.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uB, "mod_u.b ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.b");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uH, "mod_u.h ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.h");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uW, "mod_u.w ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Mod_uD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::Mod_uD, "mod_u.d ${reg1}, ${reg2}, ${reg3}"),
+            "mod_u.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FaddW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddW, "fadd.w ${reg1}, ${reg2}, ${reg3}"),
+            "fadd.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FaddD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FaddD, "fadd.d ${reg1}, ${reg2}, ${reg3}"),
+            "fadd.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FsubW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubW, "fsub.w ${reg1}, ${reg2}, ${reg3}"),
+            "fsub.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FsubD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FsubD, "fsub.d ${reg1}, ${reg2}, ${reg3}"),
+            "fsub.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FmulW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulW, "fmul.w ${reg1}, ${reg2}, ${reg3}"),
+            "fmul.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FmulD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FmulD, "fmul.d ${reg1}, ${reg2}, ${reg3}"),
+            "fmul.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FdivW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivW, "fdiv.w ${reg1}, ${reg2}, ${reg3}"),
+            "fdiv.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FdivD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::FdivD, "fdiv.d ${reg1}, ${reg2}, ${reg3}"),
+            "fdiv.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ffint_sW) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sW, "ffint_s.w ${reg1}, ${reg2}"),
+            "ffint_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ffint_sD) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ffint_sD, "ffint_s.d ${reg1}, ${reg2}"),
+            "ffint_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, Ftint_sW) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sW, "ftint_s.w ${reg1}, ${reg2}"),
+            "ftint_s.w");
+}
+
+TEST_F(AssemblerMIPS64Test, Ftint_sD) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::Ftint_sD, "ftint_s.d ${reg1}, ${reg2}"),
+            "ftint_s.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SllB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllB, "sll.b ${reg1}, ${reg2}, ${reg3}"), "sll.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SllH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllH, "sll.h ${reg1}, ${reg2}, ${reg3}"), "sll.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SllW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllW, "sll.w ${reg1}, ${reg2}, ${reg3}"), "sll.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SllD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SllD, "sll.d ${reg1}, ${reg2}, ${reg3}"), "sll.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SraB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraB, "sra.b ${reg1}, ${reg2}, ${reg3}"), "sra.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SraH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraH, "sra.h ${reg1}, ${reg2}, ${reg3}"), "sra.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SraW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraW, "sra.w ${reg1}, ${reg2}, ${reg3}"), "sra.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SraD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SraD, "sra.d ${reg1}, ${reg2}, ${reg3}"), "sra.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlB) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlB, "srl.b ${reg1}, ${reg2}, ${reg3}"), "srl.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlH) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlH, "srl.h ${reg1}, ${reg2}, ${reg3}"), "srl.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlW) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlW, "srl.w ${reg1}, ${reg2}, ${reg3}"), "srl.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SrlD) {
+  DriverStr(RepeatVVV(&mips64::Mips64Assembler::SrlD, "srl.d ${reg1}, ${reg2}, ${reg3}"), "srl.d");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliB) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliB, 3, "slli.b ${reg1}, ${reg2}, {imm}"),
+            "slli.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliH) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliH, 4, "slli.h ${reg1}, ${reg2}, {imm}"),
+            "slli.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliW) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliW, 5, "slli.w ${reg1}, ${reg2}, {imm}"),
+            "slli.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SlliD) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SlliD, 6, "slli.d ${reg1}, ${reg2}, {imm}"),
+            "slli.d");
+}
+
+TEST_F(AssemblerMIPS64Test, MoveV) {
+  DriverStr(RepeatVV(&mips64::Mips64Assembler::MoveV, "move.v ${reg1}, ${reg2}"), "move.v");
+}
+
+TEST_F(AssemblerMIPS64Test, SplatiB) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiB, 4, "splati.b ${reg1}, ${reg2}[{imm}]"),
+            "splati.b");
+}
+
+TEST_F(AssemblerMIPS64Test, SplatiH) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiH, 3, "splati.h ${reg1}, ${reg2}[{imm}]"),
+            "splati.h");
+}
+
+TEST_F(AssemblerMIPS64Test, SplatiW) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiW, 2, "splati.w ${reg1}, ${reg2}[{imm}]"),
+            "splati.w");
+}
+
+TEST_F(AssemblerMIPS64Test, SplatiD) {
+  DriverStr(RepeatVVIb(&mips64::Mips64Assembler::SplatiD, 1, "splati.d ${reg1}, ${reg2}[{imm}]"),
+            "splati.d");
+}
+
+TEST_F(AssemblerMIPS64Test, FillB) {
+  DriverStr(RepeatVR(&mips64::Mips64Assembler::FillB, "fill.b ${reg1}, ${reg2}"), "fill.b");
+}
+
+TEST_F(AssemblerMIPS64Test, FillH) {
+  DriverStr(RepeatVR(&mips64::Mips64Assembler::FillH, "fill.h ${reg1}, ${reg2}"), "fill.h");
+}
+
+TEST_F(AssemblerMIPS64Test, FillW) {
+  DriverStr(RepeatVR(&mips64::Mips64Assembler::FillW, "fill.w ${reg1}, ${reg2}"), "fill.w");
+}
+
+TEST_F(AssemblerMIPS64Test, FillD) {
+  DriverStr(RepeatVR(&mips64::Mips64Assembler::FillD, "fill.d ${reg1}, ${reg2}"), "fill.d");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiB) {
+  DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiB, -8, "ldi.b ${reg}, {imm}"), "ldi.b");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiH) {
+  DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiH, -10, "ldi.h ${reg}, {imm}"), "ldi.h");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiW) {
+  DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiW, -10, "ldi.w ${reg}, {imm}"), "ldi.w");
+}
+
+TEST_F(AssemblerMIPS64Test, LdiD) {
+  DriverStr(RepeatVIb(&mips64::Mips64Assembler::LdiD, -10, "ldi.d ${reg}, {imm}"), "ldi.d");
+}
+
+TEST_F(AssemblerMIPS64Test, LdB) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdB, -10, "ld.b ${reg1}, {imm}(${reg2})"), "ld.b");
+}
+
+TEST_F(AssemblerMIPS64Test, LdH) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdH, -10, "ld.h ${reg1}, {imm}(${reg2})", 0, 2),
+            "ld.h");
+}
+
+TEST_F(AssemblerMIPS64Test, LdW) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdW, -10, "ld.w ${reg1}, {imm}(${reg2})", 0, 4),
+            "ld.w");
+}
+
+TEST_F(AssemblerMIPS64Test, LdD) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::LdD, -10, "ld.d ${reg1}, {imm}(${reg2})", 0, 8),
+            "ld.d");
+}
+
+TEST_F(AssemblerMIPS64Test, StB) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StB, -10, "st.b ${reg1}, {imm}(${reg2})"), "st.b");
+}
+
+TEST_F(AssemblerMIPS64Test, StH) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StH, -10, "st.h ${reg1}, {imm}(${reg2})", 0, 2),
+            "st.h");
+}
+
+TEST_F(AssemblerMIPS64Test, StW) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StW, -10, "st.w ${reg1}, {imm}(${reg2})", 0, 4),
+            "st.w");
+}
+
+TEST_F(AssemblerMIPS64Test, StD) {
+  DriverStr(RepeatVRIb(&mips64::Mips64Assembler::StD, -10, "st.d ${reg1}, {imm}(${reg2})", 0, 8),
+            "st.d");
+}
+
 #undef __
 
 }  // namespace art
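
A note on the integer threaded through the MSA immediate tests above (the
3/4/5/6 in the slli tests, the -8/-10 in the ldi/ld/st tests): in the
assembler test driver this argument is read as the bit width of the
immediate field, and a negative value conventionally marks the field as
signed. A minimal sketch of that enumeration, under that stated assumption:

#include <cstdint>
#include <vector>

// Sketch (assumed driver convention): sample boundary immediates for an
// |imm_bits|-wide field; negative imm_bits means a signed range, e.g.
// -10 -> -512..511 for the ldi.h test above, 6 -> 0..63 for slli.d.
std::vector<int64_t> BoundaryImmediates(int imm_bits) {
  if (imm_bits < 0) {
    int64_t hi = (INT64_C(1) << (-imm_bits - 1)) - 1;  // e.g. 511
    return {-hi - 1, -1, 0, 1, hi};                    // e.g. -512, ..., 511
  }
  return {0, 1, (INT64_C(1) << imm_bits) - 1};         // e.g. 0, 1, 63
}
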
diff --git a/compiler/utils/mips64/constants_mips64.h b/compiler/utils/mips64/constants_mips64.h
index f57498d..bc8e40b 100644
--- a/compiler/utils/mips64/constants_mips64.h
+++ b/compiler/utils/mips64/constants_mips64.h
@@ -51,8 +51,36 @@
   kFdShift = 6,
   kFdBits = 5,
 
+  kMsaOperationShift = 23,
+  kMsaELMOperationShift = 22,
+  kMsa2ROperationShift = 18,
+  kMsa2RFOperationShift = 17,
+  kDfShift = 21,
+  kDfMShift = 16,
+  kDf2RShift = 16,
+  kDfNShift = 16,
+  kWtShift = 16,
+  kWtBits = 5,
+  kWsShift = 11,
+  kWsBits = 5,
+  kWdShift = 6,
+  kWdBits = 5,
+  kS10Shift = 16,
+  kI10Shift = 11,
+  kS10MinorShift = 2,
+
   kBranchOffsetMask = 0x0000ffff,
   kJumpOffsetMask = 0x03ffffff,
+  kMsaMajorOpcode = 0x1e,
+  kMsaDfMByteMask = 0x70,
+  kMsaDfMHalfwordMask = 0x60,
+  kMsaDfMWordMask = 0x40,
+  kMsaDfMDoublewordMask = 0x00,
+  kMsaDfNByteMask = 0x00,
+  kMsaDfNHalfwordMask = 0x20,
+  kMsaDfNWordMask = 0x30,
+  kMsaDfNDoublewordMask = 0x38,
+  kMsaS10Mask = 0x3ff,
 };
 
 enum ScaleFactor {
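
The new constants above spell out the MSA BIT instruction format used by
the slli tests earlier: the major opcode in bits 31..26, a 3-bit operation
field at bit 23, the combined df/m field at bit 16 (one of the kMsaDfM*Mask
values selects the data format while the low bits carry the shift amount),
ws at bit 11, wd at bit 6, and a 6-bit minor opcode. A sketch of packing
such a word; the operation and minor values shown for slli are assumptions:

#include <cstdint>

constexpr uint32_t EncodeMsaBit(uint32_t operation, uint32_t df_m,
                                uint32_t ws, uint32_t wd, uint32_t minor) {
  return (UINT32_C(0x1e) << 26) |  // kMsaMajorOpcode
         (operation << 23) |       // kMsaOperationShift
         (df_m << 16) |            // kDfMShift
         (ws << 11) |              // kWsShift
         (wd << 6) |               // kWdShift
         minor;
}

// Example: slli.h $w1, $w2, 4 -> df/m = kMsaDfMHalfwordMask | 4 = 0x64.
constexpr uint32_t slli_h =
    EncodeMsaBit(/*operation=*/0x0, /*df_m=*/0x60 | 4, /*ws=*/2, /*wd=*/1,
                 /*minor=*/0x09);
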
diff --git a/compiler/utils/mips64/managed_register_mips64.cc b/compiler/utils/mips64/managed_register_mips64.cc
index dea396e..42d061e 100644
--- a/compiler/utils/mips64/managed_register_mips64.cc
+++ b/compiler/utils/mips64/managed_register_mips64.cc
@@ -26,6 +26,11 @@
   CHECK(IsValidManagedRegister());
   CHECK(other.IsValidManagedRegister());
   if (Equals(other)) return true;
+  if (IsFpuRegister() && other.IsVectorRegister()) {
+    return (AsFpuRegister() == other.AsOverlappingFpuRegister());
+  } else if (IsVectorRegister() && other.IsFpuRegister()) {
+    return (AsVectorRegister() == other.AsOverlappingVectorRegister());
+  }
   return false;
 }
 
@@ -36,6 +41,8 @@
     os << "GPU: " << static_cast<int>(AsGpuRegister());
   } else if (IsFpuRegister()) {
      os << "FpuRegister: " << static_cast<int>(AsFpuRegister());
+  } else if (IsVectorRegister()) {
+     os << "VectorRegister: " << static_cast<int>(AsVectorRegister());
   } else {
     os << "??: " << RegId();
   }
diff --git a/compiler/utils/mips64/managed_register_mips64.h b/compiler/utils/mips64/managed_register_mips64.h
index c9f9556..3980199 100644
--- a/compiler/utils/mips64/managed_register_mips64.h
+++ b/compiler/utils/mips64/managed_register_mips64.h
@@ -30,11 +30,27 @@
 const int kNumberOfFpuRegIds = kNumberOfFpuRegisters;
 const int kNumberOfFpuAllocIds = kNumberOfFpuRegisters;
 
-const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds;
-const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds;
+const int kNumberOfVecRegIds = kNumberOfVectorRegisters;
+const int kNumberOfVecAllocIds = kNumberOfVectorRegisters;
 
-// An instance of class 'ManagedRegister' represents a single GPU register (enum
-// Register) or a double precision FP register (enum FpuRegister)
+const int kNumberOfRegIds = kNumberOfGpuRegIds + kNumberOfFpuRegIds + kNumberOfVecRegIds;
+const int kNumberOfAllocIds = kNumberOfGpuAllocIds + kNumberOfFpuAllocIds + kNumberOfVecAllocIds;
+
+// Register ids map:
+//   [0..R[  core registers (enum GpuRegister)
+//   [R..F[  floating-point registers (enum FpuRegister)
+//   [F..W[  MSA vector registers (enum VectorRegister)
+// where
+//   R = kNumberOfGpuRegIds
+//   F = R + kNumberOfFpuRegIds
+//   W = F + kNumberOfVecRegIds
+
+// An instance of class 'ManagedRegister' represents a single Mips64 register.
+// A register can be one of the following:
+//  * core register (enum GpuRegister)
+//  * floating-point register (enum FpuRegister)
+//  * MSA vector register (enum VectorRegister)
+//
 // 'ManagedRegister::NoRegister()' provides an invalid register.
 // There is a one-to-one mapping between ManagedRegister and register id.
 class Mips64ManagedRegister : public ManagedRegister {
@@ -49,6 +65,21 @@
     return static_cast<FpuRegister>(id_ - kNumberOfGpuRegIds);
   }
 
+  constexpr VectorRegister AsVectorRegister() const {
+    CHECK(IsVectorRegister());
+    return static_cast<VectorRegister>(id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegisters));
+  }
+
+  constexpr FpuRegister AsOverlappingFpuRegister() const {
+    CHECK(IsValidManagedRegister());
+    return static_cast<FpuRegister>(AsVectorRegister());
+  }
+
+  constexpr VectorRegister AsOverlappingVectorRegister() const {
+    CHECK(IsValidManagedRegister());
+    return static_cast<VectorRegister>(AsFpuRegister());
+  }
+
   constexpr bool IsGpuRegister() const {
     CHECK(IsValidManagedRegister());
     return (0 <= id_) && (id_ < kNumberOfGpuRegIds);
@@ -60,6 +91,12 @@
     return (0 <= test) && (test < kNumberOfFpuRegIds);
   }
 
+  constexpr bool IsVectorRegister() const {
+    CHECK(IsValidManagedRegister());
+    const int test = id_ - (kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+    return (0 <= test) && (test < kNumberOfVecRegIds);
+  }
+
   void Print(std::ostream& os) const;
 
   // Returns true if the two managed-registers ('this' and 'other') overlap.
@@ -77,6 +114,11 @@
     return FromRegId(r + kNumberOfGpuRegIds);
   }
 
+  static constexpr Mips64ManagedRegister FromVectorRegister(VectorRegister r) {
+    CHECK_NE(r, kNoVectorRegister);
+    return FromRegId(r + kNumberOfGpuRegIds + kNumberOfFpuRegIds);
+  }
+
  private:
   constexpr bool IsValidManagedRegister() const {
     return (0 <= id_) && (id_ < kNumberOfRegIds);
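
A worked example of the register-id map documented above, assuming 32
registers in each bank (which is what the mips64 GPU, FPU and vector enums
provide), as a standalone sketch rather than the real class:

// With R = 32 and F = 64: F5 gets id 37, W5 gets id 69, and the
// AsVectorRegister() subtraction inverts the mapping.
constexpr int kGpuIds = 32;
constexpr int kFpuIds = 32;

constexpr int IdFromFpu(int f) { return kGpuIds + f; }
constexpr int IdFromVec(int w) { return kGpuIds + kFpuIds + w; }
constexpr int VecFromId(int id) { return id - (kGpuIds + kFpuIds); }

static_assert(IdFromFpu(5) == 37, "F5 -> id 37");
static_assert(IdFromVec(5) == 69, "W5 -> id 69");
static_assert(VecFromId(IdFromVec(5)) == 5, "round-trip to W5");
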
diff --git a/compiler/utils/mips64/managed_register_mips64_test.cc b/compiler/utils/mips64/managed_register_mips64_test.cc
new file mode 100644
index 0000000..8b72d7e
--- /dev/null
+++ b/compiler/utils/mips64/managed_register_mips64_test.cc
@@ -0,0 +1,480 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "managed_register_mips64.h"
+#include "globals.h"
+#include "gtest/gtest.h"
+
+namespace art {
+namespace mips64 {
+
+TEST(Mips64ManagedRegister, NoRegister) {
+  Mips64ManagedRegister reg = ManagedRegister::NoRegister().AsMips64();
+  EXPECT_TRUE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.Overlaps(reg));
+}
+
+TEST(Mips64ManagedRegister, GpuRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(ZERO, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(AT);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(AT, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(V0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(V0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(A0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A7);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(A7, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(T0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(T0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(T3);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(T3, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(S0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(S0, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(GP);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(GP, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(SP);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(SP, reg.AsGpuRegister());
+
+  reg = Mips64ManagedRegister::FromGpuRegister(RA);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_TRUE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_EQ(RA, reg.AsGpuRegister());
+}
+
+TEST(Mips64ManagedRegister, FpuRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+  Mips64ManagedRegister vreg = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F0, reg.AsFpuRegister());
+  EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F1);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W1);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F1, reg.AsFpuRegister());
+  EXPECT_EQ(W1, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F20);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W20);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F20, reg.AsFpuRegister());
+  EXPECT_EQ(W20, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F20)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F31);
+  vreg = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_TRUE(reg.IsFpuRegister());
+  EXPECT_FALSE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(vreg));
+  EXPECT_EQ(F31, reg.AsFpuRegister());
+  EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+}
+
+TEST(Mips64ManagedRegister, VectorRegister) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromVectorRegister(W0);
+  Mips64ManagedRegister freg = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W0, reg.AsVectorRegister());
+  EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W2);
+  freg = Mips64ManagedRegister::FromFpuRegister(F2);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W2, reg.AsVectorRegister());
+  EXPECT_EQ(F2, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W2)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W13);
+  freg = Mips64ManagedRegister::FromFpuRegister(F13);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W13, reg.AsVectorRegister());
+  EXPECT_EQ(F13, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W13)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W29);
+  freg = Mips64ManagedRegister::FromFpuRegister(F29);
+  EXPECT_FALSE(reg.IsNoRegister());
+  EXPECT_FALSE(reg.IsGpuRegister());
+  EXPECT_FALSE(reg.IsFpuRegister());
+  EXPECT_TRUE(reg.IsVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(freg));
+  EXPECT_EQ(W29, reg.AsVectorRegister());
+  EXPECT_EQ(F29, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Equals(Mips64ManagedRegister::FromVectorRegister(W29)));
+}
+
+TEST(Mips64ManagedRegister, Equals) {
+  ManagedRegister no_reg = ManagedRegister::NoRegister();
+  EXPECT_TRUE(no_reg.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(no_reg.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_ZERO = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_TRUE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_ZERO.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_A1 = Mips64ManagedRegister::FromGpuRegister(A1);
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_TRUE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_A1.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_S2 = Mips64ManagedRegister::FromGpuRegister(S2);
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+  EXPECT_TRUE(reg_S2.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_S2.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_F0 = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_TRUE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg_F0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_F31 = Mips64ManagedRegister::FromFpuRegister(F31);
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromGpuRegister(S2)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F1)));
+  EXPECT_TRUE(reg_F31.Equals(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg_F31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+
+  Mips64ManagedRegister reg_W0 = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_TRUE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+  EXPECT_FALSE(reg_W0.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  Mips64ManagedRegister reg_W31 = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::NoRegister()));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(A1)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromGpuRegister(S1)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W1)));
+  EXPECT_TRUE(reg_W31.Equals(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+TEST(Mips64ManagedRegister, Overlaps) {
+  Mips64ManagedRegister reg = Mips64ManagedRegister::FromFpuRegister(F0);
+  Mips64ManagedRegister reg_o = Mips64ManagedRegister::FromVectorRegister(W0);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F0, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W0, reg.AsOverlappingVectorRegister());
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F4);
+  reg_o = Mips64ManagedRegister::FromVectorRegister(W4);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F4, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W4, reg.AsOverlappingVectorRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F16);
+  reg_o = Mips64ManagedRegister::FromVectorRegister(W16);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F16, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W16, reg.AsOverlappingVectorRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromFpuRegister(F31);
+  reg_o = Mips64ManagedRegister::FromVectorRegister(W31);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(F31, reg_o.AsOverlappingFpuRegister());
+  EXPECT_EQ(W31, reg.AsOverlappingVectorRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W0);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F0);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W0, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F0, reg.AsOverlappingFpuRegister());
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W4);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F4);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W4, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F4, reg.AsOverlappingFpuRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W16);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F16);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W16, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F16, reg.AsOverlappingFpuRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromVectorRegister(W31);
+  reg_o = Mips64ManagedRegister::FromFpuRegister(F31);
+  EXPECT_TRUE(reg.Overlaps(reg_o));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_EQ(W31, reg_o.AsOverlappingVectorRegister());
+  EXPECT_EQ(F31, reg.AsOverlappingFpuRegister());
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(ZERO);
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(A0);
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(S0);
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+
+  reg = Mips64ManagedRegister::FromGpuRegister(RA);
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(ZERO)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(A0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(S0)));
+  EXPECT_TRUE(reg.Overlaps(Mips64ManagedRegister::FromGpuRegister(RA)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromFpuRegister(F31)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W0)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W4)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W16)));
+  EXPECT_FALSE(reg.Overlaps(Mips64ManagedRegister::FromVectorRegister(W31)));
+}
+
+}  // namespace mips64
+}  // namespace art
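
The Overlaps expectations in the new test encode the MSA aliasing rule:
each 128-bit W register shares its low 64 bits with the same-numbered
F register, so F<n> and W<n> form one allocation unit while different
numbers never overlap. As a rough illustration (not ART code):

#include <cstdint>

// W<n> is 128 bits wide; its low 64 bits are the FPU register F<n>.
union MsaRegister {
  double fpu;        // F<n>: low 64 bits
  uint64_t vec[2];   // W<n>: vec[0] aliases fpu
};
static_assert(sizeof(MsaRegister) == 16, "W registers are 128-bit");
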
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 6eab302..1736618 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -958,6 +958,14 @@
 }
 
 
+void X86Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x5B);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1161,6 +1169,32 @@
 }
 
 
+void X86Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::andnps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pandn(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xDF);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::orpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1187,6 +1221,98 @@
 }
 
 
+void X86Assembler::pavgb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xE0);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pavgw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0xE3);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x74);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x75);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x76);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x29);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x64);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x65);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x66);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
+void X86Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x37);
+  EmitXmmRegisterOperand(dst, src);
+}
+
+
 void X86Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
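
The new SSE emitters above all follow one shape: a mandatory 0x66 prefix
where the instruction requires it, the 0x0F escape (plus a 0x38 second
escape byte for the SSE4.x opcodes pcmpeqq and pcmpgtq), the opcode, then
a register-direct ModRM byte. A worked byte-level example, with values
per the Intel SDM:

#include <cstdint>

// pcmpeqb %xmm2, %xmm1 assembles to 66 0F 74 CA: ModRM uses mod=11
// (register-direct), reg=dst, rm=src, matching the
// EmitXmmRegisterOperand(dst, src) call above.
constexpr uint8_t ModRM(uint8_t dst, uint8_t src) {
  return static_cast<uint8_t>(0xC0 | (dst << 3) | src);
}
static_assert(ModRM(1, 2) == 0xCA, "pcmpeqb %xmm2, %xmm1 -> 66 0F 74 CA");

In the tests that follow, the expected strings name the source register
first ("%{reg2}, %{reg1}") because they are matched against AT&T-syntax
output, while the C++ API takes (dst, src).
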
diff --git a/compiler/utils/x86/assembler_x86.h b/compiler/utils/x86/assembler_x86.h
index 2999599..a747cda 100644
--- a/compiler/utils/x86/assembler_x86.h
+++ b/compiler/utils/x86/assembler_x86.h
@@ -457,6 +457,7 @@
   void cvttss2si(Register dst, XmmRegister src);
   void cvttsd2si(Register dst, XmmRegister src);
 
+  void cvtdq2ps(XmmRegister dst, XmmRegister src);
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
@@ -486,10 +487,27 @@
   void andps(XmmRegister dst, const Address& src);
   void pand(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
 
+  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void andnps(XmmRegister dst, XmmRegister src);
+  void pandn(XmmRegister dst, XmmRegister src);
+
   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void orps(XmmRegister dst, XmmRegister src);
   void por(XmmRegister dst, XmmRegister src);
 
+  void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pavgw(XmmRegister dst, XmmRegister src);
+
+  void pcmpeqb(XmmRegister dst, XmmRegister src);
+  void pcmpeqw(XmmRegister dst, XmmRegister src);
+  void pcmpeqd(XmmRegister dst, XmmRegister src);
+  void pcmpeqq(XmmRegister dst, XmmRegister src);  // SSE4.1
+
+  void pcmpgtb(XmmRegister dst, XmmRegister src);
+  void pcmpgtw(XmmRegister dst, XmmRegister src);
+  void pcmpgtd(XmmRegister dst, XmmRegister src);
+  void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
+
   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86/assembler_x86_test.cc b/compiler/utils/x86/assembler_x86_test.cc
index a74bea2..f75f972 100644
--- a/compiler/utils/x86/assembler_x86_test.cc
+++ b/compiler/utils/x86/assembler_x86_test.cc
@@ -322,6 +322,14 @@
   DriverStr(RepeatRI(&x86::X86Assembler::roll, 1U, "roll ${imm}, %{reg}"), "rolli");
 }
 
+TEST_F(AssemblerX86Test, Cvtdq2ps) {
+  DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
+TEST_F(AssemblerX86Test, Cvtdq2pd) {
+  DriverStr(RepeatFF(&x86::X86Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
+}
+
 TEST_F(AssemblerX86Test, ComissAddr) {
   GetAssembler()->comiss(x86::XmmRegister(x86::XMM0), x86::Address(x86::EAX, 0));
   const char* expected = "comiss 0(%EAX), %xmm0\n";
@@ -573,6 +581,18 @@
   DriverStr(RepeatFF(&x86::X86Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
 }
 
+TEST_F(AssemblerX86Test, AndnPD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86Test, AndnPS) {
+  DriverStr(RepeatFF(&x86::X86Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86Test, PAndn) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
 TEST_F(AssemblerX86Test, OrPD) {
   DriverStr(RepeatFF(&x86::X86Assembler::orpd, "orpd %{reg2}, %{reg1}"), "orpd");
 }
@@ -585,6 +605,46 @@
   DriverStr(RepeatFF(&x86::X86Assembler::por, "por %{reg2}, %{reg1}"), "por");
 }
 
+TEST_F(AssemblerX86Test, PAvgB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");
+}
+
+TEST_F(AssemblerX86Test, PAvgW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd");
+}
+
+TEST_F(AssemblerX86Test, PCmpeqQ) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtB) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "pcmpgtb");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtW) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "pcmpgtw");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtD) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "pcmpgtd");
+}
+
+TEST_F(AssemblerX86Test, PCmpgtQ) {
+  DriverStr(RepeatFF(&x86::X86Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "pcmpgtq");
+}
+
 TEST_F(AssemblerX86Test, ShufPS) {
   DriverStr(RepeatFFI(&x86::X86Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
 }
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 458204a..1b7a485 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -1153,6 +1153,15 @@
 }
 
 
+void X86_64Assembler::cvtdq2ps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x5B);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+
 void X86_64Assembler::cvtdq2pd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0xF3);
@@ -1366,6 +1375,32 @@
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::andnpd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::andnps(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x55);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pandn(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xDF);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::orpd(XmmRegister dst, XmmRegister src) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
@@ -1392,6 +1427,98 @@
   EmitXmmRegisterOperand(dst.LowBits(), src);
 }
 
+void X86_64Assembler::pavgb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xE0);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pavgw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0xE3);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x74);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x75);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x76);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpeqq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x29);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtb(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x64);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtw(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x65);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtd(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x66);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
+void X86_64Assembler::pcmpgtq(XmmRegister dst, XmmRegister src) {
+  AssemblerBuffer::EnsureCapacity ensured(&buffer_);
+  EmitUint8(0x66);
+  EmitOptionalRex32(dst, src);
+  EmitUint8(0x0F);
+  EmitUint8(0x38);
+  EmitUint8(0x37);
+  EmitXmmRegisterOperand(dst.LowBits(), src);
+}
+
 void X86_64Assembler::shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm) {
   AssemblerBuffer::EnsureCapacity ensured(&buffer_);
   EmitUint8(0x66);
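
The x86-64 versions differ from the x86 ones only by the EmitOptionalRex32
call: a REX prefix byte is emitted only when an operand is xmm8-xmm15, and
legacy prefixes such as 0x66 must precede REX, which is why 0x66 is written
first above. When no high register is involved the encodings are
byte-identical to the 32-bit ones. A worked example with high registers
(values per the Intel SDM):

#include <cstdint>

// pcmpeqb %xmm10, %xmm9 -> 66 45 0F 74 CA: REX.R marks dst >= 8 and
// REX.B marks src >= 8, so ModRM then carries only the low three bits
// of each register (the dst.LowBits() above).
constexpr uint8_t Rex(uint8_t dst, uint8_t src) {
  return static_cast<uint8_t>(0x40 | ((dst >> 3) << 2) | (src >> 3));
}
static_assert(Rex(9, 10) == 0x45, "REX.RB for xmm9, xmm10");
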
diff --git a/compiler/utils/x86_64/assembler_x86_64.h b/compiler/utils/x86_64/assembler_x86_64.h
index 0dc11d8..0ddc46c 100644
--- a/compiler/utils/x86_64/assembler_x86_64.h
+++ b/compiler/utils/x86_64/assembler_x86_64.h
@@ -486,6 +486,7 @@
   void cvttsd2si(CpuRegister dst, XmmRegister src);  // Note: this is the r32 version.
   void cvttsd2si(CpuRegister dst, XmmRegister src, bool is64bit);
 
+  void cvtdq2ps(XmmRegister dst, XmmRegister src);
   void cvtdq2pd(XmmRegister dst, XmmRegister src);
 
   void comiss(XmmRegister a, XmmRegister b);
@@ -514,10 +515,27 @@
   void andps(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void pand(XmmRegister dst, XmmRegister src);
 
+  void andnpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void andnps(XmmRegister dst, XmmRegister src);
+  void pandn(XmmRegister dst, XmmRegister src);
+
   void orpd(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
   void orps(XmmRegister dst, XmmRegister src);
   void por(XmmRegister dst, XmmRegister src);
 
+  void pavgb(XmmRegister dst, XmmRegister src);  // no addr variant (for now)
+  void pavgw(XmmRegister dst, XmmRegister src);
+
+  void pcmpeqb(XmmRegister dst, XmmRegister src);
+  void pcmpeqw(XmmRegister dst, XmmRegister src);
+  void pcmpeqd(XmmRegister dst, XmmRegister src);
+  void pcmpeqq(XmmRegister dst, XmmRegister src);  // SSE4.1
+
+  void pcmpgtb(XmmRegister dst, XmmRegister src);
+  void pcmpgtw(XmmRegister dst, XmmRegister src);
+  void pcmpgtd(XmmRegister dst, XmmRegister src);
+  void pcmpgtq(XmmRegister dst, XmmRegister src);  // SSE4.2
+
   void shufpd(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void shufps(XmmRegister dst, XmmRegister src, const Immediate& imm);
   void pshufd(XmmRegister dst, XmmRegister src, const Immediate& imm);
diff --git a/compiler/utils/x86_64/assembler_x86_64_test.cc b/compiler/utils/x86_64/assembler_x86_64_test.cc
index fe94497..e7d8401 100644
--- a/compiler/utils/x86_64/assembler_x86_64_test.cc
+++ b/compiler/utils/x86_64/assembler_x86_64_test.cc
@@ -1205,6 +1205,10 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtsd2ss, "cvtsd2ss %{reg2}, %{reg1}"), "cvtsd2ss");
 }
 
+TEST_F(AssemblerX86_64Test, Cvtdq2ps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2ps, "cvtdq2ps %{reg2}, %{reg1}"), "cvtdq2ps");
+}
+
 TEST_F(AssemblerX86_64Test, Cvtdq2pd) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::cvtdq2pd, "cvtdq2pd %{reg2}, %{reg1}"), "cvtdq2pd");
 }
@@ -1265,6 +1269,18 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::pand, "pand %{reg2}, %{reg1}"), "pand");
 }
 
+TEST_F(AssemblerX86_64Test, Andnpd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnpd, "andnpd %{reg2}, %{reg1}"), "andnpd");
+}
+
+TEST_F(AssemblerX86_64Test, Andnps) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::andnps, "andnps %{reg2}, %{reg1}"), "andnps");
+}
+
+TEST_F(AssemblerX86_64Test, Pandn) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pandn, "pandn %{reg2}, %{reg1}"), "pandn");
+}
+
 TEST_F(AssemblerX86_64Test, Orps) {
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::orps, "orps %{reg2}, %{reg1}"), "orps");
 }
@@ -1277,6 +1293,46 @@
   DriverStr(RepeatFF(&x86_64::X86_64Assembler::por, "por %{reg2}, %{reg1}"), "por");
 }
 
+TEST_F(AssemblerX86_64Test, Pavgb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgb, "pavgb %{reg2}, %{reg1}"), "pavgb");
+}
+
+TEST_F(AssemblerX86_64Test, Pavgw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pavgw, "pavgw %{reg2}, %{reg1}"), "pavgw");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqb, "pcmpeqb %{reg2}, %{reg1}"), "pcmpeqb");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqw, "pcmpeqw %{reg2}, %{reg1}"), "pcmpeqw");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqd, "pcmpeqd %{reg2}, %{reg1}"), "pcmpeqd");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpeqq) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpeqq, "pcmpeqq %{reg2}, %{reg1}"), "pcmpeqq");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtb) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtb, "pcmpgtb %{reg2}, %{reg1}"), "pcmpgtb");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtw) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtw, "pcmpgtw %{reg2}, %{reg1}"), "pcmpgtw");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtd) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtd, "pcmpgtd %{reg2}, %{reg1}"), "pcmpgtd");
+}
+
+TEST_F(AssemblerX86_64Test, PCmpgtq) {
+  DriverStr(RepeatFF(&x86_64::X86_64Assembler::pcmpgtq, "pcmpgtq %{reg2}, %{reg1}"), "pcmpgtq");
+}
+
 TEST_F(AssemblerX86_64Test, Shufps) {
   DriverStr(RepeatFFI(&x86_64::X86_64Assembler::shufps, 1, "shufps ${imm}, %{reg2}, %{reg1}"), "shufps");
 }
diff --git a/compiler/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
index c892b25..fa7e985 100644
--- a/compiler/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -18,21 +18,21 @@
 #include "verifier/verifier_deps.h"
 
 #include "class_linker.h"
-#include "compiler/common_compiler_test.h"
-#include "compiler/dex/verification_results.h"
-#include "compiler/dex/verified_method.h"
-#include "compiler/driver/compiler_options.h"
-#include "compiler/driver/compiler_driver.h"
-#include "compiler/utils/atomic_method_ref_map-inl.h"
+#include "common_compiler_test.h"
 #include "compiler_callbacks.h"
+#include "dex/verification_results.h"
+#include "dex/verified_method.h"
 #include "dex_file.h"
 #include "dex_file_types.h"
+#include "driver/compiler_options.h"
+#include "driver/compiler_driver.h"
 #include "handle_scope-inl.h"
 #include "verifier/method_verifier-inl.h"
 #include "mirror/class_loader.h"
 #include "runtime.h"
 #include "thread.h"
 #include "scoped_thread_state_change-inl.h"
+#include "utils/atomic_method_ref_map-inl.h"
 
 namespace art {
 namespace verifier {
@@ -246,9 +246,13 @@
   }
 
   bool HasUnverifiedClass(const std::string& cls) {
-    const DexFile::TypeId* type_id = primary_dex_file_->FindTypeId(cls.c_str());
+    return HasUnverifiedClass(cls, *primary_dex_file_);
+  }
+
+  bool HasUnverifiedClass(const std::string& cls, const DexFile& dex_file) {
+    const DexFile::TypeId* type_id = dex_file.FindTypeId(cls.c_str());
     DCHECK(type_id != nullptr);
-    dex::TypeIndex index = primary_dex_file_->GetIndexForTypeId(*type_id);
+    dex::TypeIndex index = dex_file.GetIndexForTypeId(*type_id);
     for (const auto& dex_dep : verifier_deps_->dex_deps_) {
       for (dex::TypeIndex entry : dex_dep.second->unverified_classes_) {
         if (index == entry) {
@@ -710,12 +714,12 @@
 
 TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInReferenced) {
   ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInReferenced"));
-  ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public final"));
+  ASSERT_TRUE(HasClass("Ljava/lang/System;", true, "public"));
   ASSERT_TRUE(HasField("Ljava/lang/System;",
                        "out",
                        "Ljava/io/PrintStream;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljava/lang/System;"));
 }
 
@@ -723,13 +727,13 @@
   ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass1"));
   ASSERT_TRUE(HasClass("Ljava/util/SimpleTimeZone;", true, "public"));
   ASSERT_TRUE(HasField(
-      "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public final static", "Ljava/util/TimeZone;"));
+      "Ljava/util/SimpleTimeZone;", "LONG", "I", true, "public static", "Ljava/util/TimeZone;"));
 }
 
 TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInSuperclass2) {
   ASSERT_TRUE(VerifyMethod("StaticField_Resolved_DeclaredInSuperclass2"));
   ASSERT_TRUE(HasField(
-      "LMySimpleTimeZone;", "SHORT", "I", true, "public final static", "Ljava/util/TimeZone;"));
+      "LMySimpleTimeZone;", "SHORT", "I", true, "public static", "Ljava/util/TimeZone;"));
 }
 
 TEST_F(VerifierDepsTest, StaticField_Resolved_DeclaredInInterface1) {
@@ -739,7 +743,7 @@
                        "PI_ENABLE_OUTPUT_ESCAPING",
                        "Ljava/lang/String;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljavax/xml/transform/Result;"));
 }
 
@@ -749,7 +753,7 @@
                        "PI_ENABLE_OUTPUT_ESCAPING",
                        "Ljava/lang/String;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljavax/xml/transform/Result;"));
 }
 
@@ -759,7 +763,7 @@
                        "PI_ENABLE_OUTPUT_ESCAPING",
                        "Ljava/lang/String;",
                        true,
-                       "public final static",
+                       "public static",
                        "Ljavax/xml/transform/Result;"));
 }
 
@@ -769,13 +773,13 @@
                        "ELEMENT_NODE",
                        "S",
                        true,
-                       "public final static",
+                       "public static",
                        "Lorg/w3c/dom/Node;"));
 }
 
 TEST_F(VerifierDepsTest, StaticField_Unresolved_ReferrerInBoot) {
   ASSERT_TRUE(VerifyMethod("StaticField_Unresolved_ReferrerInBoot"));
-  ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljava/util/TimeZone;", true, "public"));
   ASSERT_TRUE(HasField("Ljava/util/TimeZone;", "x", "I", false));
 }
 
@@ -847,7 +851,7 @@
 
 TEST_F(VerifierDepsTest, InvokeStatic_Resolved_DeclaredInSuperclass1) {
   ASSERT_TRUE(VerifyMethod("InvokeStatic_Resolved_DeclaredInSuperclass1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct",
                         "Ljavax/net/ssl/SSLSocket;",
                         "setSocketImplFactory",
@@ -870,7 +874,7 @@
 
 TEST_F(VerifierDepsTest, InvokeStatic_DeclaredInInterface1) {
   ASSERT_TRUE(VerifyMethod("InvokeStatic_DeclaredInInterface1"));
-  ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/util/Map$Entry;", true, "public interface"));
   ASSERT_TRUE(HasMethod("direct",
                         "Ljava/util/Map$Entry;",
                         "comparingByKey",
@@ -892,7 +896,7 @@
 
 TEST_F(VerifierDepsTest, InvokeStatic_Unresolved1) {
   ASSERT_FALSE(VerifyMethod("InvokeStatic_Unresolved1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "x", "()V", false));
 }
 
@@ -910,7 +914,7 @@
 
 TEST_F(VerifierDepsTest, InvokeDirect_Resolved_DeclaredInSuperclass1) {
   ASSERT_FALSE(VerifyMethod("InvokeDirect_Resolved_DeclaredInSuperclass1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct",
                         "Ljavax/net/ssl/SSLSocket;",
                         "checkOldImpl",
@@ -928,7 +932,7 @@
 
 TEST_F(VerifierDepsTest, InvokeDirect_Unresolved1) {
   ASSERT_FALSE(VerifyMethod("InvokeDirect_Unresolved1"));
-  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public abstract"));
+  ASSERT_TRUE(HasClass("Ljavax/net/ssl/SSLSocket;", true, "public"));
   ASSERT_TRUE(HasMethod("direct", "Ljavax/net/ssl/SSLSocket;", "x", "()V", false));
 }
 
@@ -983,7 +987,7 @@
                         "size",
                         "()I",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/util/Set;"));
 }
 
@@ -1012,13 +1016,13 @@
 
 TEST_F(VerifierDepsTest, InvokeInterface_Resolved_DeclaredInReferenced) {
   ASSERT_TRUE(VerifyMethod("InvokeInterface_Resolved_DeclaredInReferenced"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface"));
   ASSERT_TRUE(HasMethod("interface",
                         "Ljava/lang/Runnable;",
                         "run",
                         "()V",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/lang/Runnable;"));
 }
 
@@ -1034,7 +1038,7 @@
                         "run",
                         "()V",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/lang/Runnable;"));
 }
 
@@ -1045,13 +1049,13 @@
                         "isEmpty",
                         "()Z",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/util/Set;"));
 }
 
 TEST_F(VerifierDepsTest, InvokeInterface_Unresolved1) {
   ASSERT_FALSE(VerifyMethod("InvokeInterface_Unresolved1"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface"));
   ASSERT_TRUE(HasMethod("interface", "Ljava/lang/Runnable;", "x", "()V", false));
 }
 
@@ -1062,20 +1066,20 @@
 
 TEST_F(VerifierDepsTest, InvokeSuper_ThisAssignable) {
   ASSERT_TRUE(VerifyMethod("InvokeSuper_ThisAssignable"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public abstract interface"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Runnable;", true, "public interface"));
   ASSERT_TRUE(HasAssignable("Ljava/lang/Runnable;", "Ljava/lang/Thread;", true));
   ASSERT_TRUE(HasMethod("interface",
                         "Ljava/lang/Runnable;",
                         "run",
                         "()V",
                         true,
-                        "public abstract",
+                        "public",
                         "Ljava/lang/Runnable;"));
 }
 
 TEST_F(VerifierDepsTest, InvokeSuper_ThisNotAssignable) {
   ASSERT_FALSE(VerifyMethod("InvokeSuper_ThisNotAssignable"));
-  ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public final"));
+  ASSERT_TRUE(HasClass("Ljava/lang/Integer;", true, "public"));
   ASSERT_TRUE(HasAssignable("Ljava/lang/Integer;", "Ljava/lang/Thread;", false));
   ASSERT_TRUE(HasMethod(
       "virtual", "Ljava/lang/Integer;", "intValue", "()I", true, "public", "Ljava/lang/Integer;"));
@@ -1083,12 +1087,12 @@
 
 TEST_F(VerifierDepsTest, ArgumentType_ResolvedReferenceArray) {
   ASSERT_TRUE(VerifyMethod("ArgumentType_ResolvedReferenceArray"));
-  ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public final abstract"));
+  ASSERT_TRUE(HasClass("[Ljava/lang/Thread;", true, "public"));
 }
 
 TEST_F(VerifierDepsTest, NewArray_Resolved) {
   ASSERT_TRUE(VerifyMethod("NewArray_Resolved"));
-  ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public final abstract"));
+  ASSERT_TRUE(HasClass("[Ljava/lang/IllegalStateException;", true, "public"));
 }
 
 TEST_F(VerifierDepsTest, EncodeDecode) {
@@ -1141,7 +1145,7 @@
   // Test that a class with hard failure is recorded.
   ASSERT_TRUE(HasUnverifiedClass("LMyVerificationFailure;"));
   // Test that a class with unresolved super is recorded.
-  ASSERT_FALSE(HasUnverifiedClass("LMyClassWithNoSuper;"));
+  ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuper;"));
   // Test that a class with unresolved super and hard failure is recorded.
   ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;"));
 }
@@ -1511,5 +1515,26 @@
   }
 }
 
+TEST_F(VerifierDepsTest, MultiDexVerification) {
+  VerifyDexFile("VerifierDepsMulti");
+  ASSERT_EQ(NumberOfCompiledDexFiles(), 2u);
+
+  ASSERT_TRUE(HasUnverifiedClass("LMySoftVerificationFailure;", *dex_files_[1]));
+  ASSERT_TRUE(HasUnverifiedClass("LMySub1SoftVerificationFailure;", *dex_files_[0]));
+  ASSERT_TRUE(HasUnverifiedClass("LMySub2SoftVerificationFailure;", *dex_files_[0]));
+
+  std::vector<uint8_t> buffer;
+  verifier_deps_->Encode(dex_files_, &buffer);
+  ASSERT_FALSE(buffer.empty());
+}
+
+TEST_F(VerifierDepsTest, NotAssignable_InterfaceWithClassInBoot) {
+  ASSERT_TRUE(TestAssignabilityRecording(/* dst */ "Ljava/lang/Exception;",
+                                         /* src */ "LIface;",
+                                         /* is_strict */ true,
+                                         /* is_assignable */ false));
+  ASSERT_TRUE(HasAssignable("Ljava/lang/Exception;", "LIface;", false));
+}
+
 }  // namespace verifier
 }  // namespace art
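
Note on the expectation changes above: every asserted flag string loses its final/abstract bits ("public final" becomes "public", "public abstract interface" becomes "public interface") while static and interface survive, which is consistent with the verifier deps recording masked access flags. A minimal standalone sketch, assuming a hypothetical mask of the visibility bits plus kAccStatic and kAccInterface (the constants are the standard dex values; the exact mask used by VerifierDeps is not shown in this diff):

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Standard dex access flag values (see the dex format specification).
    constexpr uint32_t kAccPublic    = 0x0001;
    constexpr uint32_t kAccPrivate   = 0x0002;
    constexpr uint32_t kAccProtected = 0x0004;
    constexpr uint32_t kAccStatic    = 0x0008;
    constexpr uint32_t kAccFinal     = 0x0010;
    constexpr uint32_t kAccInterface = 0x0200;
    constexpr uint32_t kAccAbstract  = 0x0400;

    // Hypothetical mask: keep visibility, static and interface; drop final/abstract.
    constexpr uint32_t kDepsMask = kAccPublic | kAccPrivate | kAccProtected |
                                   kAccStatic | kAccInterface;

    std::string PrettyFlags(uint32_t flags) {
      std::string s;
      if (flags & kAccPublic) s += "public ";
      if (flags & kAccStatic) s += "static ";
      if (flags & kAccFinal) s += "final ";
      if (flags & kAccAbstract) s += "abstract ";
      if (flags & kAccInterface) s += "interface ";
      if (!s.empty()) s.pop_back();  // drop trailing space
      return s;
    }

    int main() {
      // java.lang.System is "public final"; masked, only "public" survives.
      std::cout << PrettyFlags((kAccPublic | kAccFinal) & kDepsMask) << "\n";
      // System.out is "public final static"; masked, "public static" survives.
      std::cout << PrettyFlags((kAccPublic | kAccFinal | kAccStatic) & kDepsMask) << "\n";
      return 0;
    }
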
diff --git a/dex2oat/Android.bp b/dex2oat/Android.bp
index 0924aec..048f36d 100644
--- a/dex2oat/Android.bp
+++ b/dex2oat/Android.bp
@@ -14,6 +14,12 @@
 // limitations under the License.
 //
 
+cc_library_headers {
+    name: "dex2oat_headers",
+    host_supported: true,
+    export_include_dirs: ["include"],
+}
+
 cc_defaults {
     name: "dex2oat-defaults",
     host_supported: true,
@@ -40,6 +46,7 @@
     include_dirs: [
         "art/cmdline",
     ],
+    header_libs: ["dex2oat_headers"],
 }
 
 art_cc_binary {
@@ -132,4 +139,5 @@
         "art_gtest_defaults",
     ],
     srcs: ["dex2oat_test.cc"],
+    header_libs: ["dex2oat_headers"],
 }
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index be75628..b4ea20b 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -54,6 +54,7 @@
 #include "debug/method_debug_info.h"
 #include "dex/quick_compiler_callbacks.h"
 #include "dex/verification_results.h"
+#include "dex2oat_return_codes.h"
 #include "dex_file-inl.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
@@ -311,13 +312,6 @@
   UsageError("      Example: --num-dex-method=%d", CompilerOptions::kDefaultNumDexMethodsThreshold);
   UsageError("      Default: %d", CompilerOptions::kDefaultNumDexMethodsThreshold);
   UsageError("");
-  UsageError("  --inline-depth-limit=<depth-limit>: the depth limit of inlining for fine tuning");
-  UsageError("      the compiler. A zero value will disable inlining. Honored only by Optimizing.");
-  UsageError("      Has priority over the --compiler-filter option. Intended for ");
-  UsageError("      development/experimental use.");
-  UsageError("      Example: --inline-depth-limit=%d", CompilerOptions::kDefaultInlineDepthLimit);
-  UsageError("      Default: %d", CompilerOptions::kDefaultInlineDepthLimit);
-  UsageError("");
   UsageError("  --inline-max-code-units=<code-units-count>: the maximum code units that a method");
   UsageError("      can have to be considered for inlining. A zero value will disable inlining.");
   UsageError("      Honored only by Optimizing. Has priority over the --compiler-filter option.");
@@ -328,11 +322,6 @@
   UsageError("");
   UsageError("  --dump-timing: display a breakdown of where time was spent");
   UsageError("");
-  UsageError("  --include-patch-information: Include patching information so the generated code");
-  UsageError("      can have its base address moved without full recompilation.");
-  UsageError("");
-  UsageError("  --no-include-patch-information: Do not include patching information.");
-  UsageError("");
   UsageError("  -g");
   UsageError("  --generate-debug-info: Generate debug information for native debugging,");
   UsageError("      such as stack unwinding information, ELF symbols and DWARF sections.");
@@ -874,22 +863,8 @@
       }
     }
 
-    // It they are not set, use default values for inlining settings.
-    // TODO: We should rethink the compiler filter. We mostly save
-    // time here, which is orthogonal to space.
-    if (compiler_options_->inline_depth_limit_ == CompilerOptions::kUnsetInlineDepthLimit) {
-      compiler_options_->inline_depth_limit_ =
-          (compiler_options_->compiler_filter_ == CompilerFilter::kSpace)
-          // Implementation of the space filter: limit inlining depth.
-          ? CompilerOptions::kSpaceFilterInlineDepthLimit
-          : CompilerOptions::kDefaultInlineDepthLimit;
-    }
     if (compiler_options_->inline_max_code_units_ == CompilerOptions::kUnsetInlineMaxCodeUnits) {
-      compiler_options_->inline_max_code_units_ =
-          (compiler_options_->compiler_filter_ == CompilerFilter::kSpace)
-          // Implementation of the space filter: limit inlining max code units.
-          ? CompilerOptions::kSpaceFilterInlineMaxCodeUnits
-          : CompilerOptions::kDefaultInlineMaxCodeUnits;
+      compiler_options_->inline_max_code_units_ = CompilerOptions::kDefaultInlineMaxCodeUnits;
     }
 
     // Checks are all explicit until we know the architecture.
@@ -1426,25 +1401,15 @@
     if (profile_compilation_info_ != nullptr && IsAppImage()) {
       Runtime* runtime = Runtime::Current();
       CHECK(runtime != nullptr);
-      std::set<DexCacheResolvedClasses> resolved_classes(
-          profile_compilation_info_->GetResolvedClasses());
-
       // Filter out class path classes since we don't want to include these in the image.
       std::unordered_set<std::string> dex_files_locations;
       for (const DexFile* dex_file : dex_files_) {
         dex_files_locations.insert(dex_file->GetLocation());
       }
-      for (auto it = resolved_classes.begin(); it != resolved_classes.end(); ) {
-        if (dex_files_locations.find(it->GetDexLocation()) == dex_files_locations.end()) {
-          VLOG(compiler) << "Removed profile samples for non-app dex file " << it->GetDexLocation();
-          it = resolved_classes.erase(it);
-        } else {
-          ++it;
-        }
-      }
-
+      std::set<DexCacheResolvedClasses> resolved_classes(
+          profile_compilation_info_->GetResolvedClasses(dex_files_locations));
       image_classes_.reset(new std::unordered_set<std::string>(
-          runtime->GetClassLinker()->GetClassDescriptorsForProfileKeys(resolved_classes)));
+          runtime->GetClassLinker()->GetClassDescriptorsForResolvedClasses(resolved_classes)));
       VLOG(compiler) << "Loaded " << image_classes_->size()
                      << " image class descriptors from profile";
       if (VLOG_IS_ON(compiler)) {
@@ -1457,11 +1422,11 @@
 
   // Set up the environment for compilation. Includes starting the runtime and loading/opening the
   // boot class path.
-  bool Setup() {
+  dex2oat::ReturnCode Setup() {
     TimingLogger::ScopedTiming t("dex2oat Setup", timings_);
 
     if (!PrepareImageClasses() || !PrepareCompiledClasses() || !PrepareCompiledMethods()) {
-      return false;
+      return dex2oat::ReturnCode::kOther;
     }
 
     verification_results_.reset(new VerificationResults(compiler_options_.get()));
@@ -1473,12 +1438,12 @@
 
     RuntimeArgumentMap runtime_options;
     if (!PrepareRuntimeOptions(&runtime_options)) {
-      return false;
+      return dex2oat::ReturnCode::kOther;
     }
 
     CreateOatWriters();
     if (!AddDexFileSources()) {
-      return false;
+      return dex2oat::ReturnCode::kOther;
     }
 
     if (IsBootImage() && image_filenames_.size() > 1) {
@@ -1494,14 +1459,14 @@
       // When compiling an app, create the runtime early to retrieve
       // the image location key needed for the oat header.
       if (!CreateRuntime(std::move(runtime_options))) {
-        return false;
+        return dex2oat::ReturnCode::kCreateRuntime;
       }
 
       if (CompilerFilter::DependsOnImageChecksum(compiler_options_->GetCompilerFilter())) {
         TimingLogger::ScopedTiming t3("Loading image checksum", timings_);
         std::vector<gc::space::ImageSpace*> image_spaces =
             Runtime::Current()->GetHeap()->GetBootImageSpaces();
-        image_file_location_oat_checksum_ = OatFileAssistant::CalculateCombinedImageChecksum();
+        image_file_location_oat_checksum_ = image_spaces[0]->GetImageHeader().GetOatChecksum();
         image_file_location_oat_data_begin_ =
             reinterpret_cast<uintptr_t>(image_spaces[0]->GetImageHeader().GetOatDataBegin());
         image_patch_delta_ = image_spaces[0]->GetImageHeader().GetPatchDelta();
@@ -1565,7 +1530,7 @@
             update_input_vdex_,
             &opened_dex_files_map,
             &opened_dex_files)) {
-          return false;
+          return dex2oat::ReturnCode::kOther;
         }
         dex_files_per_oat_file_.push_back(MakeNonOwningPointerVector(opened_dex_files));
         if (opened_dex_files_map != nullptr) {
@@ -1617,7 +1582,7 @@
       // Note: Runtime acquires ownership of these dex files.
       runtime_options.Set(RuntimeArgumentMap::BootClassPathDexList, &opened_dex_files_);
       if (!CreateRuntime(std::move(runtime_options))) {
-        return false;
+        return dex2oat::ReturnCode::kOther;
       }
     }
 
@@ -1651,7 +1616,7 @@
     for (const std::unique_ptr<MemMap>& map : opened_dex_files_maps_) {
       if (!map->Protect(PROT_READ | PROT_WRITE)) {
         PLOG(ERROR) << "Failed to make .dex files writeable.";
-        return false;
+        return dex2oat::ReturnCode::kOther;
       }
     }
 
@@ -1666,14 +1631,14 @@
         soa.Self()->AssertPendingException();
         soa.Self()->ClearException();
         PLOG(ERROR) << "Failed to register dex file.";
-        return false;
+        return dex2oat::ReturnCode::kOther;
       }
       // Pre-register dex files so that we can access verification results without locks during
       // compilation and verification.
       verification_results_->AddDexFile(dex_file);
     }
 
-    return true;
+    return dex2oat::ReturnCode::kNoFailure;
   }
 
   // If we need to keep the oat file open for the image writer.
@@ -1922,6 +1887,14 @@
                                              oat_writer->GetOatDataOffset(),
                                              oat_writer->GetOatSize());
         }
+
+        if (IsBootImage()) {
+          // Have the image_file_location_oat_checksum_ for boot oat files
+          // depend on the contents of all the boot oat files. This way only
+          // the primary image checksum needs to be checked to determine
+          // whether any of the images are out of date.
+          image_file_location_oat_checksum_ ^= oat_writer->GetOatHeader().GetChecksum();
+        }
       }
 
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
@@ -1968,7 +1941,6 @@
 
         elf_writer->WriteDynamicSection();
         elf_writer->WriteDebugInfo(oat_writer->GetMethodDebugInfo());
-        elf_writer->WritePatchLocations(oat_writer->GetAbsolutePatchLocations());
 
         if (!elf_writer->End()) {
           LOG(ERROR) << "Failed to write ELF file " << oat_file->GetPath();
@@ -2116,6 +2088,10 @@
     return DoProfileGuidedOptimizations();
   }
 
+  bool HasInputVdexFile() const {
+    return input_vdex_file_ != nullptr || input_vdex_fd_ != -1 || !input_vdex_.empty();
+  }
+
   bool LoadProfile() {
     DCHECK(UseProfile());
 
@@ -2793,13 +2769,13 @@
 #endif
 }
 
-static int CompileImage(Dex2Oat& dex2oat) {
+static dex2oat::ReturnCode CompileImage(Dex2Oat& dex2oat) {
   dex2oat.LoadClassProfileDescriptors();
   dex2oat.Compile();
 
   if (!dex2oat.WriteOutputFiles()) {
     dex2oat.EraseOutputFiles();
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // Flush boot.oat. We always expect the output file by name, and it will be re-opened from the
@@ -2808,43 +2784,46 @@
   if (dex2oat.ShouldKeepOatFileOpen()) {
     if (!dex2oat.FlushOutputFiles()) {
       dex2oat.EraseOutputFiles();
-      return EXIT_FAILURE;
+      return dex2oat::ReturnCode::kOther;
     }
   } else if (!dex2oat.FlushCloseOutputFiles()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // Creates the boot.art and patches the oat files.
   if (!dex2oat.HandleImage()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // When given --host, finish early without stripping.
   if (dex2oat.IsHost()) {
+    if (!dex2oat.FlushCloseOutputFiles()) {
+      return dex2oat::ReturnCode::kOther;
+    }
     dex2oat.DumpTiming();
-    return EXIT_SUCCESS;
+    return dex2oat::ReturnCode::kNoFailure;
   }
 
   // Copy stripped to unstripped location, if necessary.
   if (!dex2oat.CopyStrippedToUnstripped()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // FlushClose again, as stripping might have re-opened the oat files.
   if (!dex2oat.FlushCloseOutputFiles()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   dex2oat.DumpTiming();
-  return EXIT_SUCCESS;
+  return dex2oat::ReturnCode::kNoFailure;
 }
 
-static int CompileApp(Dex2Oat& dex2oat) {
+static dex2oat::ReturnCode CompileApp(Dex2Oat& dex2oat) {
   dex2oat.Compile();
 
   if (!dex2oat.WriteOutputFiles()) {
     dex2oat.EraseOutputFiles();
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // Do not close the oat files here. We might have gotten the output file by file descriptor,
@@ -2853,29 +2832,29 @@
   // When given --host, finish early without stripping.
   if (dex2oat.IsHost()) {
     if (!dex2oat.FlushCloseOutputFiles()) {
-      return EXIT_FAILURE;
+      return dex2oat::ReturnCode::kOther;
     }
 
     dex2oat.DumpTiming();
-    return EXIT_SUCCESS;
+    return dex2oat::ReturnCode::kNoFailure;
   }
 
   // Copy stripped to unstripped location, if necessary. This will implicitly flush & close the
   // stripped versions. If this is given, we expect to be able to open writable files by name.
   if (!dex2oat.CopyStrippedToUnstripped()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // Flush and close the files.
   if (!dex2oat.FlushCloseOutputFiles()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   dex2oat.DumpTiming();
-  return EXIT_SUCCESS;
+  return dex2oat::ReturnCode::kNoFailure;
 }
 
-static int dex2oat(int argc, char** argv) {
+static dex2oat::ReturnCode Dex2oat(int argc, char** argv) {
   b13564922();
 
   TimingLogger timings("compiler", false, false);
@@ -2894,7 +2873,14 @@
   if (dex2oat->UseProfile()) {
     if (!dex2oat->LoadProfile()) {
       LOG(ERROR) << "Failed to process profile file";
-      return EXIT_FAILURE;
+      return dex2oat::ReturnCode::kOther;
+    }
+  }
+
+  if (dex2oat->DoDexLayoutOptimizations()) {
+    if (dex2oat->HasInputVdexFile()) {
+      LOG(ERROR) << "Dexlayout is incompatible with an input VDEX";
+      return dex2oat::ReturnCode::kOther;
     }
   }
 
@@ -2902,7 +2888,7 @@
 
   // Check early that the result of compilation can be written
   if (!dex2oat->OpenFile()) {
-    return EXIT_FAILURE;
+    return dex2oat::ReturnCode::kOther;
   }
 
   // Print the complete line when any of the following is true:
@@ -2917,16 +2903,17 @@
     LOG(INFO) << StrippedCommandLine();
   }
 
-  if (!dex2oat->Setup()) {
+  dex2oat::ReturnCode setup_code = dex2oat->Setup();
+  if (setup_code != dex2oat::ReturnCode::kNoFailure) {
     dex2oat->EraseOutputFiles();
-    return EXIT_FAILURE;
+    return setup_code;
   }
 
   // Helps debugging on device. Can be used to determine which dalvikvm instance invoked a dex2oat
   // instance. Used by tools/bisection_search/bisection_search.py.
   VLOG(compiler) << "Running dex2oat (parent PID = " << getppid() << ")";
 
-  bool result;
+  dex2oat::ReturnCode result;
   if (dex2oat->IsImage()) {
     result = CompileImage(*dex2oat);
   } else {
@@ -2939,7 +2926,7 @@
 }  // namespace art
 
 int main(int argc, char** argv) {
-  int result = art::dex2oat(argc, argv);
+  int result = static_cast<int>(art::Dex2oat(argc, argv));
   // Everything was done, do an explicit exit here to avoid running Runtime destructors that take
   // time (bug 10645725) unless we're a debug build or running on valgrind. Note: The Dex2Oat class
   // should not destruct the runtime in this case.
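
Note on the checksum folding introduced above: XOR-combining the per-oat checksums means a change to any one boot oat file flips the value stored with the primary image, so a single comparison detects staleness anywhere in the set. A self-contained sketch with hypothetical checksum values (not taken from a real image):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Hypothetical per-oat-file checksums for a three-file boot image.
      uint32_t oat_checksums[3] = {0xdeadbeef, 0x12345678, 0x0badf00d};

      // Fold them into the primary image's stored checksum.
      uint32_t combined = 0;
      for (uint32_t c : oat_checksums) {
        combined ^= c;
      }

      // Replacing any single input changes the combined value, so checking
      // only the primary image detects staleness in any of the oat files.
      uint32_t tampered = combined ^ oat_checksums[1] ^ 0x12345679;
      assert(tampered != combined);
      return 0;
    }
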
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 2c0b125..8c14b50 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -30,6 +30,7 @@
 #include "base/macros.h"
 #include "dex_file-inl.h"
 #include "dex2oat_environment_test.h"
+#include "dex2oat_return_codes.h"
 #include "jit/profile_compilation_info.h"
 #include "oat.h"
 #include "oat_file.h"
@@ -37,6 +38,8 @@
 
 namespace art {
 
+using android::base::StringPrintf;
+
 class Dex2oatTest : public Dex2oatEnvironmentTest {
  public:
   virtual void TearDown() OVERRIDE {
@@ -48,23 +51,50 @@
   }
 
  protected:
-  void GenerateOdexForTest(const std::string& dex_location,
-                           const std::string& odex_location,
-                           CompilerFilter::Filter filter,
-                           const std::vector<std::string>& extra_args = {},
-                           bool expect_success = true) {
+  int GenerateOdexForTestWithStatus(const std::string& dex_location,
+                                    const std::string& odex_location,
+                                    CompilerFilter::Filter filter,
+                                    std::string* error_msg,
+                                    const std::vector<std::string>& extra_args = {},
+                                    bool use_fd = false) {
+    std::unique_ptr<File> oat_file;
     std::vector<std::string> args;
     args.push_back("--dex-file=" + dex_location);
-    args.push_back("--oat-file=" + odex_location);
+    if (use_fd) {
+      oat_file.reset(OS::CreateEmptyFile(odex_location.c_str()));
+      CHECK(oat_file != nullptr) << odex_location;
+      args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
+      args.push_back("--oat-location=" + odex_location);
+    } else {
+      args.push_back("--oat-file=" + odex_location);
+    }
     args.push_back("--compiler-filter=" + CompilerFilter::NameOfFilter(filter));
     args.push_back("--runtime-arg");
     args.push_back("-Xnorelocate");
 
     args.insert(args.end(), extra_args.begin(), extra_args.end());
 
-    std::string error_msg;
-    bool success = Dex2Oat(args, &error_msg);
+    int status = Dex2Oat(args, error_msg);
+    if (oat_file != nullptr) {
+      CHECK_EQ(oat_file->FlushClose(), 0) << "Could not flush and close oat file";
+    }
+    return status;
+  }
 
+  void GenerateOdexForTest(const std::string& dex_location,
+                           const std::string& odex_location,
+                           CompilerFilter::Filter filter,
+                           const std::vector<std::string>& extra_args = {},
+                           bool expect_success = true,
+                           bool use_fd = false) {
+    std::string error_msg;
+    int status = GenerateOdexForTestWithStatus(dex_location,
+                                               odex_location,
+                                               filter,
+                                               &error_msg,
+                                               extra_args,
+                                               use_fd);
+    bool success = (status == 0);
     if (expect_success) {
       ASSERT_TRUE(success) << error_msg << std::endl << output_;
 
@@ -104,7 +134,7 @@
     EXPECT_EQ(expected, actual);
   }
 
-  bool Dex2Oat(const std::vector<std::string>& dex2oat_args, std::string* error_msg) {
+  int Dex2Oat(const std::vector<std::string>& dex2oat_args, std::string* error_msg) {
     Runtime* runtime = Runtime::Current();
 
     const std::vector<gc::space::ImageSpace*>& image_spaces =
@@ -182,6 +212,7 @@
       c_args.push_back(nullptr);
       execv(c_args[0], const_cast<char* const*>(c_args.data()));
       exit(1);
+      UNREACHABLE();
     } else {
       close(link[1]);
       char buffer[128];
@@ -192,12 +223,12 @@
         output_ += std::string(buffer, bytes_read);
       }
       close(link[0]);
-      int status = 0;
+      int status = -1;
       if (waitpid(pid, &status, 0) != -1) {
         success_ = (status == 0);
       }
+      return status;
     }
-    return success_;
   }
 
   std::string output_ = "";
@@ -570,40 +601,152 @@
   // Emits a profile with a single dex file with the given location and a single class index of 1.
   void GenerateProfile(const std::string& test_profile,
                        const std::string& dex_location,
+                       size_t num_classes,
                        uint32_t checksum) {
     int profile_test_fd = open(test_profile.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
     CHECK_GE(profile_test_fd, 0);
 
     ProfileCompilationInfo info;
     std::string profile_key = ProfileCompilationInfo::GetProfileDexFileKey(dex_location);
-    info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1));
+    for (size_t i = 0; i < num_classes; ++i) {
+      info.AddClassIndex(profile_key, checksum, dex::TypeIndex(1 + i));
+    }
     bool result = info.Save(profile_test_fd);
     close(profile_test_fd);
     ASSERT_TRUE(result);
   }
 
-  void RunTest() {
-    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
-    std::string profile_location = GetScratchDir() + "/primary.prof";
-    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
-
-    Copy(GetDexSrc2(), dex_location);
+  void CompileProfileOdex(const std::string& dex_location,
+                          const std::string& odex_location,
+                          const std::string& app_image_file_name,
+                          bool use_fd,
+                          size_t num_profile_classes,
+                          const std::vector<std::string>& extra_args = {},
+                          bool expect_success = true) {
+    const std::string profile_location = GetScratchDir() + "/primary.prof";
     const char* location = dex_location.c_str();
     std::string error_msg;
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     ASSERT_TRUE(DexFile::Open(location, location, true, &error_msg, &dex_files));
     EXPECT_EQ(dex_files.size(), 1U);
     std::unique_ptr<const DexFile>& dex_file = dex_files[0];
-    GenerateProfile(profile_location, dex_location, dex_file->GetLocationChecksum());
+    GenerateProfile(profile_location,
+                    dex_location,
+                    num_profile_classes,
+                    dex_file->GetLocationChecksum());
+    std::vector<std::string> copy(extra_args);
+    copy.push_back("--profile-file=" + profile_location);
+    std::unique_ptr<File> app_image_file;
+    if (!app_image_file_name.empty()) {
+      if (use_fd) {
+        app_image_file.reset(OS::CreateEmptyFile(app_image_file_name.c_str()));
+        copy.push_back("--app-image-fd=" + std::to_string(app_image_file->Fd()));
+      } else {
+        copy.push_back("--app-image-file=" + app_image_file_name);
+      }
+    }
+    GenerateOdexForTest(dex_location,
+                        odex_location,
+                        CompilerFilter::kSpeedProfile,
+                        copy,
+                        expect_success,
+                        use_fd);
+    if (app_image_file != nullptr) {
+      ASSERT_EQ(app_image_file->FlushCloseOrErase(), 0) << "Could not flush and close art file";
+    }
+  }
 
-    const std::vector<std::string>& extra_args = { "--profile-file=" + profile_location };
-    GenerateOdexForTest(dex_location, odex_location, CompilerFilter::kSpeedProfile, extra_args);
+  uint64_t GetImageSize(const std::string& image_file_name) {
+    EXPECT_FALSE(image_file_name.empty());
+    std::unique_ptr<File> file(OS::OpenFileForReading(image_file_name.c_str()));
+    CHECK(file != nullptr);
+    ImageHeader image_header;
+    const bool success = file->ReadFully(&image_header, sizeof(image_header));
+    CHECK(success);
+    CHECK(image_header.IsValid());
+    ReaderMutexLock mu(Thread::Current(), *Locks::mutator_lock_);
+    return image_header.GetImageSize();
+  }
 
+  void RunTest(bool app_image) {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+    std::string app_image_file = app_image ? (GetOdexDir() + "/DexOdexNoOat.art") : "";
+    Copy(GetDexSrc2(), dex_location);
+
+    uint64_t image_file_empty_profile = 0;
+    if (app_image) {
+      CompileProfileOdex(dex_location,
+                         odex_location,
+                         app_image_file,
+                         /* use_fd */ false,
+                         /* num_profile_classes */ 0);
+      CheckValidity();
+      ASSERT_TRUE(success_);
+      // Don't check the result since CheckResult relies on the class being in the profile.
+      image_file_empty_profile = GetImageSize(app_image_file);
+      EXPECT_GT(image_file_empty_profile, 0u);
+    }
+
+    // Small profile.
+    CompileProfileOdex(dex_location,
+                       odex_location,
+                       app_image_file,
+                       /* use_fd */ false,
+                       /* num_profile_classes */ 1);
     CheckValidity();
     ASSERT_TRUE(success_);
-    CheckResult(dex_location, odex_location);
+    CheckResult(dex_location, odex_location, app_image_file);
+
+    if (app_image) {
+      // Test that the profile made a difference by adding more classes.
+      const uint64_t image_file_small_profile = GetImageSize(app_image_file);
+      CHECK_LT(image_file_empty_profile, image_file_small_profile);
+    }
   }
-  void CheckResult(const std::string& dex_location, const std::string& odex_location) {
+
+  void RunTestVDex() {
+    std::string dex_location = GetScratchDir() + "/DexNoOat.jar";
+    std::string odex_location = GetOdexDir() + "/DexOdexNoOat.odex";
+    std::string vdex_location = GetOdexDir() + "/DexOdexNoOat.vdex";
+    std::string app_image_file_name = GetOdexDir() + "/DexOdexNoOat.art";
+    Copy(GetDexSrc2(), dex_location);
+
+    std::unique_ptr<File> vdex_file1(OS::CreateEmptyFile(vdex_location.c_str()));
+    CHECK(vdex_file1 != nullptr) << vdex_location;
+    ScratchFile vdex_file2;
+    {
+      std::string input_vdex = "--input-vdex-fd=-1";
+      std::string output_vdex = StringPrintf("--output-vdex-fd=%d", vdex_file1->Fd());
+      CompileProfileOdex(dex_location,
+                         odex_location,
+                         app_image_file_name,
+                         /* use_fd */ true,
+                         /* num_profile_classes */ 1,
+                         { input_vdex, output_vdex });
+      EXPECT_GT(vdex_file1->GetLength(), 0u);
+    }
+    {
+      // Test that vdex and dexlayout fail gracefully.
+      std::string input_vdex = StringPrintf("--input-vdex-fd=%d", vdex_file1->Fd());
+      std::string output_vdex = StringPrintf("--output-vdex-fd=%d", vdex_file2.GetFd());
+      CompileProfileOdex(dex_location,
+                         odex_location,
+                         app_image_file_name,
+                         /* use_fd */ true,
+                         /* num_profile_classes */ 1,
+                         { input_vdex, output_vdex },
+                         /* expect_success */ false);
+      EXPECT_EQ(vdex_file2.GetFile()->GetLength(), 0u);
+    }
+    ASSERT_EQ(vdex_file1->FlushCloseOrErase(), 0) << "Could not flush and close vdex file";
+    CheckValidity();
+    ASSERT_FALSE(success_);
+  }
+
+  void CheckResult(const std::string& dex_location,
+                   const std::string& odex_location,
+                   const std::string& app_image_file_name) {
     // Host/target independent checks.
     std::string error_msg;
     std::unique_ptr<OatFile> odex_file(OatFile::Open(odex_location.c_str(),
@@ -639,29 +782,47 @@
     }
 
     EXPECT_EQ(odex_file->GetCompilerFilter(), CompilerFilter::kSpeedProfile);
+
+    if (!app_image_file_name.empty()) {
+      // Go peek at the image header to make sure it was large enough to contain the class.
+      std::unique_ptr<File> file(OS::OpenFileForReading(app_image_file_name.c_str()));
+      ImageHeader image_header;
+      bool success = file->ReadFully(&image_header, sizeof(image_header));
+      ASSERT_TRUE(success);
+      ASSERT_TRUE(image_header.IsValid());
+      EXPECT_GT(image_header.GetImageSection(ImageHeader::kSectionObjects).Size(), 0u);
+    }
   }
 
-    // Check whether the dex2oat run was really successful.
-    void CheckValidity() {
-      if (kIsTargetBuild) {
-        CheckTargetValidity();
-      } else {
-        CheckHostValidity();
-      }
+  // Check whether the dex2oat run was really successful.
+  void CheckValidity() {
+    if (kIsTargetBuild) {
+      CheckTargetValidity();
+    } else {
+      CheckHostValidity();
     }
+  }
 
-    void CheckTargetValidity() {
-      // TODO: Ignore for now.
-    }
+  void CheckTargetValidity() {
+    // TODO: Ignore for now.
+  }
 
-    // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
-    void CheckHostValidity() {
-      EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
-    }
-  };
+  // On the host, we can get the dex2oat output. Here, look for "dex2oat took."
+  void CheckHostValidity() {
+    EXPECT_NE(output_.find("dex2oat took"), std::string::npos) << output_;
+  }
+};
 
 TEST_F(Dex2oatLayoutTest, TestLayout) {
-  RunTest();
+  RunTest(/* app_image */ false);
+}
+
+TEST_F(Dex2oatLayoutTest, TestLayoutAppImage) {
+  RunTest(/* app_image */ true);
+}
+
+TEST_F(Dex2oatLayoutTest, TestVdexLayout) {
+  RunTestVDex();
 }
 
 class Dex2oatWatchdogTest : public Dex2oatTest {
@@ -701,4 +862,30 @@
   RunTest(false, { "--watchdog-timeout=10" });
 }
 
+class Dex2oatReturnCodeTest : public Dex2oatTest {
+ protected:
+  int RunTest(const std::vector<std::string>& extra_args = {}) {
+    std::string dex_location = GetScratchDir() + "/Dex2OatSwapTest.jar";
+    std::string odex_location = GetOdexDir() + "/Dex2OatSwapTest.odex";
+
+    Copy(GetTestDexFileName(), dex_location);
+
+    std::string error_msg;
+    return GenerateOdexForTestWithStatus(dex_location,
+                                         odex_location,
+                                         CompilerFilter::kSpeed,
+                                         &error_msg,
+                                         extra_args);
+  }
+
+  std::string GetTestDexFileName() {
+    return GetDexSrc1();
+  }
+};
+
+TEST_F(Dex2oatReturnCodeTest, TestCreateRuntime) {
+  int status = RunTest({ "--boot-image=/this/does/not/exist/yolo.oat" });
+  EXPECT_EQ(static_cast<int>(dex2oat::ReturnCode::kCreateRuntime), WEXITSTATUS(status)) << output_;
+}
+
 }  // namespace art
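
Note on Dex2oatReturnCodeTest above: the enum value crosses the process boundary as the child's exit status and is recovered with WEXITSTATUS. A minimal sketch of that round trip, using a forked child that simply exits with kCreateRuntime instead of a real dex2oat invocation:

    #include <sys/types.h>
    #include <sys/wait.h>
    #include <unistd.h>

    #include <cassert>

    enum class ReturnCode : int {
      kNoFailure = 0,
      kOther = 1,
      kCreateRuntime = 2,
    };

    int main() {
      pid_t pid = fork();
      if (pid == 0) {
        // Child: stand-in for a dex2oat run that fails to create the runtime.
        _exit(static_cast<int>(ReturnCode::kCreateRuntime));
      }
      int status = -1;
      if (waitpid(pid, &status, 0) != pid) return 1;
      if (!WIFEXITED(status)) return 1;
      // The parent sees exactly the integer the child exited with.
      assert(WEXITSTATUS(status) == static_cast<int>(ReturnCode::kCreateRuntime));
      return 0;
    }
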
diff --git a/test/ti-agent/common_load.h b/dex2oat/include/dex2oat_return_codes.h
similarity index 63%
copy from test/ti-agent/common_load.h
copy to dex2oat/include/dex2oat_return_codes.h
index d254421..cc5400f 100644
--- a/test/ti-agent/common_load.h
+++ b/dex2oat/include/dex2oat_return_codes.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,19 @@
  * limitations under the License.
  */
 
-#ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
-#define ART_TEST_TI_AGENT_COMMON_LOAD_H_
-
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#ifndef ART_DEX2OAT_INCLUDE_DEX2OAT_RETURN_CODES_H_
+#define ART_DEX2OAT_INCLUDE_DEX2OAT_RETURN_CODES_H_
 
 namespace art {
+namespace dex2oat {
 
-extern jvmtiEnv* jvmti_env;
+enum class ReturnCode : int {
+  kNoFailure = 0,
+  kOther = 1,
+  kCreateRuntime = 2,
+};
 
+}  // namespace dex2oat
 }  // namespace art
 
-#endif  // ART_TEST_TI_AGENT_COMMON_LOAD_H_
+#endif  // ART_DEX2OAT_INCLUDE_DEX2OAT_RETURN_CODES_H_
diff --git a/dexlayout/Android.bp b/dexlayout/Android.bp
index 9ee9ebd..e26d051 100644
--- a/dexlayout/Android.bp
+++ b/dexlayout/Android.bp
@@ -19,6 +19,7 @@
         "dexlayout.cc",
         "dex_ir.cc",
         "dex_ir_builder.cc",
+        "dex_verify.cc",
         "dex_visualize.cc",
         "dex_writer.cc",
     ],
@@ -55,3 +56,16 @@
     defaults: ["art_gtest_defaults"],
     srcs: ["dexlayout_test.cc"],
 }
+
+art_cc_binary {
+    name: "dexdiag",
+    host_supported: false,
+    srcs: ["dexdiag.cc"],
+    cflags: ["-Wall"],
+    shared_libs: [
+        "libart",
+        "libart-dexlayout",
+        "libpagemap",
+    ],
+}
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index 131f4b9..4905b5c 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -50,13 +50,14 @@
   DebugInfoItem* debug_info = reinterpret_cast<DebugInfoItem*>(context);
   LocalInfoVector& locals = debug_info->GetLocalInfo();
   const char* name = entry.name_ != nullptr ? entry.name_ : "(null)";
+  const char* descriptor = entry.descriptor_ != nullptr ? entry.descriptor_ : "";
   const char* signature = entry.signature_ != nullptr ? entry.signature_ : "";
   locals.push_back(std::unique_ptr<LocalInfo>(
-      new LocalInfo(name, entry.descriptor_, signature, entry.start_address_,
-                    entry.end_address_, entry.reg_)));
+      new LocalInfo(name, descriptor, signature, entry.start_address_, entry.end_address_,
+                    entry.reg_)));
 }
 
-static uint32_t GetCodeItemSize(const DexFile& dex_file, const DexFile::CodeItem& disk_code_item) {
+static uint32_t GetCodeItemSize(const DexFile::CodeItem& disk_code_item) {
   uintptr_t code_item_start = reinterpret_cast<uintptr_t>(&disk_code_item);
   uint32_t insns_size = disk_code_item.insns_size_in_code_units_;
   uint32_t tries_size = disk_code_item.tries_size_;
@@ -64,23 +65,18 @@
     uintptr_t insns_end = reinterpret_cast<uintptr_t>(&disk_code_item.insns_[insns_size]);
     return insns_end - code_item_start;
   } else {
-    uint32_t last_handler_off = 0;
-    for (uint32_t i = 0; i < tries_size; ++i) {
-      // Iterate over the try items to find the last catch handler.
-      const DexFile::TryItem* disk_try_item = dex_file.GetTryItems(disk_code_item, i);
-      uint16_t handler_off = disk_try_item->handler_off_;
-      if (handler_off > last_handler_off) {
-        last_handler_off = handler_off;
+    // Get the start of the handler data.
+    const uint8_t* handler_data = DexFile::GetCatchHandlerData(disk_code_item, 0);
+    uint32_t handlers_size = DecodeUnsignedLeb128(&handler_data);
+    // Manually read each handler.
+    for (uint32_t i = 0; i < handlers_size; ++i) {
+      int32_t uleb128_count = DecodeSignedLeb128(&handler_data) * 2;
+      if (uleb128_count <= 0) {
+        uleb128_count = -uleb128_count + 1;
       }
-    }
-    // Decode the final handler to see where it ends.
-    const uint8_t* handler_data = DexFile::GetCatchHandlerData(disk_code_item, last_handler_off);
-    int32_t uleb128_count = DecodeSignedLeb128(&handler_data) * 2;
-    if (uleb128_count <= 0) {
-      uleb128_count = -uleb128_count + 1;
-    }
-    for (int32_t i = 0; i < uleb128_count; ++i) {
-      DecodeUnsignedLeb128(&handler_data);
+      for (int32_t j = 0; j < uleb128_count; ++j) {
+        DecodeUnsignedLeb128(&handler_data);
+      }
     }
     return reinterpret_cast<uintptr_t>(handler_data) - code_item_start;
   }
@@ -649,7 +645,7 @@
         }
       }
       int32_t size = DecodeSignedLeb128(&handlers_data);
-      bool has_catch_all = size < 0;
+      bool has_catch_all = size <= 0;
       if (has_catch_all) {
         size = -size;
       }
@@ -680,7 +676,7 @@
     }
   }
 
-  uint32_t size = GetCodeItemSize(dex_file, disk_code_item);
+  uint32_t size = GetCodeItemSize(disk_code_item);
   CodeItem* code_item = new CodeItem(
       registers_size, ins_size, outs_size, debug_info, insns_size, insns, tries, handler_list);
   code_item->SetSize(size);
@@ -768,5 +764,138 @@
   return class_data;
 }
 
+static uint32_t HeaderOffset(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
+  return 0;
+}
+
+static uint32_t HeaderSize(const dex_ir::Collections& collections ATTRIBUTE_UNUSED) {
+  // Size is in elements, so there is only one header.
+  return 1;
+}
+
+// The description of each dex file section type.
+struct FileSectionDescriptor {
+ public:
+  std::string name;
+  uint16_t type;
+  // A function that when applied to a collection object, gives the size of the section.
+  std::function<uint32_t(const dex_ir::Collections&)> size_fn;
+  // A function that when applied to a collection object, gives the offset of the section.
+  std::function<uint32_t(const dex_ir::Collections&)> offset_fn;
+};
+
+static const std::vector<FileSectionDescriptor> kFileSectionDescriptors = {
+  {
+    "Header",
+    DexFile::kDexTypeHeaderItem,
+    &HeaderSize,
+    &HeaderOffset,
+  }, {
+    "StringId",
+    DexFile::kDexTypeStringIdItem,
+    &dex_ir::Collections::StringIdsSize,
+    &dex_ir::Collections::StringIdsOffset
+  }, {
+    "TypeId",
+    DexFile::kDexTypeTypeIdItem,
+    &dex_ir::Collections::TypeIdsSize,
+    &dex_ir::Collections::TypeIdsOffset
+  }, {
+    "ProtoId",
+    DexFile::kDexTypeProtoIdItem,
+    &dex_ir::Collections::ProtoIdsSize,
+    &dex_ir::Collections::ProtoIdsOffset
+  }, {
+    "FieldId",
+    DexFile::kDexTypeFieldIdItem,
+    &dex_ir::Collections::FieldIdsSize,
+    &dex_ir::Collections::FieldIdsOffset
+  }, {
+    "MethodId",
+    DexFile::kDexTypeMethodIdItem,
+    &dex_ir::Collections::MethodIdsSize,
+    &dex_ir::Collections::MethodIdsOffset
+  }, {
+    "ClassDef",
+    DexFile::kDexTypeClassDefItem,
+    &dex_ir::Collections::ClassDefsSize,
+    &dex_ir::Collections::ClassDefsOffset
+  }, {
+    "StringData",
+    DexFile::kDexTypeStringDataItem,
+    &dex_ir::Collections::StringDatasSize,
+    &dex_ir::Collections::StringDatasOffset
+  }, {
+    "TypeList",
+    DexFile::kDexTypeTypeList,
+    &dex_ir::Collections::TypeListsSize,
+    &dex_ir::Collections::TypeListsOffset
+  }, {
+    "EncArr",
+    DexFile::kDexTypeEncodedArrayItem,
+    &dex_ir::Collections::EncodedArrayItemsSize,
+    &dex_ir::Collections::EncodedArrayItemsOffset
+  }, {
+    "Annotation",
+    DexFile::kDexTypeAnnotationItem,
+    &dex_ir::Collections::AnnotationItemsSize,
+    &dex_ir::Collections::AnnotationItemsOffset
+  }, {
+    "AnnoSet",
+    DexFile::kDexTypeAnnotationSetItem,
+    &dex_ir::Collections::AnnotationSetItemsSize,
+    &dex_ir::Collections::AnnotationSetItemsOffset
+  }, {
+    "AnnoSetRL",
+    DexFile::kDexTypeAnnotationSetRefList,
+    &dex_ir::Collections::AnnotationSetRefListsSize,
+    &dex_ir::Collections::AnnotationSetRefListsOffset
+  }, {
+    "AnnoDir",
+    DexFile::kDexTypeAnnotationsDirectoryItem,
+    &dex_ir::Collections::AnnotationsDirectoryItemsSize,
+    &dex_ir::Collections::AnnotationsDirectoryItemsOffset
+  }, {
+    "DebugInfo",
+    DexFile::kDexTypeDebugInfoItem,
+    &dex_ir::Collections::DebugInfoItemsSize,
+    &dex_ir::Collections::DebugInfoItemsOffset
+  }, {
+    "CodeItem",
+    DexFile::kDexTypeCodeItem,
+    &dex_ir::Collections::CodeItemsSize,
+    &dex_ir::Collections::CodeItemsOffset
+  }, {
+    "ClassData",
+    DexFile::kDexTypeClassDataItem,
+    &dex_ir::Collections::ClassDatasSize,
+    &dex_ir::Collections::ClassDatasOffset
+  }
+};
+
+std::vector<dex_ir::DexFileSection> GetSortedDexFileSections(dex_ir::Header* header,
+                                                             dex_ir::SortDirection direction) {
+  const dex_ir::Collections& collections = header->GetCollections();
+  std::vector<dex_ir::DexFileSection> sorted_sections;
+  // Build the section list from the descriptors above.
+  for (const FileSectionDescriptor& s : kFileSectionDescriptors) {
+    sorted_sections.push_back(dex_ir::DexFileSection(s.name,
+                                                     s.type,
+                                                     s.size_fn(collections),
+                                                     s.offset_fn(collections)));
+  }
+  // Sort by offset.
+  std::sort(sorted_sections.begin(),
+            sorted_sections.end(),
+            [=](dex_ir::DexFileSection& a, dex_ir::DexFileSection& b) {
+              if (direction == SortDirection::kSortDescending) {
+                return a.offset > b.offset;
+              } else {
+                return a.offset < b.offset;
+              }
+            });
+  return sorted_sections;
+}
+
 }  // namespace dex_ir
 }  // namespace art
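
Note on the GetCodeItemSize rewrite above: it now walks the catch handler data by decoding LEB128 values in place until the cursor passes the last handler. A standalone sketch of the two decoders it depends on (the standard dex LEB128 encoding, simplified with no malformed-input checks):

    #include <cassert>
    #include <cstdint>

    // Unsigned LEB128: 7 payload bits per byte, high bit marks continuation.
    uint32_t DecodeUnsignedLeb128(const uint8_t** data) {
      uint32_t result = 0;
      int shift = 0;
      uint8_t byte;
      do {
        byte = *(*data)++;
        result |= static_cast<uint32_t>(byte & 0x7f) << shift;
        shift += 7;
      } while ((byte & 0x80) != 0);
      return result;
    }

    // Signed LEB128: like the unsigned form, then sign-extend from the last byte.
    int32_t DecodeSignedLeb128(const uint8_t** data) {
      int32_t result = 0;
      int shift = 0;
      uint8_t byte;
      do {
        byte = *(*data)++;
        result |= static_cast<int32_t>(byte & 0x7f) << shift;
        shift += 7;
      } while ((byte & 0x80) != 0);
      if (shift < 32 && (byte & 0x40) != 0) {
        result |= -(1 << shift);  // sign extend
      }
      return result;
    }

    int main() {
      const uint8_t encoded[] = {0xe5, 0x8e, 0x26,  // 624485 unsigned
                                 0x7f};             // -1 signed
      const uint8_t* p = encoded;
      assert(DecodeUnsignedLeb128(&p) == 624485u);
      assert(DecodeSignedLeb128(&p) == -1);
      return 0;
    }
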
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 96afb90..cad0395 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -748,8 +748,7 @@
   const TypeId* ClassType() const { return class_type_; }
   uint32_t GetAccessFlags() const { return access_flags_; }
   const TypeId* Superclass() const { return superclass_; }
-  const TypeIdVector* Interfaces()
-      { return interfaces_ == nullptr ? nullptr : interfaces_->GetTypeList(); }
+  const TypeList* Interfaces() { return interfaces_; }
   uint32_t InterfacesOffset() { return interfaces_ == nullptr ? 0 : interfaces_->GetOffset(); }
   const StringId* SourceFile() const { return source_file_; }
   AnnotationsDirectoryItem* Annotations() const { return annotations_; }
@@ -781,7 +780,7 @@
   uint32_t GetAddress() const { return address_; }
 
  private:
-  const TypeId* type_id_;
+  const TypeId* type_id_;  // This can be nullptr.
   uint32_t address_;
 
   DISALLOW_COPY_AND_ASSIGN(TypeAddrPair);
@@ -1105,6 +1104,28 @@
   DISALLOW_COPY_AND_ASSIGN(MapItem);
 };
 
+// Interface for building a vector of file sections for use by other clients.
+struct DexFileSection {
+ public:
+  DexFileSection(const std::string& name, uint16_t type, uint32_t size, uint32_t offset)
+      : name(name), type(type), size(size), offset(offset) { }
+  std::string name;
+  // The type (DexFile::MapItemType).
+  uint16_t type;
+  // The size (in elements, not bytes).
+  uint32_t size;
+  // The byte offset from the start of the file.
+  uint32_t offset;
+};
+
+enum class SortDirection {
+  kSortAscending,
+  kSortDescending
+};
+
+std::vector<DexFileSection> GetSortedDexFileSections(dex_ir::Header* header,
+                                                     SortDirection direction);
+
 }  // namespace dex_ir
 }  // namespace art
 
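
Note on GetSortedDexFileSections declared above: the sort direction is captured by value inside the comparator. A reduced sketch of the same pattern on a stand-in Section type with hypothetical offsets:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    enum class SortDirection { kSortAscending, kSortDescending };

    struct Section {
      std::string name;
      uint32_t offset;
    };

    std::vector<Section> SortedByOffset(std::vector<Section> sections,
                                        SortDirection direction) {
      // The comparator captures `direction` by value, like the [=] lambda
      // in GetSortedDexFileSections.
      std::sort(sections.begin(), sections.end(),
                [=](const Section& a, const Section& b) {
                  return direction == SortDirection::kSortDescending
                             ? a.offset > b.offset
                             : a.offset < b.offset;
                });
      return sections;
    }

    int main() {
      std::vector<Section> sections = {
          {"CodeItem", 0x400}, {"Header", 0x0}, {"StringId", 0x70}};
      assert(SortedByOffset(sections, SortDirection::kSortAscending)
                 .front().name == "Header");
      assert(SortedByOffset(sections, SortDirection::kSortDescending)
                 .front().name == "CodeItem");
      return 0;
    }
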
diff --git a/dexlayout/dex_verify.cc b/dexlayout/dex_verify.cc
new file mode 100644
index 0000000..5458129
--- /dev/null
+++ b/dexlayout/dex_verify.cc
@@ -0,0 +1,1120 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Implementation file of dex ir verifier.
+ *
+ * Compares two dex files at the IR level, allowing differences in layout, but not in data.
+ */
+
+#include "dex_verify.h"
+
+#include <inttypes.h>
+
+#include "android-base/stringprintf.h"
+
+namespace art {
+
+using android::base::StringPrintf;
+
+bool VerifyOutputDexFile(dex_ir::Header* orig_header,
+                         dex_ir::Header* output_header,
+                         std::string* error_msg) {
+  dex_ir::Collections& orig = orig_header->GetCollections();
+  dex_ir::Collections& output = output_header->GetCollections();
+
+  // Compare all id sections. They have a defined order that can't be changed by dexlayout.
+  if (!VerifyIds(orig.StringIds(), output.StringIds(), "string ids", error_msg) ||
+      !VerifyIds(orig.TypeIds(), output.TypeIds(), "type ids", error_msg) ||
+      !VerifyIds(orig.ProtoIds(), output.ProtoIds(), "proto ids", error_msg) ||
+      !VerifyIds(orig.FieldIds(), output.FieldIds(), "field ids", error_msg) ||
+      !VerifyIds(orig.MethodIds(), output.MethodIds(), "method ids", error_msg)) {
+    return false;
+  }
+  // Compare class defs. The order may have been changed by dexlayout.
+  if (!VerifyClassDefs(orig.ClassDefs(), output.ClassDefs(), error_msg)) {
+    return false;
+  }
+  return true;
+}
+
+template<class T> bool VerifyIds(std::vector<std::unique_ptr<T>>& orig,
+                                 std::vector<std::unique_ptr<T>>& output,
+                                 const char* section_name,
+                                 std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched size for %s section: %zu vs %zu.", section_name, orig.size(), output.size());
+    return false;
+  }
+  for (size_t i = 0; i < orig.size(); ++i) {
+    if (!VerifyId(orig[i].get(), output[i].get(), error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::StringId* orig, dex_ir::StringId* output, std::string* error_msg) {
+  if (strcmp(orig->Data(), output->Data()) != 0) {
+    *error_msg = StringPrintf("Mismatched string data for string id %u at offset %x: %s vs %s.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Data(),
+                              output->Data());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::TypeId* orig, dex_ir::TypeId* output, std::string* error_msg) {
+  if (orig->GetStringId()->GetIndex() != output->GetStringId()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched string index for type id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->GetStringId()->GetIndex(),
+                              output->GetStringId()->GetIndex());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::ProtoId* orig, dex_ir::ProtoId* output, std::string* error_msg) {
+  if (orig->Shorty()->GetIndex() != output->Shorty()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched string index for proto id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Shorty()->GetIndex(),
+                              output->Shorty()->GetIndex());
+    return false;
+  }
+  if (orig->ReturnType()->GetIndex() != output->ReturnType()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched type index for proto id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->ReturnType()->GetIndex(),
+                              output->ReturnType()->GetIndex());
+    return false;
+  }
+  if (!VerifyTypeList(orig->Parameters(), output->Parameters())) {
+    *error_msg = StringPrintf("Mismatched type list for proto id %u at offset %x.",
+                              orig->GetIndex(),
+                              orig->GetOffset());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::FieldId* orig, dex_ir::FieldId* output, std::string* error_msg) {
+  if (orig->Class()->GetIndex() != output->Class()->GetIndex()) {
+    *error_msg =
+        StringPrintf("Mismatched class type index for field id %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->Class()->GetIndex(),
+                     output->Class()->GetIndex());
+    return false;
+  }
+  if (orig->Type()->GetIndex() != output->Type()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched type index for field id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Class()->GetIndex(),
+                              output->Class()->GetIndex());
+    return false;
+  }
+  if (orig->Name()->GetIndex() != output->Name()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched string index for field id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Name()->GetIndex(),
+                              output->Name()->GetIndex());
+    return false;
+  }
+  return true;
+}
+
+bool VerifyId(dex_ir::MethodId* orig, dex_ir::MethodId* output, std::string* error_msg) {
+  if (orig->Class()->GetIndex() != output->Class()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched type index for method id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Class()->GetIndex(),
+                              output->Class()->GetIndex());
+    return false;
+  }
+  if (orig->Proto()->GetIndex() != output->Proto()->GetIndex()) {
+    *error_msg = StringPrintf("Mismatched proto index for method id %u at offset %x: %u vs %u.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig->Class()->GetIndex(),
+                              output->Class()->GetIndex());
+    return false;
+  }
+  if (orig->Name()->GetIndex() != output->Name()->GetIndex()) {
+    *error_msg =
+        StringPrintf("Mismatched string index for method id %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->Name()->GetIndex(),
+                     output->Name()->GetIndex());
+    return false;
+  }
+  return true;
+}
+
+struct ClassDefCompare {
+  bool operator()(dex_ir::ClassDef* lhs, dex_ir::ClassDef* rhs) const {
+    return lhs->ClassType()->GetIndex() < rhs->ClassType()->GetIndex();
+  }
+};
+
+// The class defs may have a new order due to dexlayout. Use the class's class_idx to uniquely
+// identify them and sort them for comparison.
+bool VerifyClassDefs(std::vector<std::unique_ptr<dex_ir::ClassDef>>& orig,
+                     std::vector<std::unique_ptr<dex_ir::ClassDef>>& output,
+                     std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched size for class defs section: %zu vs %zu.", orig.size(), output.size());
+    return false;
+  }
+  // Store the class defs into sets sorted by the class's type index.
+  std::set<dex_ir::ClassDef*, ClassDefCompare> orig_set;
+  std::set<dex_ir::ClassDef*, ClassDefCompare> output_set;
+  for (size_t i = 0; i < orig.size(); ++i) {
+    orig_set.insert(orig[i].get());
+    output_set.insert(output[i].get());
+  }
+  auto orig_iter = orig_set.begin();
+  auto output_iter = output_set.begin();
+  while (orig_iter != orig_set.end() && output_iter != output_set.end()) {
+    if (!VerifyClassDef(*orig_iter, *output_iter, error_msg)) {
+      return false;
+    }
+    orig_iter++;
+    output_iter++;
+  }
+  return true;
+}
+
+bool VerifyClassDef(dex_ir::ClassDef* orig, dex_ir::ClassDef* output, std::string* error_msg) {
+  if (orig->ClassType()->GetIndex() != output->ClassType()->GetIndex()) {
+    *error_msg =
+        StringPrintf("Mismatched class type index for class def %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->ClassType()->GetIndex(),
+                     output->ClassType()->GetIndex());
+    return false;
+  }
+  if (orig->GetAccessFlags() != output->GetAccessFlags()) {
+    *error_msg =
+        StringPrintf("Mismatched access flags for class def %u at offset %x: %x vs %x.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig->GetAccessFlags(),
+                     output->GetAccessFlags());
+    return false;
+  }
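+  // A null superclass is compared as index 0 so both sides are handled uniformly.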
+  uint32_t orig_super = orig->Superclass() == nullptr ? 0 : orig->Superclass()->GetIndex();
+  uint32_t output_super = output->Superclass() == nullptr ? 0 : output->Superclass()->GetIndex();
+  if (orig_super != output_super) {
+    *error_msg =
+        StringPrintf("Mismatched super class for class def %u at offset %x: %u vs %u.",
+                     orig->GetIndex(),
+                     orig->GetOffset(),
+                     orig_super,
+                     output_super);
+    return false;
+  }
+  if (!VerifyTypeList(orig->Interfaces(), output->Interfaces())) {
+    *error_msg = StringPrintf("Mismatched type list for class def %u at offset %x.",
+                              orig->GetIndex(),
+                              orig->GetOffset());
+    return false;
+  }
+  const char* orig_source = orig->SourceFile() == nullptr ? "" : orig->SourceFile()->Data();
+  const char* output_source = output->SourceFile() == nullptr ? "" : output->SourceFile()->Data();
+  if (strcmp(orig_source, output_source) != 0) {
+    *error_msg = StringPrintf("Mismatched source file for class def %u at offset %x: %s vs %s.",
+                              orig->GetIndex(),
+                              orig->GetOffset(),
+                              orig_source,
+                              output_source);
+    return false;
+  }
+  if (!VerifyAnnotationsDirectory(orig->Annotations(), output->Annotations(), error_msg)) {
+    return false;
+  }
+  if (!VerifyClassData(orig->GetClassData(), output->GetClassData(), error_msg)) {
+    return false;
+  }
+  return VerifyEncodedArray(orig->StaticValues(), output->StaticValues(), error_msg);
+}
+
+bool VerifyTypeList(const dex_ir::TypeList* orig, const dex_ir::TypeList* output) {
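+  // Two null lists match; a single null list is a mismatch.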
+  if (orig == nullptr || output == nullptr) {
+    return orig == output;
+  }
+  const dex_ir::TypeIdVector* orig_list = orig->GetTypeList();
+  const dex_ir::TypeIdVector* output_list = output->GetTypeList();
+  if (orig_list->size() != output_list->size()) {
+    return false;
+  }
+  for (size_t i = 0; i < orig_list->size(); ++i) {
+    if ((*orig_list)[i]->GetIndex() != (*output_list)[i]->GetIndex()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationsDirectory(dex_ir::AnnotationsDirectoryItem* orig,
+                                dex_ir::AnnotationsDirectoryItem* output,
+                                std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty annotations directory.";
+      return false;
+    }
+    return true;
+  }
+  if (!VerifyAnnotationSet(orig->GetClassAnnotation(), output->GetClassAnnotation(), error_msg)) {
+    return false;
+  }
+  if (!VerifyFieldAnnotations(orig->GetFieldAnnotations(),
+                              output->GetFieldAnnotations(),
+                              orig->GetOffset(),
+                              error_msg)) {
+    return false;
+  }
+  if (!VerifyMethodAnnotations(orig->GetMethodAnnotations(),
+                               output->GetMethodAnnotations(),
+                               orig->GetOffset(),
+                               error_msg)) {
+    return false;
+  }
+  return VerifyParameterAnnotations(orig->GetParameterAnnotations(),
+                                    output->GetParameterAnnotations(),
+                                    orig->GetOffset(),
+                                    error_msg);
+}
+
+bool VerifyFieldAnnotations(dex_ir::FieldAnnotationVector* orig,
+                            dex_ir::FieldAnnotationVector* output,
+                            uint32_t orig_offset,
+                            std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = StringPrintf(
+          "Found unexpected empty field annotations for annotations directory at offset %x.",
+          orig_offset);
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched field annotations size for annotations directory at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::FieldAnnotation* orig_field = (*orig)[i].get();
+    dex_ir::FieldAnnotation* output_field = (*output)[i].get();
+    if (orig_field->GetFieldId()->GetIndex() != output_field->GetFieldId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched field annotation index for annotations directory at offset %x: %u vs %u.",
+          orig_offset,
+          orig_field->GetFieldId()->GetIndex(),
+          output_field->GetFieldId()->GetIndex());
+      return false;
+    }
+    if (!VerifyAnnotationSet(orig_field->GetAnnotationSetItem(),
+                             output_field->GetAnnotationSetItem(),
+                             error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyMethodAnnotations(dex_ir::MethodAnnotationVector* orig,
+                             dex_ir::MethodAnnotationVector* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = StringPrintf(
+          "Found unexpected empty method annotations for annotations directory at offset %x.",
+          orig_offset);
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched method annotations size for annotations directory at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::MethodAnnotation* orig_method = (*orig)[i].get();
+    dex_ir::MethodAnnotation* output_method = (*output)[i].get();
+    if (orig_method->GetMethodId()->GetIndex() != output_method->GetMethodId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched method annotation index for annotations directory at offset %x: %u vs %u.",
+          orig_offset,
+          orig_method->GetMethodId()->GetIndex(),
+          output_method->GetMethodId()->GetIndex());
+      return false;
+    }
+    if (!VerifyAnnotationSet(orig_method->GetAnnotationSetItem(),
+                             output_method->GetAnnotationSetItem(),
+                             error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyParameterAnnotations(dex_ir::ParameterAnnotationVector* orig,
+                                dex_ir::ParameterAnnotationVector* output,
+                                uint32_t orig_offset,
+                                std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = StringPrintf(
+          "Found unexpected empty parameter annotations for annotations directory at offset %x.",
+          orig_offset);
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched parameter annotations size for annotations directory at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::ParameterAnnotation* orig_param = (*orig)[i].get();
+    dex_ir::ParameterAnnotation* output_param = (*output)[i].get();
+    if (orig_param->GetMethodId()->GetIndex() != output_param->GetMethodId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched parameter annotation index for annotations directory at offset %x: %u vs %u.",
+          orig_offset,
+          orig_param->GetMethodId()->GetIndex(),
+          output_param->GetMethodId()->GetIndex());
+      return false;
+    }
+    if (!VerifyAnnotationSetRefList(orig_param->GetAnnotations(),
+                                    output_param->GetAnnotations(),
+                                    error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationSetRefList(dex_ir::AnnotationSetRefList* orig,
+                                dex_ir::AnnotationSetRefList* output,
+                                std::string* error_msg) {
+  std::vector<dex_ir::AnnotationSetItem*>* orig_items = orig->GetItems();
+  std::vector<dex_ir::AnnotationSetItem*>* output_items = output->GetItems();
+  if (orig_items->size() != output_items->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched annotation set ref list size at offset %x: %zu vs %zu.",
+        orig->GetOffset(),
+        orig_items->size(),
+        output_items->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_items->size(); ++i) {
+    if (!VerifyAnnotationSet((*orig_items)[i], (*output_items)[i], error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationSet(dex_ir::AnnotationSetItem* orig,
+                         dex_ir::AnnotationSetItem* output,
+                         std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty annotation set.";
+      return false;
+    }
+    return true;
+  }
+  std::vector<dex_ir::AnnotationItem*>* orig_items = orig->GetItems();
+  std::vector<dex_ir::AnnotationItem*>* output_items = output->GetItems();
+  if (orig_items->size() != output_items->size()) {
+    *error_msg = StringPrintf("Mismatched size for annotation set at offset %x: %zu vs %zu.",
+                              orig->GetOffset(),
+                              orig_items->size(),
+                              output_items->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_items->size(); ++i) {
+    if (!VerifyAnnotation((*orig_items)[i], (*output_items)[i], error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotation(dex_ir::AnnotationItem* orig,
+                      dex_ir::AnnotationItem* output,
+                      std::string* error_msg) {
+  if (orig->GetVisibility() != output->GetVisibility()) {
+    *error_msg = StringPrintf("Mismatched visibility for annotation at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->GetVisibility(),
+                              output->GetVisibility());
+    return false;
+  }
+  return VerifyEncodedAnnotation(orig->GetAnnotation(),
+                                 output->GetAnnotation(),
+                                 orig->GetOffset(),
+                                 error_msg);
+}
+
+bool VerifyEncodedAnnotation(dex_ir::EncodedAnnotation* orig,
+                             dex_ir::EncodedAnnotation* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg) {
+  if (orig->GetType()->GetIndex() != output->GetType()->GetIndex()) {
+    *error_msg = StringPrintf(
+        "Mismatched encoded annotation type for annotation at offset %x: %u vs %u.",
+        orig_offset,
+        orig->GetType()->GetIndex(),
+        output->GetType()->GetIndex());
+    return false;
+  }
+  dex_ir::AnnotationElementVector* orig_elements = orig->GetAnnotationElements();
+  dex_ir::AnnotationElementVector* output_elements = output->GetAnnotationElements();
+  if (orig_elements->size() != output_elements->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched encoded annotation size for annotation at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig_elements->size(),
+        output_elements->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_elements->size(); ++i) {
+    if (!VerifyAnnotationElement((*orig_elements)[i].get(),
+                                 (*output_elements)[i].get(),
+                                 orig_offset,
+                                 error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyAnnotationElement(dex_ir::AnnotationElement* orig,
+                             dex_ir::AnnotationElement* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg) {
+  if (orig->GetName()->GetIndex() != output->GetName()->GetIndex()) {
+    *error_msg = StringPrintf(
+        "Mismatched annotation element name for annotation at offset %x: %u vs %u.",
+        orig_offset,
+        orig->GetName()->GetIndex(),
+        output->GetName()->GetIndex());
+    return false;
+  }
+  return VerifyEncodedValue(orig->GetValue(), output->GetValue(), orig_offset, error_msg);
+}
+
+bool VerifyEncodedValue(dex_ir::EncodedValue* orig,
+                        dex_ir::EncodedValue* output,
+                        uint32_t orig_offset,
+                        std::string* error_msg) {
+  if (orig->Type() != output->Type()) {
+    *error_msg = StringPrintf(
+        "Mismatched encoded value type for annotation or encoded array at offset %x: %d vs %d.",
+        orig_offset,
+        orig->Type(),
+        output->Type());
+    return false;
+  }
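+  // The types match, so compare the payloads according to the encoded value type.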
+  switch (orig->Type()) {
+    case DexFile::kDexAnnotationByte:
+      if (orig->GetByte() != output->GetByte()) {
+        *error_msg = StringPrintf("Mismatched encoded byte for annotation at offset %x: %d vs %d.",
+                                  orig_offset,
+                                  orig->GetByte(),
+                                  output->GetByte());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationShort:
+      if (orig->GetShort() != output->GetShort()) {
+        *error_msg = StringPrintf("Mismatched encoded short for annotation at offset %x: %d vs %d.",
+                                  orig_offset,
+                                  orig->GetShort(),
+                                  output->GetShort());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationChar:
+      if (orig->GetChar() != output->GetChar()) {
+        *error_msg = StringPrintf("Mismatched encoded char for annotation at offset %x: %c vs %c.",
+                                  orig_offset,
+                                  orig->GetChar(),
+                                  output->GetChar());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationInt:
+      if (orig->GetInt() != output->GetInt()) {
+        *error_msg = StringPrintf("Mismatched encoded int for annotation at offset %x: %d vs %d.",
+                                  orig_offset,
+                                  orig->GetInt(),
+                                  output->GetInt());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationLong:
+      if (orig->GetLong() != output->GetLong()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded long for annotation at offset %x: %" PRId64 " vs %" PRId64 ".",
+            orig_offset,
+            orig->GetLong(),
+            output->GetLong());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationFloat:
+      // The float value is encoded, so compare as if it's an int.
+      if (orig->GetInt() != output->GetInt()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded float for annotation at offset %x: %x (encoded) vs %x (encoded).",
+            orig_offset,
+            orig->GetInt(),
+            output->GetInt());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationDouble:
+      // The double value is encoded, so compare as if it's a long.
+      if (orig->GetLong() != output->GetLong()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded double for annotation at offset %x: %" PRIx64
+            " (encoded) vs %" PRIx64 " (encoded).",
+            orig_offset,
+            orig->GetLong(),
+            output->GetLong());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationString:
+      if (orig->GetStringId()->GetIndex() != output->GetStringId()->GetIndex()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded string for annotation at offset %x: %s vs %s.",
+            orig_offset,
+            orig->GetStringId()->Data(),
+            output->GetStringId()->Data());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationType:
+      if (orig->GetTypeId()->GetIndex() != output->GetTypeId()->GetIndex()) {
+        *error_msg = StringPrintf("Mismatched encoded type for annotation at offset %x: %u vs %u.",
+                                  orig_offset,
+                                  orig->GetTypeId()->GetIndex(),
+                                  output->GetTypeId()->GetIndex());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationField:
+    case DexFile::kDexAnnotationEnum:
+      if (orig->GetFieldId()->GetIndex() != output->GetFieldId()->GetIndex()) {
+        *error_msg = StringPrintf("Mismatched encoded field for annotation at offset %x: %u vs %u.",
+                                  orig_offset,
+                                  orig->GetFieldId()->GetIndex(),
+                                  output->GetFieldId()->GetIndex());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationMethod:
+      if (orig->GetMethodId()->GetIndex() != output->GetMethodId()->GetIndex()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded method for annotation at offset %x: %u vs %u.",
+            orig_offset,
+            orig->GetMethodId()->GetIndex(),
+            output->GetMethodId()->GetIndex());
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationArray:
+      if (!VerifyEncodedArray(orig->GetEncodedArray(), output->GetEncodedArray(), error_msg)) {
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationAnnotation:
+      if (!VerifyEncodedAnnotation(orig->GetEncodedAnnotation(),
+                                   output->GetEncodedAnnotation(),
+                                   orig_offset,
+                                   error_msg)) {
+        return false;
+      }
+      break;
+    case DexFile::kDexAnnotationNull:
+      break;
+    case DexFile::kDexAnnotationBoolean:
+      if (orig->GetBoolean() != output->GetBoolean()) {
+        *error_msg = StringPrintf(
+            "Mismatched encoded boolean for annotation at offset %x: %d vs %d.",
+            orig_offset,
+            orig->GetBoolean(),
+            output->GetBoolean());
+        return false;
+      }
+      break;
+    default:
+      break;
+  }
+  return true;
+}
+
+bool VerifyEncodedArray(dex_ir::EncodedArrayItem* orig,
+                        dex_ir::EncodedArrayItem* output,
+                        std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty encoded array.";
+      return false;
+    }
+    return true;
+  }
+  dex_ir::EncodedValueVector* orig_vector = orig->GetEncodedValues();
+  dex_ir::EncodedValueVector* output_vector = output->GetEncodedValues();
+  if (orig_vector->size() != output_vector->size()) {
+    *error_msg = StringPrintf("Mismatched size for encoded array at offset %x: %zu vs %zu.",
+                              orig->GetOffset(),
+                              orig_vector->size(),
+                              output_vector->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_vector->size(); ++i) {
+    if (!VerifyEncodedValue((*orig_vector)[i].get(),
+                            (*output_vector)[i].get(),
+                            orig->GetOffset(),
+                            error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyClassData(dex_ir::ClassData* orig, dex_ir::ClassData* output, std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty class data.";
+      return false;
+    }
+    return true;
+  }
+  if (!VerifyFields(orig->StaticFields(), output->StaticFields(), orig->GetOffset(), error_msg)) {
+    return false;
+  }
+  if (!VerifyFields(orig->InstanceFields(),
+                    output->InstanceFields(),
+                    orig->GetOffset(),
+                    error_msg)) {
+    return false;
+  }
+  if (!VerifyMethods(orig->DirectMethods(),
+                     output->DirectMethods(),
+                     orig->GetOffset(),
+                     error_msg)) {
+    return false;
+  }
+  return VerifyMethods(orig->VirtualMethods(),
+                       output->VirtualMethods(),
+                       orig->GetOffset(),
+                       error_msg);
+}
+
+bool VerifyFields(dex_ir::FieldItemVector* orig,
+                  dex_ir::FieldItemVector* output,
+                  uint32_t orig_offset,
+                  std::string* error_msg) {
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf("Mismatched fields size for class data at offset %x: %zu vs %zu.",
+                              orig_offset,
+                              orig->size(),
+                              output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::FieldItem* orig_field = (*orig)[i].get();
+    dex_ir::FieldItem* output_field = (*output)[i].get();
+    if (orig_field->GetFieldId()->GetIndex() != output_field->GetFieldId()->GetIndex()) {
+      *error_msg = StringPrintf("Mismatched field index for class data at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig_field->GetFieldId()->GetIndex(),
+                                output_field->GetFieldId()->GetIndex());
+      return false;
+    }
+    if (orig_field->GetAccessFlags() != output_field->GetAccessFlags()) {
+      *error_msg = StringPrintf(
+          "Mismatched field access flags for class data at offset %x: %u vs %u.",
+          orig_offset,
+          orig_field->GetAccessFlags(),
+          output_field->GetAccessFlags());
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyMethods(dex_ir::MethodItemVector* orig,
+                   dex_ir::MethodItemVector* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg) {
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf("Mismatched methods size for class data at offset %x: %zu vs %zu.",
+                              orig_offset,
+                              orig->size(),
+                              output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    dex_ir::MethodItem* orig_method = (*orig)[i].get();
+    dex_ir::MethodItem* output_method = (*output)[i].get();
+    if (orig_method->GetMethodId()->GetIndex() != output_method->GetMethodId()->GetIndex()) {
+      *error_msg = StringPrintf("Mismatched method index for class data at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig_method->GetMethodId()->GetIndex(),
+                                output_method->GetMethodId()->GetIndex());
+      return false;
+    }
+    if (orig_method->GetAccessFlags() != output_method->GetAccessFlags()) {
+      *error_msg = StringPrintf(
+          "Mismatched method access flags for class data at offset %x: %u vs %u.",
+          orig_offset,
+          orig_method->GetAccessFlags(),
+          output_method->GetAccessFlags());
+      return false;
+    }
+    if (!VerifyCode(orig_method->GetCodeItem(), output_method->GetCodeItem(), error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyCode(dex_ir::CodeItem* orig, dex_ir::CodeItem* output, std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty code item.";
+      return false;
+    }
+    return true;
+  }
+  if (orig->RegistersSize() != output->RegistersSize()) {
+    *error_msg = StringPrintf("Mismatched registers size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->RegistersSize(),
+                              output->RegistersSize());
+    return false;
+  }
+  if (orig->InsSize() != output->InsSize()) {
+    *error_msg = StringPrintf("Mismatched ins size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->InsSize(),
+                              output->InsSize());
+    return false;
+  }
+  if (orig->OutsSize() != output->OutsSize()) {
+    *error_msg = StringPrintf("Mismatched outs size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->OutsSize(),
+                              output->OutsSize());
+    return false;
+  }
+  if (orig->TriesSize() != output->TriesSize()) {
+    *error_msg = StringPrintf("Mismatched tries size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->TriesSize(),
+                              output->TriesSize());
+    return false;
+  }
+  if (!VerifyDebugInfo(orig->DebugInfo(), output->DebugInfo(), error_msg)) {
+    return false;
+  }
+  if (orig->InsnsSize() != output->InsnsSize()) {
+    *error_msg = StringPrintf("Mismatched insns size for code item at offset %x: %u vs %u.",
+                              orig->GetOffset(),
+                              orig->InsnsSize(),
+                              output->InsnsSize());
+    return false;
+  }
+  if (memcmp(orig->Insns(), output->Insns(), orig->InsnsSize()) != 0) {
+    *error_msg = StringPrintf("Mismatched insns for code item at offset %x.",
+                              orig->GetOffset());
+    return false;
+  }
+  if (!VerifyTries(orig->Tries(), output->Tries(), orig->GetOffset(), error_msg)) {
+    return false;
+  }
+  return VerifyHandlers(orig->Handlers(), output->Handlers(), orig->GetOffset(), error_msg);
+}
+
+bool VerifyDebugInfo(dex_ir::DebugInfoItem* orig,
+                     dex_ir::DebugInfoItem* output,
+                     std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty debug info.";
+      return false;
+    }
+    return true;
+  }
+  if (!VerifyPositionInfo(orig->GetPositionInfo(),
+                          output->GetPositionInfo(),
+                          orig->GetOffset(),
+                          error_msg)) {
+    return false;
+  }
+  return VerifyLocalInfo(orig->GetLocalInfo(),
+                         output->GetLocalInfo(),
+                         orig->GetOffset(),
+                         error_msg);
+}
+
+bool VerifyPositionInfo(dex_ir::PositionInfoVector& orig,
+                        dex_ir::PositionInfoVector& output,
+                        uint32_t orig_offset,
+                        std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched number of positions for debug info at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig.size(),
+        output.size());
+    return false;
+  }
+  for (size_t i = 0; i < orig.size(); ++i) {
+    if (orig[i]->address_ != output[i]->address_) {
+      *error_msg = StringPrintf(
+          "Mismatched position address for debug info at offset %x: %u vs %u.",
+          orig_offset,
+          orig[i]->address_,
+          output[i]->address_);
+      return false;
+    }
+    if (orig[i]->line_ != output[i]->line_) {
+      *error_msg = StringPrintf("Mismatched position line for debug info at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig[i]->line_,
+                                output[i]->line_);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyLocalInfo(dex_ir::LocalInfoVector& orig,
+                     dex_ir::LocalInfoVector& output,
+                     uint32_t orig_offset,
+                     std::string* error_msg) {
+  if (orig.size() != output.size()) {
+    *error_msg = StringPrintf(
+        "Mismatched number of locals for debug info at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig.size(),
+        output.size());
+    return false;
+  }
+  for (size_t i = 0; i < orig.size(); ++i) {
+    if (orig[i]->name_ != output[i]->name_) {
+      *error_msg = StringPrintf("Mismatched local name for debug info at offset %x: %s vs %s.",
+                                orig_offset,
+                                orig[i]->name_.c_str(),
+                                output[i]->name_.c_str());
+      return false;
+    }
+    if (orig[i]->descriptor_ != output[i]->descriptor_) {
+      *error_msg = StringPrintf(
+          "Mismatched local descriptor for debug info at offset %x: %s vs %s.",
+          orig_offset,
+          orig[i]->descriptor_.c_str(),
+          output[i]->descriptor_.c_str());
+      return false;
+    }
+    if (orig[i]->signature_ != output[i]->signature_) {
+      *error_msg = StringPrintf("Mismatched local signature for debug info at offset %x: %s vs %s.",
+                                orig_offset,
+                                orig[i]->signature_.c_str(),
+                                output[i]->signature_.c_str());
+      return false;
+    }
+    if (orig[i]->start_address_ != output[i]->start_address_) {
+      *error_msg = StringPrintf(
+          "Mismatched local start address for debug info at offset %x: %u vs %u.",
+          orig_offset,
+          orig[i]->start_address_,
+          output[i]->start_address_);
+      return false;
+    }
+    if (orig[i]->end_address_ != output[i]->end_address_) {
+      *error_msg = StringPrintf(
+          "Mismatched local end address for debug info at offset %x: %u vs %u.",
+          orig_offset,
+          orig[i]->end_address_,
+          output[i]->end_address_);
+      return false;
+    }
+    if (orig[i]->reg_ != output[i]->reg_) {
+      *error_msg = StringPrintf("Mismatched local reg for debug info at offset %x: %u vs %u.",
+                                orig_offset,
+                                orig[i]->reg_,
+                                output[i]->reg_);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyTries(dex_ir::TryItemVector* orig,
+                 dex_ir::TryItemVector* output,
+                 uint32_t orig_offset,
+                 std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty try items.";
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf("Mismatched tries size for code item at offset %x: %zu vs %zu.",
+                              orig_offset,
+                              orig->size(),
+                              output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    const dex_ir::TryItem* orig_try = (*orig)[i].get();
+    const dex_ir::TryItem* output_try = (*output)[i].get();
+    if (orig_try->StartAddr() != output_try->StartAddr()) {
+      *error_msg = StringPrintf(
+          "Mismatched try item start addr for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_try->StartAddr(),
+          output_try->StartAddr());
+      return false;
+    }
+    if (orig_try->InsnCount() != output_try->InsnCount()) {
+      *error_msg = StringPrintf(
+          "Mismatched try item insn count for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_try->InsnCount(),
+          output_try->InsnCount());
+      return false;
+    }
+    if (!VerifyHandler(orig_try->GetHandlers(),
+                       output_try->GetHandlers(),
+                       orig_offset,
+                       error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyHandlers(dex_ir::CatchHandlerVector* orig,
+                    dex_ir::CatchHandlerVector* output,
+                    uint32_t orig_offset,
+                    std::string* error_msg) {
+  if (orig == nullptr || output == nullptr) {
+    if (orig != output) {
+      *error_msg = "Found unexpected empty catch handlers.";
+      return false;
+    }
+    return true;
+  }
+  if (orig->size() != output->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched catch handlers size for code item at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig->size(),
+        output->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig->size(); ++i) {
+    if (!VerifyHandler((*orig)[i].get(), (*output)[i].get(), orig_offset, error_msg)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifyHandler(const dex_ir::CatchHandler* orig,
+                   const dex_ir::CatchHandler* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg) {
+  dex_ir::TypeAddrPairVector* orig_handlers = orig->GetHandlers();
+  dex_ir::TypeAddrPairVector* output_handlers = output->GetHandlers();
+  if (orig_handlers->size() != output_handlers->size()) {
+    *error_msg = StringPrintf(
+        "Mismatched number of catch handlers for code item at offset %x: %zu vs %zu.",
+        orig_offset,
+        orig_handlers->size(),
+        output_handlers->size());
+    return false;
+  }
+  for (size_t i = 0; i < orig_handlers->size(); ++i) {
+    const dex_ir::TypeAddrPair* orig_handler = (*orig_handlers)[i].get();
+    const dex_ir::TypeAddrPair* output_handler = (*output_handlers)[i].get();
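+    // A null type id denotes a catch-all handler; both sides must agree on that.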
+    if (orig_handler->GetTypeId() == nullptr || output_handler->GetTypeId() == nullptr) {
+      if (orig_handler->GetTypeId() != output_handler->GetTypeId()) {
+        *error_msg = StringPrintf(
+            "Found unexpected catch all catch handler for code item at offset %x.",
+            orig_offset);
+        return false;
+      }
+    } else if (orig_handler->GetTypeId()->GetIndex() != output_handler->GetTypeId()->GetIndex()) {
+      *error_msg = StringPrintf(
+          "Mismatched catch handler type for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_handler->GetTypeId()->GetIndex(),
+          output_handler->GetTypeId()->GetIndex());
+      return false;
+    }
+    if (orig_handler->GetAddress() != output_handler->GetAddress()) {
+      *error_msg = StringPrintf(
+          "Mismatched catch handler address for code item at offset %x: %u vs %u.",
+          orig_offset,
+          orig_handler->GetAddress(),
+          output_handler->GetAddress());
+      return false;
+    }
+  }
+  return true;
+}
+
+}  // namespace art
diff --git a/dexlayout/dex_verify.h b/dexlayout/dex_verify.h
new file mode 100644
index 0000000..58c95d6
--- /dev/null
+++ b/dexlayout/dex_verify.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Header file of dex ir verifier.
+ *
+ * Compares two dex files at the IR level, allowing differences in layout, but not in data.
+ */
+
+#ifndef ART_DEXLAYOUT_DEX_VERIFY_H_
+#define ART_DEXLAYOUT_DEX_VERIFY_H_
+
+#include "dex_ir.h"
+
+namespace art {
+// Check that the output dex file contains the same data as the original.
+// Compares the dex IR of both dex files. Allows the dex files to have different layouts.
+bool VerifyOutputDexFile(dex_ir::Header* orig_header,
+                         dex_ir::Header* output_header,
+                         std::string* error_msg);
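+//
+// A minimal usage sketch (assuming both headers were already built from dex files,
+// e.g. via the dex IR builder, and that a logging macro is available):
+//   std::string error_msg;
+//   if (!VerifyOutputDexFile(orig_header, output_header, &error_msg)) {
+//     LOG(ERROR) << "Output dex verification failed: " << error_msg;
+//   }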
+
+template<class T> bool VerifyIds(std::vector<std::unique_ptr<T>>& orig,
+                                 std::vector<std::unique_ptr<T>>& output,
+                                 const char* section_name,
+                                 std::string* error_msg);
+bool VerifyId(dex_ir::StringId* orig, dex_ir::StringId* output, std::string* error_msg);
+bool VerifyId(dex_ir::TypeId* orig, dex_ir::TypeId* output, std::string* error_msg);
+bool VerifyId(dex_ir::ProtoId* orig, dex_ir::ProtoId* output, std::string* error_msg);
+bool VerifyId(dex_ir::FieldId* orig, dex_ir::FieldId* output, std::string* error_msg);
+bool VerifyId(dex_ir::MethodId* orig, dex_ir::MethodId* output, std::string* error_msg);
+
+bool VerifyClassDefs(std::vector<std::unique_ptr<dex_ir::ClassDef>>& orig,
+                     std::vector<std::unique_ptr<dex_ir::ClassDef>>& output,
+                     std::string* error_msg);
+bool VerifyClassDef(dex_ir::ClassDef* orig, dex_ir::ClassDef* output, std::string* error_msg);
+
+bool VerifyTypeList(const dex_ir::TypeList* orig, const dex_ir::TypeList* output);
+
+bool VerifyAnnotationsDirectory(dex_ir::AnnotationsDirectoryItem* orig,
+                                dex_ir::AnnotationsDirectoryItem* output,
+                                std::string* error_msg);
+bool VerifyFieldAnnotations(dex_ir::FieldAnnotationVector* orig,
+                            dex_ir::FieldAnnotationVector* output,
+                            uint32_t orig_offset,
+                            std::string* error_msg);
+bool VerifyMethodAnnotations(dex_ir::MethodAnnotationVector* orig,
+                             dex_ir::MethodAnnotationVector* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg);
+bool VerifyParameterAnnotations(dex_ir::ParameterAnnotationVector* orig,
+                                dex_ir::ParameterAnnotationVector* output,
+                                uint32_t orig_offset,
+                                std::string* error_msg);
+bool VerifyAnnotationSetRefList(dex_ir::AnnotationSetRefList* orig,
+                                dex_ir::AnnotationSetRefList* output,
+                                std::string* error_msg);
+bool VerifyAnnotationSet(dex_ir::AnnotationSetItem* orig,
+                         dex_ir::AnnotationSetItem* output,
+                         std::string* error_msg);
+bool VerifyAnnotation(dex_ir::AnnotationItem* orig,
+                      dex_ir::AnnotationItem* output,
+                      std::string* error_msg);
+bool VerifyEncodedAnnotation(dex_ir::EncodedAnnotation* orig,
+                             dex_ir::EncodedAnnotation* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg);
+bool VerifyAnnotationElement(dex_ir::AnnotationElement* orig,
+                             dex_ir::AnnotationElement* output,
+                             uint32_t orig_offset,
+                             std::string* error_msg);
+bool VerifyEncodedValue(dex_ir::EncodedValue* orig,
+                        dex_ir::EncodedValue* output,
+                        uint32_t orig_offset,
+                        std::string* error_msg);
+bool VerifyEncodedArray(dex_ir::EncodedArrayItem* orig,
+                        dex_ir::EncodedArrayItem* output,
+                        std::string* error_msg);
+
+bool VerifyClassData(dex_ir::ClassData* orig, dex_ir::ClassData* output, std::string* error_msg);
+bool VerifyFields(dex_ir::FieldItemVector* orig,
+                  dex_ir::FieldItemVector* output,
+                  uint32_t orig_offset,
+                  std::string* error_msg);
+bool VerifyMethods(dex_ir::MethodItemVector* orig,
+                   dex_ir::MethodItemVector* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg);
+bool VerifyCode(dex_ir::CodeItem* orig, dex_ir::CodeItem* output, std::string* error_msg);
+bool VerifyDebugInfo(dex_ir::DebugInfoItem* orig,
+                     dex_ir::DebugInfoItem* output,
+                     std::string* error_msg);
+bool VerifyPositionInfo(dex_ir::PositionInfoVector& orig,
+                        dex_ir::PositionInfoVector& output,
+                        uint32_t orig_offset,
+                        std::string* error_msg);
+bool VerifyLocalInfo(dex_ir::LocalInfoVector& orig,
+                     dex_ir::LocalInfoVector& output,
+                     uint32_t orig_offset,
+                     std::string* error_msg);
+bool VerifyTries(dex_ir::TryItemVector* orig,
+                 dex_ir::TryItemVector* output,
+                 uint32_t orig_offset,
+                 std::string* error_msg);
+bool VerifyHandlers(dex_ir::CatchHandlerVector* orig,
+                    dex_ir::CatchHandlerVector* output,
+                    uint32_t orig_offset,
+                    std::string* error_msg);
+bool VerifyHandler(const dex_ir::CatchHandler* orig,
+                   const dex_ir::CatchHandler* output,
+                   uint32_t orig_offset,
+                   std::string* error_msg);
+}  // namespace art
+
+#endif  // ART_DEXLAYOUT_DEX_VERIFY_H_
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index 75d47e4..829e9fe 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -35,150 +35,55 @@
 
 namespace art {
 
-struct FileSection {
- public:
-  std::string name_;
-  uint16_t type_;
-  std::function<uint32_t(const dex_ir::Collections&)> size_fn_;
-  std::function<uint32_t(const dex_ir::Collections&)> offset_fn_;
-};
-
-static const std::vector<FileSection> kFileSections = {
-  {
-    "StringId",
-    DexFile::kDexTypeStringIdItem,
-    &dex_ir::Collections::StringIdsSize,
-    &dex_ir::Collections::StringIdsOffset
-  }, {
-    "TypeId",
-    DexFile::kDexTypeTypeIdItem,
-    &dex_ir::Collections::TypeIdsSize,
-    &dex_ir::Collections::TypeIdsOffset
-  }, {
-    "ProtoId",
-    DexFile::kDexTypeProtoIdItem,
-    &dex_ir::Collections::ProtoIdsSize,
-    &dex_ir::Collections::ProtoIdsOffset
-  }, {
-    "FieldId",
-    DexFile::kDexTypeFieldIdItem,
-    &dex_ir::Collections::FieldIdsSize,
-    &dex_ir::Collections::FieldIdsOffset
-  }, {
-    "MethodId",
-    DexFile::kDexTypeMethodIdItem,
-    &dex_ir::Collections::MethodIdsSize,
-    &dex_ir::Collections::MethodIdsOffset
-  }, {
-    "ClassDef",
-    DexFile::kDexTypeClassDefItem,
-    &dex_ir::Collections::ClassDefsSize,
-    &dex_ir::Collections::ClassDefsOffset
-  }, {
-    "StringData",
-    DexFile::kDexTypeStringDataItem,
-    &dex_ir::Collections::StringDatasSize,
-    &dex_ir::Collections::StringDatasOffset
-  }, {
-    "TypeList",
-    DexFile::kDexTypeTypeList,
-    &dex_ir::Collections::TypeListsSize,
-    &dex_ir::Collections::TypeListsOffset
-  }, {
-    "EncArr",
-    DexFile::kDexTypeEncodedArrayItem,
-    &dex_ir::Collections::EncodedArrayItemsSize,
-    &dex_ir::Collections::EncodedArrayItemsOffset
-  }, {
-    "Annotation",
-    DexFile::kDexTypeAnnotationItem,
-    &dex_ir::Collections::AnnotationItemsSize,
-    &dex_ir::Collections::AnnotationItemsOffset
-  }, {
-    "AnnoSet",
-    DexFile::kDexTypeAnnotationSetItem,
-    &dex_ir::Collections::AnnotationSetItemsSize,
-    &dex_ir::Collections::AnnotationSetItemsOffset
-  }, {
-    "AnnoSetRL",
-    DexFile::kDexTypeAnnotationSetRefList,
-    &dex_ir::Collections::AnnotationSetRefListsSize,
-    &dex_ir::Collections::AnnotationSetRefListsOffset
-  }, {
-    "AnnoDir",
-    DexFile::kDexTypeAnnotationsDirectoryItem,
-    &dex_ir::Collections::AnnotationsDirectoryItemsSize,
-    &dex_ir::Collections::AnnotationsDirectoryItemsOffset
-  }, {
-    "DebugInfo",
-    DexFile::kDexTypeDebugInfoItem,
-    &dex_ir::Collections::DebugInfoItemsSize,
-    &dex_ir::Collections::DebugInfoItemsOffset
-  }, {
-    "CodeItem",
-    DexFile::kDexTypeCodeItem,
-    &dex_ir::Collections::CodeItemsSize,
-    &dex_ir::Collections::CodeItemsOffset
-  }, {
-    "ClassData",
-    DexFile::kDexTypeClassDataItem,
-    &dex_ir::Collections::ClassDatasSize,
-    &dex_ir::Collections::ClassDatasOffset
-  }
-};
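+// Builds multidex-style names ("classes.dex", "classes2.dex", ...) from a prefix and suffix.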
+static std::string MultidexName(const std::string& prefix,
+                                size_t dex_file_index,
+                                const std::string& suffix) {
+  return prefix + ((dex_file_index > 0) ? std::to_string(dex_file_index + 1) : "") + suffix;
+}
 
 class Dumper {
  public:
   // Colors are based on the type of the section in MapList.
-  Dumper(const dex_ir::Collections& collections, size_t dex_file_index) {
-    // Build the table that will map from offset to color
-    table_.emplace_back(DexFile::kDexTypeHeaderItem, 0u);
-    for (const FileSection& s : kFileSections) {
-      table_.emplace_back(s.type_, s.offset_fn_(collections));
-    }
-    // Sort into descending order by offset.
-    std::sort(table_.begin(),
-              table_.end(),
-              [](const SectionColor& a, const SectionColor& b) { return a.offset_ > b.offset_; });
+  explicit Dumper(dex_ir::Header* header)
+      : out_file_(nullptr),
+        sorted_sections_(
+            dex_ir::GetSortedDexFileSections(header, dex_ir::SortDirection::kSortDescending)) { }
+
+  bool OpenAndPrintHeader(size_t dex_index) {
     // Open the file and emit the gnuplot prologue.
-    std::string dex_file_name("classes");
-    std::string out_file_base_name("layout");
-    if (dex_file_index > 0) {
-      out_file_base_name += std::to_string(dex_file_index + 1);
-      dex_file_name += std::to_string(dex_file_index + 1);
+    out_file_ = fopen(MultidexName("layout", dex_index, ".gnuplot").c_str(), "w");
+    if (out_file_ == nullptr) {
+      return false;
     }
-    dex_file_name += ".dex";
-    std::string out_file_name(out_file_base_name + ".gnuplot");
-    std::string png_file_name(out_file_base_name + ".png");
-    out_file_ = fopen(out_file_name.c_str(), "w");
     fprintf(out_file_, "set terminal png size 1920,1080\n");
-    fprintf(out_file_, "set output \"%s\"\n", png_file_name.c_str());
-    fprintf(out_file_, "set title \"%s\"\n", dex_file_name.c_str());
+    fprintf(out_file_, "set output \"%s\"\n", MultidexName("layout", dex_index, ".png").c_str());
+    fprintf(out_file_, "set title \"%s\"\n", MultidexName("classes", dex_index, ".dex").c_str());
     fprintf(out_file_, "set xlabel \"Page offset into dex\"\n");
     fprintf(out_file_, "set ylabel \"ClassDef index\"\n");
     fprintf(out_file_, "set xtics rotate out (");
-    fprintf(out_file_, "\"Header\" %d, ", 0);
     bool printed_one = false;
-    for (const FileSection& s : kFileSections) {
-      if (s.size_fn_(collections) > 0) {
+
+    for (const dex_ir::DexFileSection& s : sorted_sections_) {
+      if (s.size > 0) {
         if (printed_one) {
           fprintf(out_file_, ", ");
         }
-        fprintf(out_file_, "\"%s\" %d", s.name_.c_str(), s.offset_fn_(collections) / kPageSize);
+        fprintf(out_file_, "\"%s\" %d", s.name.c_str(), s.offset / kPageSize);
         printed_one = true;
       }
     }
     fprintf(out_file_, ")\n");
     fprintf(out_file_,
             "plot \"-\" using 1:2:3:4:5 with vector nohead linewidth 1 lc variable notitle\n");
+    return true;
   }
 
   int GetColor(uint32_t offset) const {
     // The dread linear search to find the right section for the reference.
     uint16_t section = 0;
-    for (uint16_t i = 0; i < table_.size(); ++i) {
-      if (table_[i].offset_ < offset) {
-        section = table_[i].type_;
+    for (const dex_ir::DexFileSection& file_section : sorted_sections_) {
+      if (file_section.offset < offset) {
+        section = file_section.type;
         break;
       }
     }
@@ -308,13 +213,6 @@
   }
 
  private:
-  struct SectionColor {
-   public:
-    SectionColor(uint16_t type, uint32_t offset) : type_(type), offset_(offset) { }
-    uint16_t type_;
-    uint32_t offset_;
-  };
-
   using ColorMapType = std::map<uint16_t, int>;
   const ColorMapType kColorMap = {
     { DexFile::kDexTypeHeaderItem, 1 },
@@ -336,8 +234,8 @@
     { DexFile::kDexTypeAnnotationsDirectoryItem, 16 }
   };
 
-  std::vector<SectionColor> table_;
   FILE* out_file_;
+  std::vector<dex_ir::DexFileSection> sorted_sections_;
 
   DISALLOW_COPY_AND_ASSIGN(Dumper);
 };
@@ -350,7 +248,11 @@
                         const DexFile* dex_file,
                         size_t dex_file_index,
                         ProfileCompilationInfo* profile_info) {
-  std::unique_ptr<Dumper> dumper(new Dumper(header->GetCollections(), dex_file_index));
+  std::unique_ptr<Dumper> dumper(new Dumper(header));
+  if (!dumper->OpenAndPrintHeader(dex_file_index)) {
+    fprintf(stderr, "Could not open output file.\n");
+    return;
+  }
 
   const uint32_t class_defs_size = header->GetCollections().ClassDefsSize();
   for (uint32_t class_index = 0; class_index < class_defs_size; class_index++) {
@@ -401,4 +303,45 @@
   }  // for
 }
 
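+// Returns the offset of the next non-empty section, or the file size if there is none.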
+static uint32_t FindNextByteAfterSection(dex_ir::Header* header,
+                                         const std::vector<dex_ir::DexFileSection>& sorted_sections,
+                                         size_t section_index) {
+  for (size_t i = section_index + 1; i < sorted_sections.size(); ++i) {
+    const dex_ir::DexFileSection& section = sorted_sections.at(i);
+    if (section.size != 0) {
+      return section.offset;
+    }
+  }
+  return header->FileSize();
+}
+
+/*
+ * Dumps the offset and size of sections within the file.
+ */
+void ShowDexSectionStatistics(dex_ir::Header* header, size_t dex_file_index) {
+  // Compute the (multidex) dex file name.
+  fprintf(stdout, "%s (%d bytes)\n",
+          MultidexName("classes", dex_file_index, ".dex").c_str(),
+          header->FileSize());
+  fprintf(stdout, "section      offset    items    bytes    pages pct\n");
+  std::vector<dex_ir::DexFileSection> sorted_sections =
+      GetSortedDexFileSections(header, dex_ir::SortDirection::kSortAscending);
+  for (size_t i = 0; i < sorted_sections.size(); ++i) {
+    const dex_ir::DexFileSection& file_section = sorted_sections[i];
+    uint32_t bytes = 0;
+    if (file_section.size > 0) {
+      bytes = FindNextByteAfterSection(header, sorted_sections, i) - file_section.offset;
+    }
+    fprintf(stdout,
+            "%-10s %8d %8d %8d %8d %%%02d\n",
+            file_section.name.c_str(),
+            file_section.offset,
+            file_section.size,
+            bytes,
+            RoundUp(bytes, kPageSize) / kPageSize,
+            100 * bytes / header->FileSize());
+  }
+  fprintf(stdout, "\n");
+}
+
 }  // namespace art
diff --git a/dexlayout/dex_visualize.h b/dexlayout/dex_visualize.h
index 09f8306..a1aa2cd 100644
--- a/dexlayout/dex_visualize.h
+++ b/dexlayout/dex_visualize.h
@@ -38,6 +38,8 @@
                         size_t dex_file_index,
                         ProfileCompilationInfo* profile_info);
 
+void ShowDexSectionStatistics(dex_ir::Header* header, size_t dex_file_index);
+
 }  // namespace art
 
 #endif  // ART_DEXLAYOUT_DEX_VISUALIZE_H_
diff --git a/dexlayout/dexdiag.cc b/dexlayout/dexdiag.cc
new file mode 100644
index 0000000..211bfdf
--- /dev/null
+++ b/dexlayout/dexdiag.cc
@@ -0,0 +1,405 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <iostream>
+#include <memory>
+
+#include "android-base/stringprintf.h"
+
+#include "dex_file.h"
+#include "dex_ir.h"
+#include "dex_ir_builder.h"
+#include "pagemap/pagemap.h"
+#include "runtime.h"
+#include "vdex_file.h"
+
+namespace art {
+
+using android::base::StringPrintf;
+
+static constexpr size_t kLineLength = 32;
+
+static bool g_show_key = false;
+static bool g_verbose = false;
+static bool g_show_statistics = false;
+
+struct DexSectionInfo {
+ public:
+  std::string name;
+  char letter;
+};
+
+static const std::map<uint16_t, DexSectionInfo> kDexSectionInfoMap = {
+  { DexFile::kDexTypeHeaderItem, { "Header", 'H' } },
+  { DexFile::kDexTypeStringIdItem, { "StringId", 'S' } },
+  { DexFile::kDexTypeTypeIdItem, { "TypeId", 'T' } },
+  { DexFile::kDexTypeProtoIdItem, { "ProtoId", 'P' } },
+  { DexFile::kDexTypeFieldIdItem, { "FieldId", 'F' } },
+  { DexFile::kDexTypeMethodIdItem, { "MethodId", 'M' } },
+  { DexFile::kDexTypeClassDefItem, { "ClassDef", 'C' } },
+  { DexFile::kDexTypeCallSiteIdItem, { "CallSiteId", 'z' } },
+  { DexFile::kDexTypeMethodHandleItem, { "MethodHandle", 'Z' } },
+  { DexFile::kDexTypeMapList, { "TypeMap", 'L' } },
+  { DexFile::kDexTypeTypeList, { "TypeList", 't' } },
+  { DexFile::kDexTypeAnnotationSetRefList, { "AnnotationSetReferenceItem", '1' } },
+  { DexFile::kDexTypeAnnotationSetItem, { "AnnotationSetItem", '2' } },
+  { DexFile::kDexTypeClassDataItem, { "ClassData", 'c' } },
+  { DexFile::kDexTypeCodeItem, { "CodeItem", 'X' } },
+  { DexFile::kDexTypeStringDataItem, { "StringData", 's' } },
+  { DexFile::kDexTypeDebugInfoItem, { "DebugInfo", 'D' } },
+  { DexFile::kDexTypeAnnotationItem, { "AnnotationItem", '3' } },
+  { DexFile::kDexTypeEncodedArrayItem, { "EncodedArrayItem", 'E' } },
+  { DexFile::kDexTypeAnnotationsDirectoryItem, { "AnnotationsDirectoryItem", '4' } }
+};
+
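+// Counts pages attributed to each dex section type.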
+class PageCount {
+ public:
+  PageCount() {
+    for (auto it = kDexSectionInfoMap.begin(); it != kDexSectionInfoMap.end(); ++it) {
+      map_[it->first] = 0;
+    }
+  }
+  void Increment(uint16_t type) {
+    map_[type]++;
+  }
+  size_t Get(uint16_t type) const {
+    return map_.at(type);
+  }
+ private:
+  std::map<uint16_t, size_t> map_;
+  DISALLOW_COPY_AND_ASSIGN(PageCount);
+};
+
+static void PrintLetterKey() {
+  std::cout << "letter section_type" << std::endl;
+  for (const auto& p : kDexSectionInfoMap) {
+    const DexSectionInfo& section_info = p.second;
+    std::cout << section_info.letter << "      " << section_info.name.c_str() << std::endl;
+  }
+}
+
+static char PageTypeChar(uint16_t type) {
+  if (kDexSectionInfoMap.find(type) == kDexSectionInfoMap.end()) {
+    return '-';
+  }
+  return kDexSectionInfoMap.find(type)->second.letter;
+}
+
+static uint16_t FindSectionTypeForPage(size_t page,
+                                       const std::vector<dex_ir::DexFileSection>& sections) {
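+  // Assumes |sections| is sorted by decreasing offset.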
+  for (const auto& section : sections) {
+    size_t first_page_of_section = section.offset / kPageSize;
+    // Only consider non-empty sections.
+    if (section.size == 0) {
+      continue;
+    }
+    // Attribute the page to the highest-offset section that starts before the page.
+    if (first_page_of_section <= page) {
+      return section.type;
+    }
+  }
+  // If there is no non-zero-sized section with an offset below the one we are looking for,
+  // the page must belong to the header.
+  return DexFile::kDexTypeHeaderItem;
+}
+
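+// Tallies resident pages by section for pages [start, end) of the mapping, one libpagemap
+// entry (uint64_t) per page. In verbose mode, also prints one letter per resident page (see
+// kDexSectionInfoMap) and '.' per non-resident page, kLineLength characters to a row.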
+static void ProcessPageMap(uint64_t* pagemap,
+                           size_t start,
+                           size_t end,
+                           const std::vector<dex_ir::DexFileSection>& sections,
+                           PageCount* page_counts) {
+  for (size_t page = start; page < end; ++page) {
+    char type_char = '.';
+    if (PM_PAGEMAP_PRESENT(pagemap[page])) {
+      uint16_t type = FindSectionTypeForPage(page, sections);
+      page_counts->Increment(type);
+      type_char = PageTypeChar(type);
+    }
+    if (g_verbose) {
+      std::cout << type_char;
+      if ((page - start) % kLineLength == kLineLength - 1) {
+        std::cout << std::endl;
+      }
+    }
+  }
+  if (g_verbose) {
+    if ((end - start) % kLineLength != 0) {
+      std::cout << std::endl;
+    }
+  }
+}
+
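+// Prints the per-section table of resident vs. mapped pages for one dex file (-s output).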
+static void DisplayDexStatistics(size_t start,
+                                 size_t end,
+                                 const PageCount& resident_pages,
+                                 const std::vector<dex_ir::DexFileSection>& sections) {
+  // Compute the total possible sizes for sections.
+  PageCount mapped_pages;
+  DCHECK_GE(end, start);
+  size_t total_mapped_pages = end - start;
+  if (total_mapped_pages == 0) {
+    return;
+  }
+  for (size_t page = start; page < end; ++page) {
+    mapped_pages.Increment(FindSectionTypeForPage(page, sections));
+  }
+  size_t total_resident_pages = 0;
+  // Compute the width of the section header column in the table (for fixed formatting).
+  int section_header_width = 0;
+  for (const auto& section_info : kDexSectionInfoMap) {
+    section_header_width = std::max(section_header_width,
+                                    static_cast<int>(section_info.second.name.length()));
+  }
+  // The column width needed to print a page count (fits any 32-bit count).
+  static constexpr int kPageCountWidth =
+      static_cast<int>(std::numeric_limits<uint32_t>::digits10);
+  // Display the sections.
+  static constexpr char kSectionHeader[] = "Section name";
+  std::cout << StringPrintf("%-*s %*s %*s %% of   %% of",
+                            section_header_width,
+                            kSectionHeader,
+                            kPageCountWidth,
+                            "resident",
+                            kPageCountWidth,
+                            "total"
+                            )
+            << std::endl;
+  std::cout << StringPrintf("%-*s %*s %*s sect.  total",
+                            section_header_width,
+                            "",
+                            kPageCountWidth,
+                            "pages",
+                            kPageCountWidth,
+                            "pages")
+            << std::endl;
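+  // |sections| is sorted by descending offset, so iterate in reverse to print the rows in
+  // ascending file order.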
+  for (size_t i = sections.size(); i > 0; --i) {
+    const dex_ir::DexFileSection& section = sections[i - 1];
+    const uint16_t type = section.type;
+    const DexSectionInfo& section_info = kDexSectionInfoMap.find(type)->second;
+    size_t pages_resident = resident_pages.Get(type);
+    double percent_resident = 0;
+    if (mapped_pages.Get(type) > 0) {
+      percent_resident = 100.0 * pages_resident / mapped_pages.Get(type);
+    }
+    // 6.2 is sufficient to print 0-100% with two decimal places of accuracy.
+    std::cout << StringPrintf("%-*s %*zd %*zd %6.2f %6.2f",
+                              section_header_width,
+                              section_info.name.c_str(),
+                              kPageCountWidth,
+                              pages_resident,
+                              kPageCountWidth,
+                              mapped_pages.Get(type),
+                              percent_resident,
+                              100.0 * pages_resident / total_mapped_pages)
+              << std::endl;
+    total_resident_pages += pages_resident;
+  }
+  std::cout << StringPrintf("%-*s %*zd %*zd        %6.2f",
+                            section_header_width,
+                            "GRAND TOTAL",
+                            kPageCountWidth,
+                            total_resident_pages,
+                            kPageCountWidth,
+                            total_mapped_pages,
+                            100.0 * total_resident_pages / total_mapped_pages)
+            << std::endl
+            << std::endl;
+}
+
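+// Reports page residency for a single dex file inside a vdex mapping; the computed page
+// indexes are relative to the start of the vdex mapping.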
+static void ProcessOneDexMapping(uint64_t* pagemap,
+                                 uint64_t map_start,
+                                 const DexFile* dex_file,
+                                 uint64_t vdex_start) {
+  uint64_t dex_file_start = reinterpret_cast<uint64_t>(dex_file->Begin());
+  size_t dex_file_size = dex_file->Size();
+  if (dex_file_start < vdex_start) {
+    std::cerr << "Dex file start offset for "
+              << dex_file->GetLocation().c_str()
+              << " is incorrect: vdex start "
+              << StringPrintf("%" PRIx64 " > dex start %" PRIx64, vdex_start, dex_file_start)
+              << std::endl;
+    return;
+  }
+  // Compute the page range occupied by this dex file within the vdex mapping; the end page
+  // is rounded up from the dex file's end offset in bytes.
+  uint64_t start = (dex_file_start - vdex_start) / kPageSize;
+  uint64_t end = RoundUp(dex_file_start - vdex_start + dex_file_size, kPageSize) / kPageSize;
+  std::cout << "DEX "
+            << dex_file->GetLocation().c_str()
+            << StringPrintf(": %" PRIx64 "-%" PRIx64,
+                            map_start + start * kPageSize,
+                            map_start + end * kPageSize)
+            << std::endl;
+  // Build a list of the dex file section types, sorted from highest offset to lowest.
+  std::vector<dex_ir::DexFileSection> sections;
+  {
+    std::unique_ptr<dex_ir::Header> header(dex_ir::DexIrBuilder(*dex_file));
+    sections = dex_ir::GetSortedDexFileSections(header.get(),
+                                                dex_ir::SortDirection::kSortDescending);
+  }
+  PageCount section_resident_pages;
+  ProcessPageMap(pagemap, start, end, sections, &section_resident_pages);
+  if (g_show_statistics) {
+    DisplayDexStatistics(start, end, section_resident_pages, sections);
+  }
+}
+
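+// Returns true if scanning should continue (non-vdex mappings are simply skipped) and
+// false only on a hard error.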
+static bool DisplayMappingIfFromVdexFile(pm_map_t* map) {
+  // Confirm that the map is from a vdex file.
+  static const char* suffixes[] = { ".vdex" };
+  std::string vdex_name;
+  bool found = false;
+  for (size_t j = 0; j < sizeof(suffixes) / sizeof(suffixes[0]); ++j) {
+    if (strstr(pm_map_name(map), suffixes[j]) != nullptr) {
+      vdex_name = pm_map_name(map);
+      found = true;
+      break;
+    }
+  }
+  if (!found) {
+    return true;
+  }
+  // Extract all the dex files from the vdex file.
+  std::string error_msg;
+  std::unique_ptr<VdexFile> vdex(VdexFile::Open(vdex_name,
+                                                false /*writable*/,
+                                                false /*low_4gb*/,
+                                                &error_msg /*out*/));
+  if (vdex == nullptr) {
+    std::cerr << "Could not open vdex file "
+              << vdex_name.c_str()
+              << ": error "
+              << error_msg.c_str()
+              << std::endl;
+    return false;
+  }
+
+  std::vector<std::unique_ptr<const DexFile>> dex_files;
+  if (!vdex->OpenAllDexFiles(&dex_files, &error_msg)) {
+    std::cerr << "Dex files could not be opened for "
+              << vdex_name.c_str()
+              << ": error "
+              << error_msg.c_str()
+              << std::endl;
+    return false;
+  }
+  // Open the page mapping (one uint64_t per page) for the entire vdex mapping.
+  uint64_t* pagemap;
+  size_t len;
+  if (pm_map_pagemap(map, &pagemap, &len) != 0) {
+    std::cerr << "Error creating pagemap." << std::endl;
+    return false;
+  }
+  // Process the dex files.
+  std::cout << "MAPPING "
+            << pm_map_name(map)
+            << StringPrintf(": %zx-%zx", pm_map_start(map), pm_map_end(map))
+            << std::endl;
+  for (const auto& dex_file : dex_files) {
+    ProcessOneDexMapping(pagemap,
+                         pm_map_start(map),
+                         dex_file.get(),
+                         reinterpret_cast<uint64_t>(vdex->Begin()));
+  }
+  free(pagemap);
+  return true;
+}
+
+static void Usage(const char* cmd) {
+  std::cerr << "Usage: " << cmd << " [-k] [-s] [-v] pid" << std::endl
+            << "    -k Shows a key to verbose display characters." << std::endl
+            << "    -s Shows section statistics for individual dex files." << std::endl
+            << "    -v Verbosely displays resident pages for dex files." << std::endl;
+}
+
+static int DexDiagMain(int argc, char* argv[]) {
+  if (argc < 2) {
+    Usage(argv[0]);
+    return EXIT_FAILURE;
+  }
+
+  // TODO: add option to track usage by class name, etc.
+  for (int i = 1; i < argc - 1; ++i) {
+    if (strcmp(argv[i], "-k") == 0) {
+      g_show_key = true;
+    } else if (strcmp(argv[i], "-s") == 0) {
+      g_show_statistics = true;
+    } else if (strcmp(argv[i], "-v") == 0) {
+      g_verbose = true;
+    } else {
+      Usage(argv[0]);
+      return EXIT_FAILURE;
+    }
+  }
+
+  // ART-specific setup.
+  InitLogging(argv, Runtime::Aborter);
+  MemMap::Init();
+
+  pid_t pid;
+  char* endptr;
+  pid = static_cast<pid_t>(strtol(argv[argc - 1], &endptr, 10));
+  if (*endptr != '\0' || kill(pid, 0) != 0) {
+    std::cerr << StringPrintf("Invalid PID \"%s\".", argv[argc - 1]) << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Get the libpagemap kernel interface.
+  pm_kernel_t* ker;
+  if (pm_kernel_create(&ker) != 0) {
+    std::cerr << "Error creating kernel interface -- does this kernel have pagemap?" << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Get the libpagemap process interface.
+  pm_process_t* proc;
+  if (pm_process_create(ker, pid, &proc) != 0) {
+    std::cerr << "Error creating process interface -- does process "
+              << pid
+              << " really exist?"
+              << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Get the set of mappings by the specified process.
+  pm_map_t** maps;
+  size_t num_maps;
+  if (pm_process_maps(proc, &maps, &num_maps) != 0) {
+    std::cerr << "Error listing maps." << std::endl;
+    return EXIT_FAILURE;
+  }
+
+  // Process the mappings that are due to DEX files.
+  for (size_t i = 0; i < num_maps; ++i) {
+    if (!DisplayMappingIfFromVdexFile(maps[i])) {
+      return EXIT_FAILURE;
+    }
+  }
+
+  if (g_show_key) {
+    PrintLetterKey();
+  }
+  return EXIT_SUCCESS;
+}
+
+}  // namespace art
+
+int main(int argc, char* argv[]) {
+  return art::DexDiagMain(argc, argv);
+}
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index 22619b9..0536f322 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -34,7 +34,9 @@
 
 #include "dex_ir_builder.h"
 #include "dex_file-inl.h"
+#include "dex_file_verifier.h"
 #include "dex_instruction-inl.h"
+#include "dex_verify.h"
 #include "dex_visualize.h"
 #include "dex_writer.h"
 #include "jit/profile_compilation_info.h"
@@ -1367,10 +1369,11 @@
   }
 
   // Interfaces.
-  const dex_ir::TypeIdVector* interfaces = class_def->Interfaces();
+  const dex_ir::TypeList* interfaces = class_def->Interfaces();
   if (interfaces != nullptr) {
-    for (uint32_t i = 0; i < interfaces->size(); i++) {
-      DumpInterface((*interfaces)[i], i);
+    const dex_ir::TypeIdVector* interfaces_vector = interfaces->GetTypeList();
+    for (uint32_t i = 0; i < interfaces_vector->size(); i++) {
+      DumpInterface((*interfaces_vector)[i], i);
     }  // for
   }
 
@@ -1525,14 +1528,127 @@
   return new_class_data_order;
 }
 
+void DexLayout::LayoutStringData(const DexFile* dex_file) {
+  const size_t num_strings = header_->GetCollections().StringIds().size();
+  std::vector<bool> is_shorty(num_strings, false);
+  std::vector<bool> from_hot_method(num_strings, false);
+  for (std::unique_ptr<dex_ir::ClassDef>& class_def : header_->GetCollections().ClassDefs()) {
+    // The name of a profile class is likely to be looked up by ClassTable::Lookup, so mark it
+    // as hot.
+    const bool is_profile_class =
+        info_->ContainsClass(*dex_file, dex::TypeIndex(class_def->ClassType()->GetIndex()));
+    if (is_profile_class) {
+      from_hot_method[class_def->ClassType()->GetStringId()->GetIndex()] = true;
+    }
+    dex_ir::ClassData* data = class_def->GetClassData();
+    if (data == nullptr) {
+      continue;
+    }
+    for (size_t i = 0; i < 2; ++i) {
+      for (auto& method : *(i == 0 ? data->DirectMethods() : data->VirtualMethods())) {
+        const dex_ir::MethodId* method_id = method->GetMethodId();
+        dex_ir::CodeItem* code_item = method->GetCodeItem();
+        if (code_item == nullptr) {
+          continue;
+        }
+        const bool is_clinit = is_profile_class &&
+            (method->GetAccessFlags() & kAccConstructor) != 0 &&
+            (method->GetAccessFlags() & kAccStatic) != 0;
+        const bool method_executed = is_clinit ||
+            info_->ContainsMethod(MethodReference(dex_file, method_id->GetIndex()));
+        if (!method_executed) {
+          continue;
+        }
+        is_shorty[method_id->Proto()->Shorty()->GetIndex()] = true;
+        dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
+        if (fixups == nullptr) {
+          continue;
+        }
+        if (fixups->StringIds() != nullptr) {
+          // Add const-strings.
+          for (dex_ir::StringId* id : *fixups->StringIds()) {
+            from_hot_method[id->GetIndex()] = true;
+          }
+        }
+        // TODO: Only visit field ids from static getters and setters.
+        for (dex_ir::FieldId* id : *fixups->FieldIds()) {
+          // Add the field names and types from getters and setters.
+          from_hot_method[id->Name()->GetIndex()] = true;
+          from_hot_method[id->Type()->GetStringId()->GetIndex()] = true;
+        }
+      }
+    }
+  }
+  // Sort string data by specified order.
+  std::vector<dex_ir::StringId*> string_ids;
+  size_t min_offset = std::numeric_limits<size_t>::max();
+  size_t max_offset = 0;
+  size_t hot_bytes = 0;
+  for (auto& string_id : header_->GetCollections().StringIds()) {
+    string_ids.push_back(string_id.get());
+    const size_t cur_offset = string_id->DataItem()->GetOffset();
+    CHECK_NE(cur_offset, 0u);
+    min_offset = std::min(min_offset, cur_offset);
+    dex_ir::StringData* data = string_id->DataItem();
+    const size_t element_size = data->GetSize() + 1;  // Add one extra for null.
+    size_t end_offset = cur_offset + element_size;
+    if (is_shorty[string_id->GetIndex()] || from_hot_method[string_id->GetIndex()]) {
+      hot_bytes += element_size;
+    }
+    max_offset = std::max(max_offset, end_offset);
+  }
+  VLOG(compiler) << "Hot string data bytes " << hot_bytes << "/" << max_offset - min_offset;
+  std::sort(string_ids.begin(),
+            string_ids.end(),
+            [&is_shorty, &from_hot_method](const dex_ir::StringId* a,
+                                           const dex_ir::StringId* b) {
+    const bool a_is_hot = from_hot_method[a->GetIndex()];
+    const bool b_is_hot = from_hot_method[b->GetIndex()];
+    if (a_is_hot != b_is_hot) {
+      return a_is_hot < b_is_hot;
+    }
+    // After hot methods are partitioned, subpartition shorties.
+    const bool a_is_shorty = is_shorty[a->GetIndex()];
+    const bool b_is_shorty = is_shorty[b->GetIndex()];
+    if (a_is_shorty != b_is_shorty) {
+      return a_is_shorty < b_is_shorty;
+    }
+    // Preserve order.
+    return a->DataItem()->GetOffset() < b->DataItem()->GetOffset();
+  });
+  // Now we know what order we want the string data, reorder the offsets.
+  size_t offset = min_offset;
+  for (dex_ir::StringId* string_id : string_ids) {
+    dex_ir::StringData* data = string_id->DataItem();
+    data->SetOffset(offset);
+    offset += data->GetSize() + 1;  // Add one extra for null.
+  }
+  if (offset > max_offset) {
+    const uint32_t diff = offset - max_offset;
+    // If we expanded the string data section, we need to update the offsets or else we will
+    // corrupt the next section when writing out.
+    FixupSections(header_->GetCollections().StringDatasOffset(), diff);
+    // Update file size.
+    header_->SetFileSize(header_->FileSize() + diff);
+  }
+}
+
 // Orders code items according to specified class data ordering.
 // NOTE: If the section following the code items is byte aligned, the last code item is left in
 // place to preserve alignment. Layout needs an overhaul to handle movement of other sections.
 int32_t DexLayout::LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order) {
+  // Do not move code items if the class data section precedes the code item section.
+  // ULEB encoding is variable length, which makes it hard to predict the code item offsets.
+  // TODO: We should swap the order of these sections in the future to avoid this issue.
+  uint32_t class_data_offset = header_->GetCollections().ClassDatasOffset();
+  uint32_t code_item_offset = header_->GetCollections().CodeItemsOffset();
+  if (class_data_offset < code_item_offset) {
+    return 0;
+  }
+
   // Find the last code item so we can leave it in place if the next section is not 4 byte aligned.
   std::unordered_set<dex_ir::CodeItem*> visited_code_items;
-  uint32_t offset = header_->GetCollections().CodeItemsOffset();
-  bool is_code_item_aligned = IsNextSectionCodeItemAligned(offset);
+  bool is_code_item_aligned = IsNextSectionCodeItemAligned(code_item_offset);
   if (!is_code_item_aligned) {
     dex_ir::CodeItem* last_code_item = nullptr;
     for (auto& code_item_pair : header_->GetCollections().CodeItems()) {
@@ -1552,18 +1668,18 @@
       dex_ir::CodeItem* code_item = method->GetCodeItem();
       if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
         visited_code_items.insert(code_item);
-        diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
-        code_item->SetOffset(offset);
-        offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+        diff += UnsignedLeb128Size(code_item_offset) - UnsignedLeb128Size(code_item->GetOffset());
+        code_item->SetOffset(code_item_offset);
+        code_item_offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
       }
     }
     for (auto& method : *class_data->VirtualMethods()) {
       dex_ir::CodeItem* code_item = method->GetCodeItem();
       if (code_item != nullptr && visited_code_items.find(code_item) == visited_code_items.end()) {
         visited_code_items.insert(code_item);
-        diff += UnsignedLeb128Size(offset) - UnsignedLeb128Size(code_item->GetOffset());
-        code_item->SetOffset(offset);
-        offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
+        diff += UnsignedLeb128Size(code_item_offset) - UnsignedLeb128Size(code_item->GetOffset());
+        code_item->SetOffset(code_item_offset);
+        code_item_offset += RoundUp(code_item->GetSize(), kDexCodeItemAlignment);
       }
     }
   }
@@ -1675,6 +1791,7 @@
 }
 
 void DexLayout::LayoutOutputFile(const DexFile* dex_file) {
+  LayoutStringData(dex_file);
   std::vector<dex_ir::ClassData*> new_class_data_order = LayoutClassDefsAndClassData(dex_file);
   int32_t diff = LayoutCodeItems(new_class_data_order);
   // Move sections after ClassData by diff bytes.
@@ -1683,12 +1800,13 @@
   header_->SetFileSize(header_->FileSize() + diff);
 }
 
-void DexLayout::OutputDexFile(const std::string& dex_file_location) {
+void DexLayout::OutputDexFile(const DexFile* dex_file) {
+  const std::string& dex_file_location = dex_file->GetLocation();
   std::string error_msg;
   std::unique_ptr<File> new_file;
   if (!options_.output_to_memmap_) {
     std::string output_location(options_.output_dex_directory_);
-    size_t last_slash = dex_file_location.rfind("/");
+    size_t last_slash = dex_file_location.rfind('/');
     std::string dex_file_directory = dex_file_location.substr(0, last_slash + 1);
     if (output_location == dex_file_directory) {
       output_location = dex_file_location + ".new";
@@ -1716,6 +1834,25 @@
   if (new_file != nullptr) {
     UNUSED(new_file->FlushCloseOrErase());
   }
+  // Verify the output dex file's structure for debug builds.
+  if (kIsDebugBuild) {
+    std::string location = "memory mapped file for " + dex_file_location;
+    std::unique_ptr<const DexFile> output_dex_file(DexFile::Open(mem_map_->Begin(),
+                                                                 mem_map_->Size(),
+                                                                 location,
+                                                                 header_->Checksum(),
+                                                                 /*oat_dex_file*/ nullptr,
+                                                                 /*verify*/ true,
+                                                                 /*verify_checksum*/ false,
+                                                                 &error_msg));
+  DCHECK(output_dex_file != nullptr) << "Failed to re-open output file: " << error_msg;
+  }
+  // Do IR-level comparison between input and output. This check ignores potential differences
+  // due to layout, so offsets are not checked. Instead, it checks the data contents of each item.
+  if (options_.verify_output_) {
+    std::unique_ptr<dex_ir::Header> orig_header(dex_ir::DexIrBuilder(*dex_file));
+    CHECK(VerifyOutputDexFile(orig_header.get(), header_, &error_msg)) << error_msg;
+  }
 }
 
 /*
@@ -1737,6 +1874,11 @@
     return;
   }
 
+  if (options_.show_section_statistics_) {
+    ShowDexSectionStatistics(header_, dex_file_index);
+    return;
+  }
+
   // Dump dex file.
   if (options_.dump_) {
     DumpDexFile();
@@ -1747,7 +1889,7 @@
     if (info_ != nullptr) {
       LayoutOutputFile(dex_file);
     }
-    OutputDexFile(dex_file->GetLocation());
+    OutputDexFile(dex_file);
   }
 }
 
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index 3918706..69117ad 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -56,7 +56,9 @@
   bool show_annotations_ = false;
   bool show_file_headers_ = false;
   bool show_section_headers_ = false;
+  bool show_section_statistics_ = false;
   bool verbose_ = false;
+  bool verify_output_ = false;
   bool visualize_pattern_ = false;
   OutputFormat output_format_ = kOutputPlain;
   const char* output_dex_directory_ = nullptr;
@@ -107,6 +109,7 @@
 
   std::vector<dex_ir::ClassData*> LayoutClassDefsAndClassData(const DexFile* dex_file);
   int32_t LayoutCodeItems(std::vector<dex_ir::ClassData*> new_class_data_order);
+  void LayoutStringData(const DexFile* dex_file);
   bool IsNextSectionCodeItemAligned(uint32_t offset);
   template<class T> void FixupSection(std::map<uint32_t, std::unique_ptr<T>>& map, uint32_t diff);
   void FixupSections(uint32_t offset, uint32_t diff);
@@ -114,7 +117,7 @@
   // Creates a new layout for the dex file based on profile info.
   // Currently reorders ClassDefs, ClassDataItems, and CodeItems.
   void LayoutOutputFile(const DexFile* dex_file);
-  void OutputDexFile(const std::string& dex_file_location);
+  void OutputDexFile(const DexFile* dex_file);
 
   void DumpCFG(const DexFile* dex_file, int idx);
   void DumpCFG(const DexFile* dex_file, uint32_t dex_method_idx, const DexFile::CodeItem* code);
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index ad599ae..38faf96 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -44,7 +44,7 @@
 static void Usage(void) {
   fprintf(stderr, "Copyright (C) 2016 The Android Open Source Project\n\n");
   fprintf(stderr, "%s: [-a] [-c] [-d] [-e] [-f] [-h] [-i] [-l layout] [-o outfile] [-p profile]"
-                  " [-s] [-w directory] dexfile...\n\n", kProgramName);
+                  " [-s] [-t] [-v] [-w directory] dexfile...\n\n", kProgramName);
   fprintf(stderr, " -a : display annotations\n");
   fprintf(stderr, " -b : build dex_ir\n");
   fprintf(stderr, " -c : verify checksum and exit\n");
@@ -57,6 +57,8 @@
   fprintf(stderr, " -o : output file name (defaults to stdout)\n");
   fprintf(stderr, " -p : profile file name (defaults to no profile)\n");
   fprintf(stderr, " -s : visualize reference pattern\n");
+  fprintf(stderr, " -t : display file section sizes\n");
+  fprintf(stderr, " -v : verify output file is canonical to input (IR level comparison)\n");
   fprintf(stderr, " -w : output dex directory \n");
 }
 
@@ -75,7 +77,7 @@
 
   // Parse all arguments.
   while (1) {
-    const int ic = getopt(argc, argv, "abcdefghil:mo:p:sw:");
+    const int ic = getopt(argc, argv, "abcdefghil:mo:p:stvw:");
     if (ic < 0) {
       break;  // done
     }
@@ -127,6 +129,13 @@
         options.visualize_pattern_ = true;
         options.verbose_ = false;
         break;
+      case 't':  // display section statistics
+        options.show_section_statistics_ = true;
+        options.verbose_ = false;
+        break;
+      case 'v':  // verify output
+        options.verify_output_ = true;
+        break;
       case 'w':  // output dex files directory
         options.output_dex_directory_ = optarg;
         break;
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 9f0593a..e988aac 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -41,19 +41,7 @@
     "AAAAdQEAAAAQAAABAAAAjAEAAA==";
 
 static const char kDexFileLayoutInputProfile[] =
-    "cHJvADAwMwABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
-
-static const char kDexFileLayoutExpectedOutputDex[] =
-    "ZGV4CjAzNQD1KW3+B8NAB0f2A/ZVIBJ0aHrGIqcpVTAUAgAAcAAAAHhWNBIAAAAAAAAAAIwBAAAH"
-    "AAAAcAAAAAQAAACMAAAAAQAAAJwAAAAAAAAAAAAAAAMAAACoAAAAAgAAAMAAAAAUAQAAAAEAADAB"
-    "AAA4AQAAQAEAAEgBAABNAQAAUgEAAGYBAAADAAAABAAAAAUAAAAGAAAABgAAAAMAAAAAAAAAAAAA"
-    "AAAAAAABAAAAAAAAAAIAAAAAAAAAAQAAAAAAAAACAAAAAAAAAAIAAAAAAAAAdQEAAAAAAAAAAAAA"
-    "AAAAAAIAAAAAAAAAAQAAAAAAAAB/AQAAAAAAAAEAAQABAAAAbwEAAAQAAABwEAIAAAAOAAEAAQAB"
-    "AAAAaQEAAAQAAABwEAIAAAAOAAY8aW5pdD4ABkEuamF2YQAGQi5qYXZhAANMQTsAA0xCOwASTGph"
-    "dmEvbGFuZy9PYmplY3Q7AAFWAAQABw48AAQABw48AAAAAQABgIAEgAIAAAEAAICABJgCAAAACwAA"
-    "AAAAAAABAAAAAAAAAAEAAAAHAAAAcAAAAAIAAAAEAAAAjAAAAAMAAAABAAAAnAAAAAUAAAADAAAA"
-    "qAAAAAYAAAACAAAAwAAAAAEgAAACAAAAAAEAAAIgAAAHAAAAMAEAAAMgAAACAAAAaQEAAAAgAAAC"
-    "AAAAdQEAAAAQAAABAAAAjAEAAA==";
+    "cHJvADAwNQABCwABAAAAAAD1KW3+Y2xhc3Nlcy5kZXgBAA==";
 
 // Dex file with catch handler unreferenced by try blocks.
 // Constructed by building a dex file with try/catch blocks and hex editing.
@@ -75,6 +63,49 @@
     "AAAEAQAABgAAAAEAAAAkAQAAASAAAAIAAABEAQAAARAAAAIAAADIAQAAAiAAABIAAADWAQAAAyAA"
     "AAIAAAC1AgAAACAAAAEAAADIAgAAABAAAAEAAADYAgAA";
 
+// Dex file with 0-size (catch all only) catch handler unreferenced by try blocks.
+// Constructed by building a dex file with try/catch blocks and hex editing.
+static const char kUnreferenced0SizeCatchHandlerInputDex[] =
+    "ZGV4CjAzNQCEbEEvMstSNpQpjPdfMEfUBS48cis2QRJoAwAAcAAAAHhWNBIAAAAAAAAAAMgCAAAR"
+    "AAAAcAAAAAcAAAC0AAAAAwAAANAAAAABAAAA9AAAAAQAAAD8AAAAAQAAABwBAAAsAgAAPAEAAOoB"
+    "AADyAQAABAIAABMCAAAqAgAAPgIAAFICAABmAgAAaQIAAG0CAACCAgAAhgIAAIoCAACQAgAAlQIA"
+    "AJ4CAACiAgAAAgAAAAMAAAAEAAAABQAAAAYAAAAHAAAACQAAAAcAAAAFAAAAAAAAAAgAAAAFAAAA"
+    "3AEAAAgAAAAFAAAA5AEAAAQAAQANAAAAAAAAAAAAAAAAAAIADAAAAAEAAQAOAAAAAgAAAAAAAAAA"
+    "AAAAAQAAAAIAAAAAAAAAAQAAAAAAAAC5AgAAAAAAAAEAAQABAAAApgIAAAQAAABwEAMAAAAOAAQA"
+    "AQACAAIAqwIAAC8AAABiAAAAGgEPAG4gAgAQAGIAAAAaAQoAbiACABAAYgAAABoBEABuIAIAEABi"
+    "AAAAGgELAG4gAgAQAA4ADQBiAQAAGgIKAG4gAgAhACcADQBiAQAAGgILAG4gAgAhACcAAAAAAAAA"
+    "BwABAA4AAAAHAAEAAgAdACYAAAABAAAAAwAAAAEAAAAGAAY8aW5pdD4AEEhhbmRsZXJUZXN0Lmph"
+    "dmEADUxIYW5kbGVyVGVzdDsAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwASTGphdmEvbGFuZy9PYmpl"
+    "Y3Q7ABJMamF2YS9sYW5nL1N0cmluZzsAEkxqYXZhL2xhbmcvU3lzdGVtOwABVgACVkwAE1tMamF2"
+    "YS9sYW5nL1N0cmluZzsAAmYxAAJmMgAEbWFpbgADb3V0AAdwcmludGxuAAJ0MQACdDIAAQAHDgAE"
+    "AQAHDnl7eXkCeB2bAAAAAgAAgYAEvAIBCdQCAA0AAAAAAAAAAQAAAAAAAAABAAAAEQAAAHAAAAAC"
+    "AAAABwAAALQAAAADAAAAAwAAANAAAAAEAAAAAQAAAPQAAAAFAAAABAAAAPwAAAAGAAAAAQAAABwB"
+    "AAABIAAAAgAAADwBAAABEAAAAgAAANwBAAACIAAAEQAAAOoBAAADIAAAAgAAAKYCAAAAIAAAAQAA"
+    "ALkCAAAAEAAAAQAAAMgCAAA=";
+
+// Dex file with an unreferenced catch handler at end of code item.
+// Constructed by building a dex file with try/catch blocks and hex editing.
+static const char kUnreferencedEndingCatchHandlerInputDex[] =
+    "ZGV4CjAzNQCEflufI6xGTDDRmLpbfYi6ujPrDLIwvYcEBAAAcAAAAHhWNBIAAAAAAAAAAGQDAAAT"
+    "AAAAcAAAAAgAAAC8AAAAAwAAANwAAAABAAAAAAEAAAUAAAAIAQAAAQAAADABAAC0AgAAUAEAAE4C"
+    "AABWAgAAXgIAAGYCAAB4AgAAhwIAAJ4CAAC1AgAAyQIAAN0CAADxAgAA9wIAAP0CAAAAAwAABAMA"
+    "ABkDAAAcAwAAIgMAACcDAAAEAAAABQAAAAYAAAAHAAAACAAAAAkAAAAMAAAADgAAAAwAAAAGAAAA"
+    "AAAAAA0AAAAGAAAAQAIAAA0AAAAGAAAASAIAAAUAAQARAAAAAAAAAAAAAAAAAAAADwAAAAAAAgAQ"
+    "AAAAAQABABIAAAADAAAAAAAAAAAAAAABAAAAAwAAAAAAAAADAAAAAAAAAFADAAAAAAAAAQABAAEA"
+    "AAAwAwAABAAAAHAQBAAAAA4AAgAAAAIAAgA1AwAAIQAAAGIAAAAaAQoAbiADABAAYgAAABoBCwBu"
+    "IAMAEAAOAA0AYgAAABoBAQBuIAMAEAAo8A0AYgAAABoBAgBuIAMAEAAo7gAAAAAAAAcAAQAHAAAA"
+    "BwABAAIBAg8BAhgAAwABAAIAAgBCAwAAIQAAAGIAAAAaAQoAbiADABAAYgAAABoBCwBuIAMAEAAO"
+    "AA0AYgAAABoBAQBuIAMAEAAo8A0AYgAAABoBAgBuIAMAEAAo7gAAAAAAAAcAAQAHAAAABwABAAIB"
+    "Ag8BAhgAAQAAAAQAAAABAAAABwAGPGluaXQ+AAZDYXRjaDEABkNhdGNoMgAQSGFuZGxlclRlc3Qu"
+    "amF2YQANTEhhbmRsZXJUZXN0OwAVTGphdmEvaW8vUHJpbnRTdHJlYW07ABVMamF2YS9sYW5nL0V4"
+    "Y2VwdGlvbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9s"
+    "YW5nL1N5c3RlbTsABFRyeTEABFRyeTIAAVYAAlZMABNbTGphdmEvbGFuZy9TdHJpbmc7AAFhAARt"
+    "YWluAANvdXQAB3ByaW50bG4AAQAHDgAEAAcOfHsCeB0eih4AEQEABw59ewJ3HR6LHgAAAAMAAIGA"
+    "BNACAQnoAgEJ1AMAAA0AAAAAAAAAAQAAAAAAAAABAAAAEwAAAHAAAAACAAAACAAAALwAAAADAAAA"
+    "AwAAANwAAAAEAAAAAQAAAAABAAAFAAAABQAAAAgBAAAGAAAAAQAAADABAAABIAAAAwAAAFABAAAB"
+    "EAAAAgAAAEACAAACIAAAEwAAAE4CAAADIAAAAwAAADADAAAAIAAAAQAAAFADAAAAEAAAAQAAAGQD"
+    "AAA=";
+
 // Dex file with multiple code items that have the same debug_info_off_. Constructed by a modified
 // dexlayout on XandY.
 static const char kDexFileDuplicateOffset[] =
@@ -145,6 +176,35 @@
     "AAEAAAC4AAAAASAAAAIAAADYAAAAAiAAAAYAAAACAQAAAyAAAAIAAAAxAQAAACAAAAEAAAA7AQAA"
     "ABAAAAEAAABMAQAA";
 
+// Dex file with class data section preceding code items.
+// Constructed by passing dex file through dexmerger tool and hex editing.
+static const char kClassDataBeforeCodeInputDex[] =
+    "ZGV4CjAzNQCZKmCu3XXn4zvxCh5VH0gZNNobEAcsc49EAgAAcAAAAHhWNBIAAAAAAAAAAAQBAAAJ"
+    "AAAAcAAAAAQAAACUAAAAAgAAAKQAAAAAAAAAAAAAAAUAAAC8AAAAAQAAAOQAAABAAQAABAEAAPgB"
+    "AAAAAgAACAIAAAsCAAAQAgAAJAIAACcCAAAqAgAALQIAAAIAAAADAAAABAAAAAUAAAACAAAAAAAA"
+    "AAAAAAAFAAAAAwAAAAAAAAABAAEAAAAAAAEAAAAGAAAAAQAAAAcAAAABAAAACAAAAAIAAQAAAAAA"
+    "AQAAAAEAAAACAAAAAAAAAAEAAAAAAAAAjAEAAAAAAAALAAAAAAAAAAEAAAAAAAAAAQAAAAkAAABw"
+    "AAAAAgAAAAQAAACUAAAAAwAAAAIAAACkAAAABQAAAAUAAAC8AAAABgAAAAEAAADkAAAAABAAAAEA"
+    "AAAEAQAAACAAAAEAAACMAQAAASAAAAQAAACkAQAAAiAAAAkAAAD4AQAAAyAAAAQAAAAwAgAAAAAB"
+    "AwCBgASkAwEBvAMBAdADAQHkAwAAAQABAAEAAAAwAgAABAAAAHAQBAAAAA4AAgABAAAAAAA1AgAA"
+    "AgAAABIQDwACAAEAAAAAADoCAAACAAAAEiAPAAIAAQAAAAAAPwIAAAIAAAASMA8ABjxpbml0PgAG"
+    "QS5qYXZhAAFJAANMQTsAEkxqYXZhL2xhbmcvT2JqZWN0OwABVgABYQABYgABYwABAAcOAAMABw4A"
+    "BgAHDgAJAAcOAA==";
+
+// Dex file with local info containing a null type descriptor.
+// Constructed by building a dex file whose debug info sequence contains DBG_RESTART_LOCAL
+// without any DBG_START_LOCAL to give the local a declared type.
+static const char kUnknownTypeDebugInfoInputDex[] =
+    "ZGV4CjAzNQBtKqZfzjHLNSNwW2A6Bz9FuCEX0sL+FF38AQAAcAAAAHhWNBIAAAAAAAAAAHQBAAAI"
+    "AAAAcAAAAAQAAACQAAAAAgAAAKAAAAAAAAAAAAAAAAMAAAC4AAAAAQAAANAAAAAMAQAA8AAAABwB"
+    "AAAkAQAALAEAAC8BAAA0AQAASAEAAEsBAABOAQAAAgAAAAMAAAAEAAAABQAAAAIAAAAAAAAAAAAA"
+    "AAUAAAADAAAAAAAAAAEAAQAAAAAAAQAAAAYAAAACAAEAAAAAAAEAAAABAAAAAgAAAAAAAAABAAAA"
+    "AAAAAGMBAAAAAAAAAQABAAEAAABUAQAABAAAAHAQAgAAAA4AAgABAAAAAABZAQAAAgAAABIQDwAG"
+    "PGluaXQ+AAZBLmphdmEAAUkAA0xBOwASTGphdmEvbGFuZy9PYmplY3Q7AAFWAAFhAAR0aGlzAAEA"
+    "Bw4AAwAHDh4GAAYAAAAAAQEAgYAE8AEBAYgCAAAACwAAAAAAAAABAAAAAAAAAAEAAAAIAAAAcAAA"
+    "AAIAAAAEAAAAkAAAAAMAAAACAAAAoAAAAAUAAAADAAAAuAAAAAYAAAABAAAA0AAAAAEgAAACAAAA"
+    "8AAAAAIgAAAIAAAAHAEAAAMgAAACAAAAVAEAAAAgAAABAAAAYwEAAAAQAAABAAAAdAEAAA==";
+
 static void WriteBase64ToFile(const char* base64, File* file) {
   // Decode base64.
   CHECK(base64 != nullptr);
@@ -244,7 +304,7 @@
     return true;
   }
 
-  // Runs DexFileOutput test.
+  // Runs DexFileLayout test.
   bool DexFileLayoutExec(std::string* error_msg) {
     ScratchFile tmp_file;
     std::string tmp_name = tmp_file.GetFilename();
@@ -256,34 +316,29 @@
     WriteFileBase64(kDexFileLayoutInputDex, dex_file.c_str());
     std::string profile_file = tmp_dir + "primary.prof";
     WriteFileBase64(kDexFileLayoutInputProfile, profile_file.c_str());
-    std::string expected_output = tmp_dir + "expected.dex";
-    WriteFileBase64(kDexFileLayoutExpectedOutputDex, expected_output.c_str());
     std::string output_dex = tmp_dir + "classes.dex.new";
 
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
     std::vector<std::string> dexlayout_exec_argv =
-        { dexlayout, "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
+        { dexlayout, "-v", "-w", tmp_dir, "-o", tmp_name, "-p", profile_file, dex_file };
     if (!::art::Exec(dexlayout_exec_argv, error_msg)) {
       return false;
     }
-    std::vector<std::string> diff_exec_argv =
-        { "/usr/bin/diff", expected_output, output_dex };
-    if (!::art::Exec(diff_exec_argv, error_msg)) {
-      return false;
-    }
+
+    // -v makes sure that the layout did not corrupt the dex file.
 
     std::vector<std::string> rm_exec_argv =
-        { "/bin/rm", dex_file, profile_file, expected_output, output_dex };
+        { "/bin/rm", dex_file, profile_file, output_dex };
     if (!::art::Exec(rm_exec_argv, error_msg)) {
       return false;
     }
     return true;
   }
 
-  // Runs UnreferencedCatchHandlerTest.
-  bool UnreferencedCatchHandlerExec(std::string* error_msg) {
+  // Runs the unreferenced catch handler tests; |dex_base64| is the base64-encoded input dex.
+  bool UnreferencedCatchHandlerExec(std::string* error_msg, const char* dex_base64) {
     ScratchFile tmp_file;
     std::string tmp_name = tmp_file.GetFilename();
     size_t tmp_last_slash = tmp_name.rfind("/");
@@ -291,7 +346,7 @@
 
     // Write inputs and expected outputs.
     std::string input_dex = tmp_dir + "classes.dex";
-    WriteFileBase64(kUnreferencedCatchHandlerInputDex, input_dex.c_str());
+    WriteFileBase64(dex_base64, input_dex.c_str());
     std::string output_dex = tmp_dir + "classes.dex.new";
 
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
@@ -315,6 +370,26 @@
     }
     return true;
   }
+
+  bool DexLayoutExec(ScratchFile* dex_file,
+                     const char* dex_base64,
+                     ScratchFile* profile_file,
+                     const char* profile_base64,
+                     std::vector<std::string>& dexlayout_exec_argv) {
+    WriteBase64ToFile(dex_base64, dex_file->GetFile());
+    EXPECT_EQ(dex_file->GetFile()->Flush(), 0);
+    if (profile_file != nullptr) {
+      WriteBase64ToFile(profile_base64, profile_file->GetFile());
+      EXPECT_EQ(profile_file->GetFile()->Flush(), 0);
+    }
+    std::string error_msg;
+    const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
+    if (!result) {
+      LOG(ERROR) << "Error: " << error_msg;
+      return false;
+    }
+    return true;
+  }
 };
 
 
@@ -343,79 +418,105 @@
   // Disable test on target.
   TEST_DISABLED_FOR_TARGET();
   std::string error_msg;
-  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg)) << error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg,
+                                           kUnreferencedCatchHandlerInputDex)) << error_msg;
 }
+
+TEST_F(DexLayoutTest, Unreferenced0SizeCatchHandler) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg,
+                                           kUnreferenced0SizeCatchHandlerInputDex)) << error_msg;
+}
+
+TEST_F(DexLayoutTest, UnreferencedEndingCatchHandler) {
+  // Disable test on target.
+  TEST_DISABLED_FOR_TARGET();
+  std::string error_msg;
+  ASSERT_TRUE(UnreferencedCatchHandlerExec(&error_msg,
+                                           kUnreferencedEndingCatchHandlerInputDex)) << error_msg;
+}
+
 TEST_F(DexLayoutTest, DuplicateOffset) {
-  ScratchFile temp;
-  WriteBase64ToFile(kDexFileDuplicateOffset, temp.GetFile());
-  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  ScratchFile temp_dex;
   std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
   EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
-  std::vector<std::string> dexlayout_exec_argv = {
-      dexlayout,
-      "-a",
-      "-i",
-      "-o",
-      "/dev/null",
-      temp.GetFilename()};
-  std::string error_msg;
-  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
-  EXPECT_TRUE(result);
-  if (!result) {
-    LOG(ERROR) << "Error " << error_msg;
-  }
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-a", "-i", "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kDexFileDuplicateOffset,
+                            nullptr /* profile_file */,
+                            nullptr /* profile_base64 */,
+                            dexlayout_exec_argv));
 }
 
 TEST_F(DexLayoutTest, NullSetRefListElement) {
-  ScratchFile temp;
-  WriteBase64ToFile(kNullSetRefListElementInputDex, temp.GetFile());
-  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  ScratchFile temp_dex;
   std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
   EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
   std::vector<std::string> dexlayout_exec_argv =
-      { dexlayout, "-o", "/dev/null", temp.GetFilename() };
-  std::string error_msg;
-  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
-  EXPECT_TRUE(result);
-  if (!result) {
-    LOG(ERROR) << "Error " << error_msg;
-  }
+      { dexlayout, "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kNullSetRefListElementInputDex,
+                            nullptr /* profile_file */,
+                            nullptr /* profile_base64 */,
+                            dexlayout_exec_argv));
 }
 
 TEST_F(DexLayoutTest, MultiClassData) {
-  ScratchFile temp;
-  WriteBase64ToFile(kMultiClassDataInputDex, temp.GetFile());
-  ScratchFile temp2;
-  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
-  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  ScratchFile temp_dex;
+  ScratchFile temp_profile;
   std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
   EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
   std::vector<std::string> dexlayout_exec_argv =
-      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
-  std::string error_msg;
-  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
-  EXPECT_TRUE(result);
-  if (!result) {
-    LOG(ERROR) << "Error " << error_msg;
-  }
+      { dexlayout, "-p", temp_profile.GetFilename(), "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kMultiClassDataInputDex,
+                            &temp_profile,
+                            kDexFileLayoutInputProfile,
+                            dexlayout_exec_argv));
 }
 
 TEST_F(DexLayoutTest, UnalignedCodeInfo) {
-  ScratchFile temp;
-  WriteBase64ToFile(kUnalignedCodeInfoInputDex, temp.GetFile());
-  ScratchFile temp2;
-  WriteBase64ToFile(kDexFileLayoutInputProfile, temp2.GetFile());
-  EXPECT_EQ(temp.GetFile()->Flush(), 0);
+  ScratchFile temp_dex;
+  ScratchFile temp_profile;
   std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
   EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
   std::vector<std::string> dexlayout_exec_argv =
-      { dexlayout, "-p", temp2.GetFilename(), "-o", "/dev/null", temp.GetFilename() };
-  std::string error_msg;
-  const bool result = ::art::Exec(dexlayout_exec_argv, &error_msg);
-  EXPECT_TRUE(result);
-  if (!result) {
-    LOG(ERROR) << "Error " << error_msg;
-  }
+      { dexlayout, "-p", temp_profile.GetFilename(), "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kUnalignedCodeInfoInputDex,
+                            &temp_profile,
+                            kDexFileLayoutInputProfile,
+                            dexlayout_exec_argv));
+}
+
+TEST_F(DexLayoutTest, ClassDataBeforeCode) {
+  ScratchFile temp_dex;
+  ScratchFile temp_profile;
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-p", temp_profile.GetFilename(), "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kClassDataBeforeCodeInputDex,
+                            &temp_profile,
+                            kDexFileLayoutInputProfile,
+                            dexlayout_exec_argv));
+}
+
+TEST_F(DexLayoutTest, UnknownTypeDebugInfo) {
+  ScratchFile temp_dex;
+  std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
+  EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
+  std::vector<std::string> dexlayout_exec_argv =
+      { dexlayout, "-o", "/dev/null", temp_dex.GetFilename() };
+  ASSERT_TRUE(DexLayoutExec(&temp_dex,
+                            kUnknownTypeDebugInfoInputDex,
+                            nullptr /* profile_file */,
+                            nullptr /* profile_base64 */,
+                            dexlayout_exec_argv));
 }
 
 }  // namespace art
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 1f6b874..eb57d33 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -43,6 +43,7 @@
 static const uint32_t kOpcodeShift = 26;
 
 static const uint32_t kCop1 = (17 << kOpcodeShift);
+static const uint32_t kMsa = (30 << kOpcodeShift);  // MSA major opcode.
 
 static const uint32_t kITypeMask = (0x3f << kOpcodeShift);
 static const uint32_t kJTypeMask = (0x3f << kOpcodeShift);
@@ -51,6 +52,8 @@
 static const uint32_t kSpecial2Mask = (0x3f << kOpcodeShift);
 static const uint32_t kSpecial3Mask = (0x3f << kOpcodeShift);
 static const uint32_t kFpMask = kRTypeMask;
+static const uint32_t kMsaMask = kRTypeMask;
+static const uint32_t kMsaSpecialMask = (0x3f << kOpcodeShift);
 
 static const MipsInstruction gMipsInstructions[] = {
   // "sll r0, r0, 0" is the canonical "nop", used in delay slots.
@@ -417,6 +420,37 @@
   { kFpMask, kCop1 | 0x10, "sel", "fadt" },
   { kFpMask, kCop1 | 0x1e, "max", "fadt" },
   { kFpMask, kCop1 | 0x1c, "min", "fadt" },
+
+  // MSA instructions.
+  { kMsaMask | (0x1f << 21), kMsa | (0x0 << 21) | 0x1e, "and.v", "kmn" },
+  { kMsaMask | (0x1f << 21), kMsa | (0x1 << 21) | 0x1e, "or.v", "kmn" },
+  { kMsaMask | (0x1f << 21), kMsa | (0x2 << 21) | 0x1e, "nor.v", "kmn" },
+  { kMsaMask | (0x1f << 21), kMsa | (0x3 << 21) | 0x1e, "xor.v", "kmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0xe, "addv", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x1 << 23) | 0xe, "subv", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x12, "mulv", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x4 << 23) | 0x12, "div_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x5 << 23) | 0x12, "div_u", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x12, "mod_s", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x7 << 23) | 0x12, "mod_u", "Vkmn" },
+  { kMsaMask | (0xf << 22), kMsa | (0x0 << 22) | 0x1b, "fadd", "Ukmn" },
+  { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x1b, "fsub", "Ukmn" },
+  { kMsaMask | (0xf << 22), kMsa | (0x2 << 22) | 0x1b, "fmul", "Ukmn" },
+  { kMsaMask | (0xf << 22), kMsa | (0x3 << 22) | 0x1b, "fdiv", "Ukmn" },
+  { kMsaMask | (0x1ff << 17), kMsa | (0x19e << 17) | 0x1e, "ffint_s", "ukm" },
+  { kMsaMask | (0x1ff << 17), kMsa | (0x19c << 17) | 0x1e, "ftint_s", "ukm" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0xd, "sll", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x1 << 23) | 0xd, "sra", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x2 << 23) | 0xd, "srl", "Vkmn" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x0 << 23) | 0x9, "slli", "kmW" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x1 << 23) | 0x9, "srai", "kmW" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x2 << 23) | 0x9, "srli", "kmW" },
+  { kMsaMask | (0x3ff << 16), kMsa | (0xbe << 16) | 0x19, "move.v", "km" },
+  { kMsaMask | (0xf << 22), kMsa | (0x1 << 22) | 0x19, "splati", "kX" },
+  { kMsaMask | (0xff << 18), kMsa | (0xc0 << 18) | 0x1e, "fill", "vkD" },
+  { kMsaMask | (0x7 << 23), kMsa | (0x6 << 23) | 0x7, "ldi", "kx" },
+  { kMsaSpecialMask | (0xf << 2), kMsa | (0x8 << 2), "ld", "kw" },
+  { kMsaSpecialMask | (0xf << 2), kMsa | (0x9 << 2), "st", "kw" },
 };
 
 static uint32_t ReadU32(const uint8_t* ptr) {
@@ -559,6 +593,125 @@
           case 't': args << 'f' << rt; break;
           case 'Z': args << (rd + 1); break;  // sz ([d]ext size).
           case 'z': args << (rd - sa + 1); break;  // sz ([d]ins, dinsu size).
+          case 'k': args << 'w' << sa; break;
+          case 'm': args << 'w' << rd; break;
+          case 'n': args << 'w' << rt; break;
+          case 'U':  // MSA 1-bit df (word/doubleword), position 21.
+            {
+              int32_t df = (instruction >> 21) & 0x1;
+              switch (df) {
+                case 0: opcode += ".w"; break;
+                case 1: opcode += ".d"; break;
+              }
+              continue;  // No ", ".
+            }
+          case 'u':  // MSA 1-bit df (word/doubleword), position 16.
+            {
+              int32_t df = (instruction >> 16) & 0x1;
+              switch (df) {
+                case 0: opcode += ".w"; break;
+                case 1: opcode += ".d"; break;
+              }
+              continue;  // No ", ".
+            }
+          case 'V':  // MSA 2-bit df, position 21.
+            {
+              int32_t df = (instruction >> 21) & 0x3;
+              switch (df) {
+                case 0: opcode += ".b"; break;
+                case 1: opcode += ".h"; break;
+                case 2: opcode += ".w"; break;
+                case 3: opcode += ".d"; break;
+              }
+              continue;  // No ", ".
+            }
+          case 'v':  // MSA 2-bit df, position 16.
+            {
+              int32_t df = (instruction >> 16) & 0x3;
+              switch (df) {
+                case 0: opcode += ".b"; break;
+                case 1: opcode += ".h"; break;
+                case 2: opcode += ".w"; break;
+                case 3: opcode += ".d"; break;
+              }
+              continue;  // No ", ".
+            }
+          case 'W':  // MSA df/m.
+            {
+              int32_t df_m = (instruction >> 16) & 0x7f;
+              if ((df_m & (0x1 << 6)) == 0) {
+                opcode += ".d";
+                args << (df_m & 0x3f);
+                break;
+              }
+              if ((df_m & (0x1 << 5)) == 0) {
+                opcode += ".w";
+                args << (df_m & 0x1f);
+                break;
+              }
+              if ((df_m & (0x1 << 4)) == 0) {
+                opcode += ".h";
+                args << (df_m & 0xf);
+                break;
+              }
+              if ((df_m & (0x1 << 3)) == 0) {
+                opcode += ".b";
+                args << (df_m & 0x7);
+              }
+              break;
+            }
+          case 'w':  // MSA +x(rs).
+            {
+              int32_t df = instruction & 0x3;
+              int32_t s10 = (instruction >> 16) & 0x3ff;
+              s10 -= (s10 & 0x200) << 1;  // Sign-extend s10.
+              switch (df) {
+                case 0: opcode += ".b"; break;
+                case 1: opcode += ".h"; break;
+                case 2: opcode += ".w"; break;
+                case 3: opcode += ".d"; break;
+              }
+              args << StringPrintf("%+d(r%d)", s10 << df, rd);
+              break;
+            }
+          case 'X':  // MSA df/n - ws[x].
+            {
+              int32_t df_n = (instruction >> 16) & 0x3f;
+              if ((df_n & (0x3 << 4)) == 0) {
+                opcode += ".b";
+                args << 'w' << rd << '[' << (df_n & 0xf) << ']';
+                break;
+              }
+              if ((df_n & (0x3 << 3)) == 0) {
+                opcode += ".h";
+                args << 'w' << rd << '[' << (df_n & 0x7) << ']';
+                break;
+              }
+              if ((df_n & (0x3 << 2)) == 0) {
+                opcode += ".w";
+                args << 'w' << rd << '[' << (df_n & 0x3) << ']';
+                break;
+              }
+              if ((df_n & (0x3 << 1)) == 0) {
+                opcode += ".d";
+                args << 'w' << rd << '[' << (df_n & 0x1) << ']';
+              }
+              break;
+            }
+          case 'x':  // MSA i10.
+            {
+              int32_t df = (instruction >> 21) & 0x3;
+              int32_t i10 = (instruction >> 11) & 0x3ff;
+              i10 -= (i10 & 0x200) << 1;  // Sign-extend i10.
+              switch (df) {
+                case 0: opcode += ".b"; break;
+                case 1: opcode += ".h"; break;
+                case 2: opcode += ".w"; break;
+                case 3: opcode += ".d"; break;
+              }
+              args << i10;
+              break;
+            }
         }
         if (*(args_fmt + 1)) {
           args << ", ";
diff --git a/disassembler/disassembler_x86.cc b/disassembler/disassembler_x86.cc
index a289433..e12bcec 100644
--- a/disassembler/disassembler_x86.cc
+++ b/disassembler/disassembler_x86.cc
@@ -574,6 +574,20 @@
               load = true;
               src_reg_file = dst_reg_file = SSE;
               break;
+            case 0x29:
+              opcode1 = "pcmpeqq";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
+            case 0x39:
+              opcode1 = "pcmpgtq";
+              prefix[2] = 0;
+              has_modrm = true;
+              load = true;
+              src_reg_file = dst_reg_file = SSE;
+              break;
             case 0x40:
               opcode1 = "pmulld";
               prefix[2] = 0;
@@ -737,6 +751,23 @@
         load = true;
         has_modrm = true;
         break;
+      case 0x64:
+      case 0x65:
+      case 0x66:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // Clear the prefix; it has served its purpose as part of the opcode.
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        switch (*instr) {
+          case 0x64: opcode1 = "pcmpgtb"; break;
+          case 0x65: opcode1 = "pcmpgtw"; break;
+          case 0x66: opcode1 = "pcmpgtd"; break;
+        }
+        has_modrm = true;
+        load = true;
+        break;
       case 0x6E:
         if (prefix[2] == 0x66) {
           dst_reg_file = SSE;
@@ -832,6 +863,23 @@
         store = true;
         immediate_bytes = 1;
         break;
+      case 0x74:
+      case 0x75:
+      case 0x76:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // Clear the prefix; it has served its purpose as part of the opcode.
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        switch (*instr) {
+          case 0x74: opcode1 = "pcmpeqb"; break;
+          case 0x75: opcode1 = "pcmpeqw"; break;
+          case 0x76: opcode1 = "pcmpeqd"; break;
+        }
+        has_modrm = true;
+        load = true;
+        break;
       case 0x7C:
         if (prefix[0] == 0xF2) {
           opcode1 = "haddps";
@@ -1083,6 +1131,21 @@
           opcode1 = opcode_tmp.c_str();
         }
         break;
+      case 0xE0:
+      case 0xE3:
+        if (prefix[2] == 0x66) {
+          src_reg_file = dst_reg_file = SSE;
+          prefix[2] = 0;  // Clear the prefix; it has served its purpose as part of the opcode.
+        } else {
+          src_reg_file = dst_reg_file = MMX;
+        }
+        switch (*instr) {
+          case 0xE0: opcode1 = "pavgb"; break;
+          case 0xE3: opcode1 = "pavgw"; break;
+        }
+        has_modrm = true;
+        load = true;
+        break;
       case 0xEB:
         if (prefix[2] == 0x66) {
           src_reg_file = dst_reg_file = SSE;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index becb827..878d0f2 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -596,7 +596,7 @@
       kByteKindStackMapInlineInfoIndex,
       kByteKindStackMapRegisterMaskIndex,
       kByteKindStackMapStackMaskIndex,
-      kByteKindInlineInfoMethodIndex,
+      kByteKindInlineInfoMethodIndexIdx,
       kByteKindInlineInfoDexPc,
       kByteKindInlineInfoExtraData,
       kByteKindInlineInfoDexRegisterMap,
@@ -605,7 +605,7 @@
       // Special ranges for std::accumulate convenience.
       kByteKindStackMapFirst = kByteKindStackMapNativePc,
       kByteKindStackMapLast = kByteKindStackMapStackMaskIndex,
-      kByteKindInlineInfoFirst = kByteKindInlineInfoMethodIndex,
+      kByteKindInlineInfoFirst = kByteKindInlineInfoMethodIndexIdx,
       kByteKindInlineInfoLast = kByteKindInlineInfoIsLast,
     };
     int64_t bits[kByteKindCount] = {};
@@ -685,8 +685,8 @@
         {
           ScopedIndentation indent1(&os);
           Dump(os,
-               "InlineInfoMethodIndex         ",
-               bits[kByteKindInlineInfoMethodIndex],
+               "InlineInfoMethodIndexIdx      ",
+               bits[kByteKindInlineInfoMethodIndexIdx],
                inline_info_bits,
                "inline info");
           Dump(os,
@@ -1363,7 +1363,8 @@
         CodeInfo code_info(raw_code_info);
         DCHECK(code_item != nullptr);
         ScopedIndentation indent1(vios);
-        DumpCodeInfo(vios, code_info, oat_method, *code_item);
+        MethodInfo method_info = oat_method.GetOatQuickMethodHeader()->GetOptimizedMethodInfo();
+        DumpCodeInfo(vios, code_info, oat_method, *code_item, method_info);
       }
     } else if (IsMethodGeneratedByDexToDexCompiler(oat_method, code_item)) {
       // We don't encode the size in the table, so just emit that we have quickened
@@ -1379,12 +1380,14 @@
   void DumpCodeInfo(VariableIndentationOutputStream* vios,
                     const CodeInfo& code_info,
                     const OatFile::OatMethod& oat_method,
-                    const DexFile::CodeItem& code_item) {
+                    const DexFile::CodeItem& code_item,
+                    const MethodInfo& method_info) {
     code_info.Dump(vios,
                    oat_method.GetCodeOffset(),
                    code_item.registers_size_,
                    options_.dump_code_info_stack_maps_,
-                   instruction_set_);
+                   instruction_set_,
+                   method_info);
   }
 
   void DumpVregLocations(std::ostream& os, const OatFile::OatMethod& oat_method,
@@ -1592,6 +1595,7 @@
     } else if (!bad_input && IsMethodGeneratedByOptimizingCompiler(oat_method, code_item)) {
       // The optimizing compiler outputs its CodeInfo data in the vmap table.
       StackMapsHelper helper(oat_method.GetVmapTable(), instruction_set_);
+      MethodInfo method_info(oat_method.GetOatQuickMethodHeader()->GetOptimizedMethodInfo());
       {
         CodeInfoEncoding encoding(helper.GetEncoding());
         StackMapEncoding stack_map_encoding(encoding.stack_map.encoding);
@@ -1652,8 +1656,9 @@
           const size_t num_inline_infos = encoding.inline_info.num_entries;
           if (num_inline_infos > 0u) {
             stats_.AddBits(
-                Stats::kByteKindInlineInfoMethodIndex,
-                encoding.inline_info.encoding.GetMethodIndexEncoding().BitSize() * num_inline_infos);
+                Stats::kByteKindInlineInfoMethodIndexIdx,
+                encoding.inline_info.encoding.GetMethodIndexIdxEncoding().BitSize() *
+                    num_inline_infos);
             stats_.AddBits(
                 Stats::kByteKindInlineInfoDexPc,
                 encoding.inline_info.encoding.GetDexPcEncoding().BitSize() * num_inline_infos);
@@ -1679,6 +1684,7 @@
           stack_map.Dump(vios,
                          helper.GetCodeInfo(),
                          helper.GetEncoding(),
+                         method_info,
                          oat_method.GetCodeOffset(),
                          code_item->registers_size_,
                          instruction_set_);
@@ -2210,13 +2216,13 @@
           ScopedIndentation indent2(&state->vios_);
           auto* resolved_fields = dex_cache->GetResolvedFields();
           for (size_t i = 0, length = dex_cache->NumResolvedFields(); i < length; ++i) {
-            auto* elem = mirror::DexCache::GetElementPtrSize(
-                resolved_fields, i, image_pointer_size);
+            auto* elem = mirror::DexCache::GetNativePairPtrSize(
+                resolved_fields, i, image_pointer_size).object;
             size_t run = 0;
             for (size_t j = i + 1;
-                 j != length && elem == mirror::DexCache::GetElementPtrSize(resolved_fields,
-                                                                            j,
-                                                                            image_pointer_size);
+                 j != length &&
+                 elem == mirror::DexCache::GetNativePairPtrSize(
+                     resolved_fields, j, image_pointer_size).object;
                  ++j) {
               ++run;
             }
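
With resolved fields now stored as native (index, pointer) pairs, the dump loop above compares the `.object` half of each slot rather than a raw `ArtField*`. A minimal sketch of the same run-length grouping, using an illustrative `FieldPair` in place of `mirror::FieldDexCachePair` (the layout and names below are assumptions for exposition, not ART's types):

    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Illustrative stand-in for mirror::FieldDexCachePair: the slot caches
    // both the resolved pointer and the dex field index it was resolved for.
    struct FieldPair {
      const void* object;  // resolved ArtField* (nullptr while unresolved)
      uint32_t index;      // dex field index cached in this slot
    };

    // Group identical consecutive pointers into runs, as the dump loop above
    // does, so a page of empty slots prints as one line instead of hundreds.
    void DumpResolvedFields(const std::vector<FieldPair>& fields) {
      for (size_t i = 0; i < fields.size(); ++i) {
        const void* elem = fields[i].object;
        size_t run = 0;
        for (size_t j = i + 1; j < fields.size() && fields[j].object == elem; ++j) {
          ++run;
        }
        std::cout << i << ": " << elem << " (x" << (run + 1) << ")\n";
        i += run;  // skip the rest of the run
      }
    }

    int main() {
      int a = 0;
      DumpResolvedFields({{nullptr, 0}, {nullptr, 1}, {&a, 2}, {nullptr, 3}});
    }
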
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index 18a6670..0c2717f 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -485,7 +485,7 @@
   explicit RelocatedPointerVisitor(PatchOat* patch_oat) : patch_oat_(patch_oat) {}
 
   template <typename T>
-  T* operator()(T* ptr) const {
+  T* operator()(T* ptr, void** dest_addr ATTRIBUTE_UNUSED = nullptr) const {
     return patch_oat_->RelocatedAddressOfPointer(ptr);
   }
 
@@ -534,17 +534,18 @@
         mirror::DexCache::SetElementPtrSize(copy_methods, j, copy, pointer_size);
       }
     }
-    ArtField** orig_fields = orig_dex_cache->GetResolvedFields();
-    ArtField** relocated_fields = RelocatedAddressOfPointer(orig_fields);
+    mirror::FieldDexCacheType* orig_fields = orig_dex_cache->GetResolvedFields();
+    mirror::FieldDexCacheType* relocated_fields = RelocatedAddressOfPointer(orig_fields);
     copy_dex_cache->SetField64<false>(
         mirror::DexCache::ResolvedFieldsOffset(),
         static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_fields)));
     if (orig_fields != nullptr) {
-      ArtField** copy_fields = RelocatedCopyOf(orig_fields);
+      mirror::FieldDexCacheType* copy_fields = RelocatedCopyOf(orig_fields);
       for (size_t j = 0, num = orig_dex_cache->NumResolvedFields(); j != num; ++j) {
-        ArtField* orig = mirror::DexCache::GetElementPtrSize(orig_fields, j, pointer_size);
-        ArtField* copy = RelocatedAddressOfPointer(orig);
-        mirror::DexCache::SetElementPtrSize(copy_fields, j, copy, pointer_size);
+        mirror::FieldDexCachePair orig =
+            mirror::DexCache::GetNativePairPtrSize(orig_fields, j, pointer_size);
+        mirror::FieldDexCachePair copy(RelocatedAddressOfPointer(orig.object), orig.index);
+        mirror::DexCache::SetNativePairPtrSize(copy_fields, j, copy, pointer_size);
       }
     }
     mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes();
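
patchoat's side of the same change: when copying a dex cache, only the pointer half of each field pair is relocated, while the cached dex index must survive unchanged or the slot would claim to cache a different field. A sketch of that invariant, again with an illustrative `FieldPair` (the nullptr handling is an assumption of the sketch):

    #include <cassert>
    #include <cstdint>

    // Illustrative stand-in for mirror::FieldDexCachePair (assumed layout).
    struct FieldPair {
      const void* object;  // ArtField* in the runtime
      uint32_t index;      // cached dex field index
    };

    // Relocation shifts the pointed-to ArtField by a fixed delta but carries
    // the index across untouched -- the pair's identity lives in the index.
    FieldPair Relocate(const FieldPair& orig, intptr_t delta) {
      const void* moved =
          orig.object == nullptr
              ? nullptr
              : reinterpret_cast<const void*>(
                    reinterpret_cast<intptr_t>(orig.object) + delta);
      return FieldPair{moved, orig.index};
    }

    int main() {
      FieldPair p{reinterpret_cast<const void*>(0x1000), 42};
      FieldPair q = Relocate(p, 0x100);
      assert(q.index == 42);  // index preserved across relocation
      assert(q.object == reinterpret_cast<const void*>(0x1100));
    }
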
diff --git a/profman/profile_assistant.cc b/profman/profile_assistant.cc
index a25460e..b9a85bc 100644
--- a/profman/profile_assistant.cc
+++ b/profman/profile_assistant.cc
@@ -44,10 +44,15 @@
 
   // Merge all current profiles.
   for (size_t i = 0; i < profile_files.size(); i++) {
-    if (!info.Load(profile_files[i].GetFile()->Fd())) {
+    ProfileCompilationInfo cur_info;
+    if (!cur_info.Load(profile_files[i].GetFile()->Fd())) {
       LOG(WARNING) << "Could not load profile file at index " << i;
       return kErrorBadProfiles;
     }
+    if (!info.MergeWith(cur_info)) {
+      LOG(WARNING) << "Could not merge profile file at index " << i;
+      return kErrorBadProfiles;
+    }
   }
 
   // Check if there is enough new information added by the current profiles.
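
The fix above replaces repeated `Load()` calls into one accumulator, which could mask inconsistencies between inputs, with an explicit load-then-`MergeWith()` per file. A sketch of the resulting pattern, using a hypothetical `Profile` type with placeholder `Load`/`MergeWith` bodies (not the real `ProfileCompilationInfo` API):

    #include <optional>
    #include <vector>

    // Hypothetical stand-ins for ProfileCompilationInfo::Load / MergeWith;
    // the trivial bodies are placeholders, not ART's implementation.
    struct Profile {
      bool Load(int /*fd*/) { return true; }
      bool MergeWith(const Profile& /*other*/) { return true; }
    };

    // Load every input into a scratch object, then merge: a file that fails
    // to parse -- or refuses to merge, e.g. on a mismatch between inputs --
    // aborts the aggregation instead of silently corrupting the result.
    std::optional<Profile> MergeAll(const std::vector<int>& fds) {
      Profile merged;
      for (int fd : fds) {
        Profile cur;
        if (!cur.Load(fd) || !merged.MergeWith(cur)) {
          return std::nullopt;  // maps to kErrorBadProfiles above
        }
      }
      return merged;
    }

    int main() { return MergeAll({3, 4, 5}).has_value() ? 0 : 1; }
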
diff --git a/profman/profile_assistant_test.cc b/profman/profile_assistant_test.cc
index d395c17..94f6e70 100644
--- a/profman/profile_assistant_test.cc
+++ b/profman/profile_assistant_test.cc
@@ -16,11 +16,15 @@
 
 #include <gtest/gtest.h>
 
+#include "art_method-inl.h"
 #include "base/unix_file/fd_file.h"
 #include "common_runtime_test.h"
 #include "exec_utils.h"
-#include "profile_assistant.h"
 #include "jit/profile_compilation_info.h"
+#include "mirror/class-inl.h"
+#include "obj_ptr-inl.h"
+#include "profile_assistant.h"
+#include "scoped_thread_state_change-inl.h"
 #include "utils.h"
 
 namespace art {
@@ -33,14 +37,25 @@
                     uint16_t number_of_classes,
                     const ScratchFile& profile,
                     ProfileCompilationInfo* info,
-                    uint16_t start_method_index = 0) {
+                    uint16_t start_method_index = 0,
+                    bool reverse_dex_write_order = false) {
     std::string dex_location1 = "location1" + id;
     uint32_t dex_location_checksum1 = checksum;
     std::string dex_location2 = "location2" + id;
     uint32_t dex_location_checksum2 = 10 * checksum;
     for (uint16_t i = start_method_index; i < start_method_index + number_of_methods; i++) {
-      ASSERT_TRUE(info->AddMethodIndex(dex_location1, dex_location_checksum1, i));
-      ASSERT_TRUE(info->AddMethodIndex(dex_location2, dex_location_checksum2, i));
+      // reverse_dex_write_order controls the order in which the dex files will be added to
+      // the profile and thus written to disk.
+      ProfileCompilationInfo::OfflineProfileMethodInfo pmi =
+          GetOfflineProfileMethodInfo(dex_location1, dex_location_checksum1,
+                                      dex_location2, dex_location_checksum2);
+      if (reverse_dex_write_order) {
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
+      } else {
+        ASSERT_TRUE(info->AddMethod(dex_location1, dex_location_checksum1, i, pmi));
+        ASSERT_TRUE(info->AddMethod(dex_location2, dex_location_checksum2, i, pmi));
+      }
     }
     for (uint16_t i = 0; i < number_of_classes; i++) {
       ASSERT_TRUE(info->AddClassIndex(dex_location1, dex_location_checksum1, dex::TypeIndex(i)));
@@ -51,6 +66,43 @@
     ASSERT_TRUE(profile.GetFile()->ResetOffset());
   }
 
+  ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo(
+        const std::string& dex_location1, uint32_t dex_checksum1,
+        const std::string& dex_location2, uint32_t dex_checksum2) {
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+    pmi.dex_references.emplace_back(dex_location1, dex_checksum1);
+    pmi.dex_references.emplace_back(dex_location2, dex_checksum2);
+
+    // Monomorphic
+    for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.AddClass(0, dex::TypeIndex(0));
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Polymorphic
+    for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.AddClass(0, dex::TypeIndex(0));
+      dex_pc_data.AddClass(1, dex::TypeIndex(1));
+
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Megamorphic
+    for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.SetIsMegamorphic();
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Missing types
+    for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.SetIsMissingTypes();
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+
+    return pmi;
+  }
+
   int GetFd(const ScratchFile& file) const {
     return static_cast<int>(file.GetFd());
   }
@@ -95,10 +147,24 @@
     return ExecAndReturnCode(argv_str, &error);
   }
 
-  bool CreateProfile(std::string class_file_contents, const std::string& filename) {
+  bool GenerateTestProfileWithInputDex(const std::string& filename) {
+    std::string profman_cmd = GetProfmanCmd();
+    std::vector<std::string> argv_str;
+    argv_str.push_back(profman_cmd);
+    argv_str.push_back("--generate-test-profile=" + filename);
+    argv_str.push_back("--generate-test-profile-seed=0");
+    argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
+    argv_str.push_back("--dex-location=" + GetLibCoreDexFileNames()[0]);
+    std::string error;
+    return ExecAndReturnCode(argv_str, &error);
+  }
+
+  bool CreateProfile(std::string profile_file_contents,
+                     const std::string& filename,
+                     const std::string& dex_location) {
     ScratchFile class_names_file;
     File* file = class_names_file.GetFile();
-    EXPECT_TRUE(file->WriteFully(class_file_contents.c_str(), class_file_contents.length()));
+    EXPECT_TRUE(file->WriteFully(profile_file_contents.c_str(), profile_file_contents.length()));
     EXPECT_EQ(0, file->Flush());
     EXPECT_TRUE(file->ResetOffset());
     std::string profman_cmd = GetProfmanCmd();
@@ -106,8 +172,8 @@
     argv_str.push_back(profman_cmd);
     argv_str.push_back("--create-profile-from=" + class_names_file.GetFilename());
     argv_str.push_back("--reference-profile-file=" + filename);
-    argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
-    argv_str.push_back("--dex-location=classes.dex");
+    argv_str.push_back("--apk=" + dex_location);
+    argv_str.push_back("--dex-location=" + dex_location);
     std::string error;
     EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0);
     return true;
@@ -121,7 +187,7 @@
     argv_str.push_back("--dump-classes");
     argv_str.push_back("--profile-file=" + filename);
     argv_str.push_back("--apk=" + GetLibCoreDexFileNames()[0]);
-    argv_str.push_back("--dex-location=classes.dex");
+    argv_str.push_back("--dex-location=" + GetLibCoreDexFileNames()[0]);
     argv_str.push_back("--dump-output-to-fd=" + std::to_string(GetFd(class_names_file)));
     std::string error;
     EXPECT_EQ(ExecAndReturnCode(argv_str, &error), 0);
@@ -135,13 +201,77 @@
     return true;
   }
 
-  bool CreateAndDump(const std::string& input_file_contents, std::string* output_file_contents) {
+  bool CreateAndDump(const std::string& input_file_contents,
+                     std::string* output_file_contents) {
     ScratchFile profile_file;
-    EXPECT_TRUE(CreateProfile(input_file_contents, profile_file.GetFilename()));
+    EXPECT_TRUE(CreateProfile(input_file_contents,
+                              profile_file.GetFilename(),
+                              GetLibCoreDexFileNames()[0]));
     profile_file.GetFile()->ResetOffset();
     EXPECT_TRUE(DumpClasses(profile_file.GetFilename(), output_file_contents));
     return true;
   }
+
+  mirror::Class* GetClass(jobject class_loader, const std::string& clazz) {
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    StackHandleScope<1> hs(self);
+    Handle<mirror::ClassLoader> h_loader(
+        hs.NewHandle(ObjPtr<mirror::ClassLoader>::DownCast(self->DecodeJObject(class_loader))));
+    return class_linker->FindClass(self, clazz.c_str(), h_loader);
+  }
+
+  ArtMethod* GetVirtualMethod(jobject class_loader,
+                              const std::string& clazz,
+                              const std::string& name) {
+    mirror::Class* klass = GetClass(class_loader, clazz);
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    const auto pointer_size = class_linker->GetImagePointerSize();
+    ArtMethod* method = nullptr;
+    Thread* self = Thread::Current();
+    ScopedObjectAccess soa(self);
+    for (auto& m : klass->GetVirtualMethods(pointer_size)) {
+      if (name == m.GetName()) {
+        EXPECT_TRUE(method == nullptr);
+        method = &m;
+      }
+    }
+    return method;
+  }
+
+  // Verify that the given method has the expected inline caches and nothing else.
+  void AssertInlineCaches(ArtMethod* method,
+                          const std::set<mirror::Class*>& expected_classes,
+                          const ProfileCompilationInfo& info,
+                          bool is_megamorphic,
+                          bool is_missing_types)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+    ASSERT_TRUE(info.GetMethod(method->GetDexFile()->GetLocation(),
+                               method->GetDexFile()->GetLocationChecksum(),
+                               method->GetDexMethodIndex(),
+                               &pmi));
+    ASSERT_EQ(pmi.inline_caches.size(), 1u);
+    ProfileCompilationInfo::DexPcData dex_pc_data = pmi.inline_caches.begin()->second;
+
+    ASSERT_EQ(dex_pc_data.is_megamorphic, is_megamorphic);
+    ASSERT_EQ(dex_pc_data.is_missing_types, is_missing_types);
+    ASSERT_EQ(expected_classes.size(), dex_pc_data.classes.size());
+    size_t found = 0;
+    for (mirror::Class* it : expected_classes) {
+      for (const auto& class_ref : dex_pc_data.classes) {
+        ProfileCompilationInfo::DexReference dex_ref =
+            pmi.dex_references[class_ref.dex_profile_index];
+        if (dex_ref.MatchesDex(&(it->GetDexFile())) &&
+            class_ref.type_index == it->GetDexTypeIndex()) {
+          found++;
+        }
+      }
+    }
+
+    ASSERT_EQ(expected_classes.size(), found);
+  }
 };
 
 TEST_F(ProfileAssistantTest, AdviseCompilationEmptyReferences) {
@@ -355,28 +485,80 @@
   ASSERT_TRUE(info.Load(GetFd(profile)));
 }
 
+TEST_F(ProfileAssistantTest, TestProfileGenerationWithInputDex) {
+  ScratchFile profile;
+  // Generate a test profile passing in a dex file as reference.
+  GenerateTestProfileWithInputDex(profile.GetFilename());
+
+  // Verify that the generated profile is valid and can be loaded.
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ProfileCompilationInfo info;
+  ASSERT_TRUE(info.Load(GetFd(profile)));
+}
+
 TEST_F(ProfileAssistantTest, TestProfileCreationAllMatch) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
-    "java.lang.Comparable",
-    "java.lang.Math",
-    "java.lang.Object"
+    "Ljava/lang/Comparable;",
+    "Ljava/lang/Math;",
+    "Ljava/lang/Object;"
   };
   std::string input_file_contents;
+  std::string expected_contents;
   for (std::string& class_name : class_names) {
     input_file_contents += class_name + std::string("\n");
+    expected_contents += DescriptorToDot(class_name.c_str()) +
+        std::string("\n");
   }
   std::string output_file_contents;
   ASSERT_TRUE(CreateAndDump(input_file_contents, &output_file_contents));
-  ASSERT_EQ(output_file_contents, input_file_contents);
+  ASSERT_EQ(output_file_contents, expected_contents);
+}
+
+TEST_F(ProfileAssistantTest, TestProfileCreationGenerateMethods) {
+  // Class names put here need to be in sorted order.
+  std::vector<std::string> class_names = {
+    "Ljava/lang/Math;->*",
+  };
+  std::string input_file_contents;
+  std::string expected_contents;
+  for (std::string& class_name : class_names) {
+    input_file_contents += class_name + std::string("\n");
+    expected_contents += DescriptorToDot(class_name.c_str()) +
+        std::string("\n");
+  }
+  std::string output_file_contents;
+  ScratchFile profile_file;
+  EXPECT_TRUE(CreateProfile(input_file_contents,
+                            profile_file.GetFilename(),
+                            GetLibCoreDexFileNames()[0]));
+  ProfileCompilationInfo info;
+  profile_file.GetFile()->ResetOffset();
+  ASSERT_TRUE(info.Load(GetFd(profile_file)));
+  // Verify that the profile has matching methods.
+  ScopedObjectAccess soa(Thread::Current());
+  ObjPtr<mirror::Class> klass = GetClass(nullptr, "Ljava/lang/Math;");
+  ASSERT_TRUE(klass != nullptr);
+  size_t method_count = 0;
+  for (ArtMethod& method : klass->GetMethods(kRuntimePointerSize)) {
+    if (!method.IsCopied() && method.GetCodeItem() != nullptr) {
+      ++method_count;
+      ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+      ASSERT_TRUE(info.GetMethod(method.GetDexFile()->GetLocation(),
+                                 method.GetDexFile()->GetLocationChecksum(),
+                                 method.GetDexMethodIndex(),
+                                 &pmi));
+    }
+  }
+  EXPECT_GT(method_count, 0u);
 }
 
 TEST_F(ProfileAssistantTest, TestProfileCreationOneNotMatched) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
-    "doesnt.match.this.one",
-    "java.lang.Comparable",
-    "java.lang.Object"
+    "Ldoesnt/match/this/one;",
+    "Ljava/lang/Comparable;",
+    "Ljava/lang/Object;"
   };
   std::string input_file_contents;
   for (std::string& class_name : class_names) {
@@ -385,16 +567,17 @@
   std::string output_file_contents;
   ASSERT_TRUE(CreateAndDump(input_file_contents, &output_file_contents));
   std::string expected_contents =
-      class_names[1] + std::string("\n") + class_names[2] + std::string("\n");
+      DescriptorToDot(class_names[1].c_str()) + std::string("\n") +
+      DescriptorToDot(class_names[2].c_str()) + std::string("\n");
   ASSERT_EQ(output_file_contents, expected_contents);
 }
 
 TEST_F(ProfileAssistantTest, TestProfileCreationNoneMatched) {
   // Class names put here need to be in sorted order.
   std::vector<std::string> class_names = {
-    "doesnt.match.this.one",
-    "doesnt.match.this.one.either",
-    "nor.this.one"
+    "Ldoesnt/match/this/one;",
+    "Ldoesnt/match/this/one/either;",
+    "Lnor/this/one;"
   };
   std::string input_file_contents;
   for (std::string& class_name : class_names) {
@@ -406,4 +589,155 @@
   ASSERT_EQ(output_file_contents, expected_contents);
 }
 
+TEST_F(ProfileAssistantTest, TestProfileCreateInlineCache) {
+  // Create the profile content.
+  std::vector<std::string> methods = {
+    "LTestInline;->inlineMonomorphic(LSuper;)I+LSubA;",
+    "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;",
+    "LTestInline;->inlineMegamorphic(LSuper;)I+LSubA;,LSubB;,LSubC;,LSubD;,LSubE;",
+    "LTestInline;->inlineMissingTypes(LSuper;)I+missing_types",
+    "LTestInline;->noInlineCache(LSuper;)I"
+  };
+  std::string input_file_contents;
+  for (std::string& m : methods) {
+    input_file_contents += m + std::string("\n");
+  }
+
+  // Create the profile and save it to disk.
+  ScratchFile profile_file;
+  ASSERT_TRUE(CreateProfile(input_file_contents,
+                            profile_file.GetFilename(),
+                            GetTestDexFileName("ProfileTestMultiDex")));
+
+  // Load the profile from disk.
+  ProfileCompilationInfo info;
+  profile_file.GetFile()->ResetOffset();
+  ASSERT_TRUE(info.Load(GetFd(profile_file)));
+
+  // Load the dex files and verify that the profile contains the expected methods info.
+  ScopedObjectAccess soa(Thread::Current());
+  jobject class_loader = LoadDex("ProfileTestMultiDex");
+  ASSERT_NE(class_loader, nullptr);
+
+  mirror::Class* sub_a = GetClass(class_loader, "LSubA;");
+  mirror::Class* sub_b = GetClass(class_loader, "LSubB;");
+  mirror::Class* sub_c = GetClass(class_loader, "LSubC;");
+
+  ASSERT_TRUE(sub_a != nullptr);
+  ASSERT_TRUE(sub_b != nullptr);
+  ASSERT_TRUE(sub_c != nullptr);
+
+  {
+    // Verify that method inlineMonomorphic has the expected inline caches and nothing else.
+    ArtMethod* inline_monomorphic = GetVirtualMethod(class_loader,
+                                                     "LTestInline;",
+                                                     "inlineMonomorphic");
+    ASSERT_TRUE(inline_monomorphic != nullptr);
+    std::set<mirror::Class*> expected_monomorphic;
+    expected_monomorphic.insert(sub_a);
+    AssertInlineCaches(inline_monomorphic,
+                       expected_monomorphic,
+                       info,
+                       /*megamorphic*/false,
+                       /*missing_types*/false);
+  }
+
+  {
+    // Verify that method inlinePolymorphic has the expected inline caches and nothing else.
+    ArtMethod* inline_polymorphic = GetVirtualMethod(class_loader,
+                                                     "LTestInline;",
+                                                     "inlinePolymorphic");
+    ASSERT_TRUE(inline_polymorphic != nullptr);
+    std::set<mirror::Class*> expected_polymorphic;
+    expected_polymorphic.insert(sub_a);
+    expected_polymorphic.insert(sub_b);
+    expected_polymorphic.insert(sub_c);
+    AssertInlineCaches(inline_polymorphic,
+                       expected_polymorphic,
+                       info,
+                       /*megamorphic*/false,
+                       /*missing_types*/false);
+  }
+
+  {
+    // Verify that method inlineMegamorphic has the expected inline caches and nothing else.
+    ArtMethod* inline_megamorphic = GetVirtualMethod(class_loader,
+                                                     "LTestInline;",
+                                                     "inlineMegamorphic");
+    ASSERT_TRUE(inline_megamorphic != nullptr);
+    std::set<mirror::Class*> expected_megamorphic;
+    AssertInlineCaches(inline_megamorphic,
+                       expected_megamorphic,
+                       info,
+                       /*megamorphic*/true,
+                       /*missing_types*/false);
+  }
+
+  {
+    // Verify that method inlineMissingTypes has the expected inline caches and nothing else.
+    ArtMethod* inline_missing_types = GetVirtualMethod(class_loader,
+                                                       "LTestInline;",
+                                                       "inlineMissingTypes");
+    ASSERT_TRUE(inline_missing_types != nullptr);
+    std::set<mirror::Class*> expected_missing_types;
+    AssertInlineCaches(inline_missing_types,
+                       expected_missing_types,
+                       info,
+                       /*megamorphic*/false,
+                       /*missing_types*/true);
+  }
+
+  {
+    // Verify that method noInlineCache has no inline caches in the profile.
+    ArtMethod* no_inline_cache = GetVirtualMethod(class_loader, "LTestInline;", "noInlineCache");
+    ASSERT_TRUE(no_inline_cache != nullptr);
+    ProfileCompilationInfo::OfflineProfileMethodInfo pmi_no_inline_cache;
+    ASSERT_TRUE(info.GetMethod(no_inline_cache->GetDexFile()->GetLocation(),
+                               no_inline_cache->GetDexFile()->GetLocationChecksum(),
+                               no_inline_cache->GetDexMethodIndex(),
+                               &pmi_no_inline_cache));
+    ASSERT_TRUE(pmi_no_inline_cache.inline_caches.empty());
+  }
+}
+
+TEST_F(ProfileAssistantTest, MergeProfilesWithDifferentDexOrder) {
+  ScratchFile profile1;
+  ScratchFile reference_profile;
+
+  std::vector<int> profile_fds({GetFd(profile1)});
+  int reference_profile_fd = GetFd(reference_profile);
+
+  // The new profile info will contain the methods with indices 0-99.
+  const uint16_t kNumberOfMethodsToEnableCompilation = 100;
+  ProfileCompilationInfo info1;
+  SetupProfile("p1", 1, kNumberOfMethodsToEnableCompilation, 0, profile1, &info1,
+      /*start_method_index*/0, /*reverse_dex_write_order*/false);
+
+  // The reference profile info will contain the methods with indices 50-149.
+  // When setting up the profile reverse the order in which the dex files
+  // are added to the profile. This will verify that profman merges profiles
+  // with a different dex order correctly.
+  const uint16_t kNumberOfMethodsAlreadyCompiled = 100;
+  ProfileCompilationInfo reference_info;
+  SetupProfile("p1", 1, kNumberOfMethodsAlreadyCompiled, 0, reference_profile,
+      &reference_info, kNumberOfMethodsToEnableCompilation / 2, /*reverse_dex_write_order*/true);
+
+  // We should advise compilation.
+  ASSERT_EQ(ProfileAssistant::kCompile,
+            ProcessProfiles(profile_fds, reference_profile_fd));
+
+  // The resulting compilation info must be equal to the merge of the inputs.
+  ProfileCompilationInfo result;
+  ASSERT_TRUE(reference_profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(result.Load(reference_profile_fd));
+
+  ProfileCompilationInfo expected;
+  ASSERT_TRUE(expected.MergeWith(reference_info));
+  ASSERT_TRUE(expected.MergeWith(info1));
+  ASSERT_TRUE(expected.Equals(result));
+
+  // The information from profile must remain the same.
+  CheckProfileInfo(profile1, info1);
+}
+
 }  // namespace art
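
The `GetOfflineProfileMethodInfo` helper above populates dex pcs in the four inline-cache states a call site can be in: monomorphic, polymorphic, megamorphic, and missing-types. A simplified model of that state progression follows; the threshold value and the auto-transition inside `AddClass` are assumptions of this sketch, since in ART the flags are set explicitly, as the test does:

    #include <cstddef>
    #include <cstdint>
    #include <set>

    // Simplified model of ProfileCompilationInfo::DexPcData. In the real
    // type, classes are keyed by (dex_profile_index, type_index).
    struct DexPcData {
      static constexpr size_t kMegamorphicThreshold = 5;  // assumed value
      std::set<uint32_t> classes;
      bool is_megamorphic = false;
      bool is_missing_types = false;

      void AddClass(uint32_t type_index) {
        if (is_megamorphic || is_missing_types) {
          return;  // terminal states: the class list is no longer tracked
        }
        classes.insert(type_index);
        if (classes.size() >= kMegamorphicThreshold) {
          classes.clear();  // megamorphic sites drop their class list
          is_megamorphic = true;
        }
      }
      void SetIsMegamorphic() { classes.clear(); is_megamorphic = true; }
      void SetIsMissingTypes() { classes.clear(); is_missing_types = true; }
    };

    int main() {
      DexPcData data;
      data.AddClass(0);  // monomorphic
      data.AddClass(1);  // polymorphic
      for (uint32_t t = 2; t < 7; ++t) data.AddClass(t);  // tips megamorphic
      return data.is_megamorphic ? 0 : 1;
    }
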
diff --git a/profman/profman.cc b/profman/profman.cc
index a42e4f1..5504695 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -36,6 +36,7 @@
 #include "base/stringpiece.h"
 #include "base/time_utils.h"
 #include "base/unix_file/fd_file.h"
+#include "bytecode_utils.h"
 #include "dex_file.h"
 #include "jit/profile_compilation_info.h"
 #include "runtime.h"
@@ -116,10 +117,11 @@
   UsageError("      number of methods that should be generated. Defaults to 5.");
   UsageError("  --generate-test-profile-class-ratio=<number>: the percentage from the maximum");
   UsageError("      number of classes that should be generated. Defaults to 5.");
+  UsageError("  --generate-test-profile-seed=<number>: seed for random number generator used when");
+  UsageError("      generating random test profiles. Defaults to using NanoTime.");
   UsageError("");
   UsageError("  --create-profile-from=<filename>: creates a profile from a list of classes.");
   UsageError("");
-  UsageError("");
   UsageError("  --dex-location=<string>: location string to use with corresponding");
   UsageError("      apk-fd to find dex files");
   UsageError("");
@@ -136,6 +138,16 @@
 static constexpr uint16_t kDefaultTestProfileMethodRatio = 5;
 static constexpr uint16_t kDefaultTestProfileClassRatio = 5;
 
+// Separators used when parsing the human-friendly representation of profiles.
+static const std::string kMethodSep = "->";
+static const std::string kMissingTypesMarker = "missing_types";
+static const std::string kClassAllMethods = "*";
+static constexpr char kProfileParsingInlineCacheSep = '+';
+static constexpr char kProfileParsingTypeSep = ',';
+static constexpr char kProfileParsingFirstCharInSignature = '(';
+
+// TODO(calin): This class has grown too much from its initial design. Split the functionality
+// into smaller, more contained pieces.
 class ProfMan FINAL {
  public:
   ProfMan() :
@@ -146,6 +158,7 @@
       test_profile_num_dex_(kDefaultTestProfileNumDex),
       test_profile_method_ratio_(kDefaultTestProfileMethodRatio),
       test_profile_class_ratio_(kDefaultTestProfileClassRatio),
+      test_profile_seed_(NanoTime()),
       start_ns_(NanoTime()) {}
 
   ~ProfMan() {
@@ -211,6 +224,8 @@
                         "--generate-test-profile-class-ratio",
                         &test_profile_class_ratio_,
                         Usage);
+      } else if (option.starts_with("--generate-test-profile-seed=")) {
+        ParseUintOption(option, "--generate-test-profile-seed", &test_profile_seed_, Usage);
       } else {
         Usage("Unknown argument '%s'", option.data());
       }
@@ -408,7 +423,7 @@
     return true;
   }
 
-  bool GetClassNames(std::string profile_file,
+  bool GetClassNames(const std::string& profile_file,
                      std::vector<std::unique_ptr<const DexFile>>* dex_files,
                      std::set<std::string>* class_names) {
     int fd = open(profile_file.c_str(), O_RDONLY);
@@ -522,6 +537,208 @@
     return output.release();
   }
 
+  // Find class klass_descriptor in the given dex_files and store its reference
+  // in the out parameter class_ref.
+  // Return true if the definition of the class was found in any of the dex_files.
+  bool FindClass(const std::vector<std::unique_ptr<const DexFile>>& dex_files,
+                 const std::string& klass_descriptor,
+                 /*out*/ProfileMethodInfo::ProfileClassReference* class_ref) {
+    for (const std::unique_ptr<const DexFile>& dex_file_ptr : dex_files) {
+      const DexFile* dex_file = dex_file_ptr.get();
+      const DexFile::TypeId* type_id = dex_file->FindTypeId(klass_descriptor.c_str());
+      if (type_id == nullptr) {
+        continue;
+      }
+      dex::TypeIndex type_index = dex_file->GetIndexForTypeId(*type_id);
+      if (dex_file->FindClassDef(type_index) == nullptr) {
+        // Class is only referenced in the current dex file but not defined in it.
+        continue;
+      }
+      class_ref->dex_file = dex_file;
+      class_ref->type_index = type_index;
+      return true;
+    }
+    return false;
+  }
+
+  // Find the method specified by method_spec in the class class_ref. The method
+  // must have a single INVOKE_VIRTUAL in its byte code.
+  // Upon success it returns true and stores the method index and the invoke dex pc
+  // in the output parameters.
+  // The format of the method spec is "inlinePolymorphic(LSuper;)I".
+  //
+  // TODO(calin): support INVOKE_INTERFACE and the range variants.
+  bool FindMethodWithSingleInvoke(const ProfileMethodInfo::ProfileClassReference& class_ref,
+                                  const std::string& method_spec,
+                                  /*out*/uint16_t* method_index,
+                                  /*out*/uint32_t* dex_pc) {
+    std::vector<std::string> name_and_signature;
+    Split(method_spec, kProfileParsingFirstCharInSignature, &name_and_signature);
+    if (name_and_signature.size() != 2) {
+      LOG(ERROR) << "Invalid method name and signature " << method_spec;
+    }
+    const std::string& name = name_and_signature[0];
+    const std::string& signature = kProfileParsingFirstCharInSignature + name_and_signature[1];
+    const DexFile* dex_file = class_ref.dex_file;
+
+    const DexFile::StringId* name_id = dex_file->FindStringId(name.c_str());
+    if (name_id == nullptr) {
+      LOG(ERROR) << "Could not find name: "  << name;
+      return false;
+    }
+    dex::TypeIndex return_type_idx;
+    std::vector<dex::TypeIndex> param_type_idxs;
+    if (!dex_file->CreateTypeList(signature, &return_type_idx, &param_type_idxs)) {
+      LOG(ERROR) << "Could not create type list" << signature;
+      return false;
+    }
+    const DexFile::ProtoId* proto_id = dex_file->FindProtoId(return_type_idx, param_type_idxs);
+    if (proto_id == nullptr) {
+      LOG(ERROR) << "Could not find proto_id: " << name;
+      return false;
+    }
+    const DexFile::MethodId* method_id = dex_file->FindMethodId(
+        dex_file->GetTypeId(class_ref.type_index), *name_id, *proto_id);
+    if (method_id == nullptr) {
+      LOG(ERROR) << "Could not find method_id: " << name;
+      return false;
+    }
+
+    *method_index = dex_file->GetIndexForMethodId(*method_id);
+
+    uint32_t offset = dex_file->FindCodeItemOffset(
+        *dex_file->FindClassDef(class_ref.type_index),
+        *method_index);
+    const DexFile::CodeItem* code_item = dex_file->GetCodeItem(offset);
+
+    bool found_invoke = false;
+    for (CodeItemIterator it(*code_item); !it.Done(); it.Advance()) {
+      if (it.CurrentInstruction().Opcode() == Instruction::INVOKE_VIRTUAL) {
+        if (found_invoke) {
+          LOG(ERROR) << "Multiple invoke INVOKE_VIRTUAL found: " << name;
+          return false;
+        }
+        found_invoke = true;
+        *dex_pc = it.CurrentDexPc();
+      }
+    }
+    if (!found_invoke) {
+      LOG(ERROR) << "Could not find any INVOKE_VIRTUAL: " << name;
+    }
+    return found_invoke;
+  }
+
+  // Process a line defining a class or a method and its inline caches.
+  // Upon success, return true and add the class or method info to the profile.
+  // The possible line formats are:
+  // "LJustTheClass;".
+  // "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;".
+  // "LTestInline;->inlineMissingTypes(LSuper;)I+missing_types".
+  // "LTestInline;->inlineNoInlineCaches(LSuper;)I".
+  // "LTestInline;->*".
+  // The method and classes are searched only in the given dex files.
+  bool ProcessLine(const std::vector<std::unique_ptr<const DexFile>>& dex_files,
+                   const std::string& line,
+                   /*out*/ProfileCompilationInfo* profile) {
+    std::string klass;
+    std::string method_str;
+    size_t method_sep_index = line.find(kMethodSep);
+    if (method_sep_index == std::string::npos) {
+      klass = line;
+    } else {
+      klass = line.substr(0, method_sep_index);
+      method_str = line.substr(method_sep_index + kMethodSep.size());
+    }
+
+    ProfileMethodInfo::ProfileClassReference class_ref;
+    if (!FindClass(dex_files, klass, &class_ref)) {
+      LOG(WARNING) << "Could not find class: " << klass;
+      return false;
+    }
+
+    if (method_str.empty() || method_str == kClassAllMethods) {
+      // Start by adding the class.
+      std::set<DexCacheResolvedClasses> resolved_class_set;
+      const DexFile* dex_file = class_ref.dex_file;
+      const auto& dex_resolved_classes = resolved_class_set.emplace(
+            dex_file->GetLocation(),
+            dex_file->GetBaseLocation(),
+            dex_file->GetLocationChecksum());
+      dex_resolved_classes.first->AddClass(class_ref.type_index);
+      std::vector<ProfileMethodInfo> methods;
+      if (method_str == kClassAllMethods) {
+        // Add all of the methods.
+        const DexFile::ClassDef* class_def = dex_file->FindClassDef(class_ref.type_index);
+        const uint8_t* class_data = dex_file->GetClassData(*class_def);
+        if (class_data != nullptr) {
+          ClassDataItemIterator it(*dex_file, class_data);
+          while (it.HasNextStaticField() || it.HasNextInstanceField()) {
+            it.Next();
+          }
+          while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
+            if (it.GetMethodCodeItemOffset() != 0) {
+              // Add all of the methods that have code to the profile.
+              const uint32_t method_idx = it.GetMemberIndex();
+              methods.push_back(ProfileMethodInfo(dex_file, method_idx));
+            }
+            it.Next();
+          }
+        }
+      }
+      profile->AddMethodsAndClasses(methods, resolved_class_set);
+      return true;
+    }
+
+    // Process the method.
+    std::string method_spec;
+    std::vector<std::string> inline_cache_elems;
+
+    std::vector<std::string> method_elems;
+    bool is_missing_types = false;
+    Split(method_str, kProfileParsingInlineCacheSep, &method_elems);
+    if (method_elems.size() == 2) {
+      method_spec = method_elems[0];
+      is_missing_types = method_elems[1] == kMissingTypesMarker;
+      if (!is_missing_types) {
+        Split(method_elems[1], kProfileParsingTypeSep, &inline_cache_elems);
+      }
+    } else if (method_elems.size() == 1) {
+      method_spec = method_elems[0];
+    } else {
+      LOG(ERROR) << "Invalid method line: " << line;
+      return false;
+    }
+
+    uint16_t method_index;
+    uint32_t dex_pc;
+    if (!FindMethodWithSingleInvoke(class_ref, method_spec, &method_index, &dex_pc)) {
+      return false;
+    }
+    std::vector<ProfileMethodInfo::ProfileClassReference> classes(inline_cache_elems.size());
+    size_t class_it = 0;
+    for (const std::string& ic_class : inline_cache_elems) {
+      if (!FindClass(dex_files, ic_class, &(classes[class_it++]))) {
+        LOG(ERROR) << "Could not find class: " << ic_class;
+        return false;
+      }
+    }
+    std::vector<ProfileMethodInfo::ProfileInlineCache> inline_caches;
+    inline_caches.emplace_back(dex_pc, is_missing_types, classes);
+    std::vector<ProfileMethodInfo> pmi;
+    pmi.emplace_back(class_ref.dex_file, method_index, inline_caches);
+
+    profile->AddMethodsAndClasses(pmi, std::set<DexCacheResolvedClasses>());
+    return true;
+  }
+
+  // Creates a profile from a human-friendly textual representation.
+  // The expected input format is:
+  //   # Classes
+  //   Ljava/lang/Comparable;
+  //   Ljava/lang/Math;
+  //   # Methods with inline caches
+  //   LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;
+  //   LTestInline;->noInlineCache(LSuper;)I
   int CreateProfile() {
     // Validate parameters for this command.
     if (apk_files_.empty() && apks_fd_.empty()) {
@@ -550,51 +767,22 @@
         return -1;
       }
     }
-    // Read the user-specified list of classes (dot notation rather than descriptors).
+    // Read the user-specified list of classes and methods.
     std::unique_ptr<std::unordered_set<std::string>>
-        user_class_list(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
+        user_lines(ReadCommentedInputFromFile<std::unordered_set<std::string>>(
             create_profile_from_file_.c_str(), nullptr));  // No post-processing.
-    std::unordered_set<std::string> matched_user_classes;
-    // Open the dex files to look up class names.
+
+    // Open the dex files to look up classes and methods.
     std::vector<std::unique_ptr<const DexFile>> dex_files;
     OpenApkFilesFromLocations(&dex_files);
-    // Iterate over the dex files looking for class names in the input stream.
-    std::set<DexCacheResolvedClasses> resolved_class_set;
-    for (auto& dex_file : dex_files) {
-      // Compute the set of classes to be added for this dex file first.  This
-      // avoids creating an entry in the profile information for dex files that
-      // contribute no classes.
-      std::unordered_set<dex::TypeIndex> classes_to_be_added;
-      for (const auto& klass : *user_class_list) {
-        std::string descriptor = DotToDescriptor(klass.c_str());
-        const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor.c_str());
-        if (type_id == nullptr) {
-          continue;
-        }
-        classes_to_be_added.insert(dex_file->GetIndexForTypeId(*type_id));
-        matched_user_classes.insert(klass);
-      }
-      if (classes_to_be_added.empty()) {
-        continue;
-      }
-      // Insert the DexCacheResolved Classes into the set expected for
-      // AddMethodsAndClasses.
-      std::set<DexCacheResolvedClasses>::iterator dex_resolved_classes =
-          resolved_class_set.emplace(dex_file->GetLocation(),
-                                     dex_file->GetBaseLocation(),
-                                     dex_file->GetLocationChecksum()).first;
-      dex_resolved_classes->AddClasses(classes_to_be_added.begin(), classes_to_be_added.end());
-    }
-    // Warn the user if we didn't find matches for every class.
-    for (const auto& klass : *user_class_list) {
-      if (matched_user_classes.find(klass) == matched_user_classes.end()) {
-        LOG(WARNING) << "requested class '" << klass << "' was not matched in any dex file";
-      }
-    }
-    // Generate the profile data structure.
+
+    // Process the lines one by one and add the successful ones to the profile.
     ProfileCompilationInfo info;
-    std::vector<ProfileMethodInfo> methods;  // No methods for now.
-    info.AddMethodsAndClasses(methods, resolved_class_set);
+
+    for (const auto& line : *user_lines) {
+      ProcessLine(dex_files, line, &info);
+    }
+
     // Write the profile file.
     CHECK(info.Save(fd));
     if (close(fd) < 0) {
@@ -615,17 +803,39 @@
     if (test_profile_class_ratio_ > 100) {
       Usage("Invalid ratio for --generate-test-profile-class-ratio");
     }
+    // If given APK files or DEX locations, check that they are consistent with each other.
+    if (!apk_files_.empty() || !apks_fd_.empty() || !dex_locations_.empty()) {
+      if (apk_files_.empty() && apks_fd_.empty()) {
+        Usage("APK files must be specified when passing DEX locations to --generate-test-profile");
+      }
+      if (dex_locations_.empty()) {
+        Usage("DEX locations must be specified when passing APK files to --generate-test-profile");
+      }
+    }
     // ShouldGenerateTestProfile confirms !test_profile_.empty().
     int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
     if (profile_test_fd < 0) {
       LOG(ERROR) << "Cannot open " << test_profile_ << strerror(errno);
       return -1;
     }
-
-    bool result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
-                                                              test_profile_num_dex_,
-                                                              test_profile_method_ratio_,
-                                                              test_profile_class_ratio_);
+    bool result;
+    if (apk_files_.empty() && apks_fd_.empty() && dex_locations_.empty()) {
+      result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                           test_profile_num_dex_,
+                                                           test_profile_method_ratio_,
+                                                           test_profile_class_ratio_,
+                                                           test_profile_seed_);
+    } else {
+      // Initialize MemMap for ZipArchive::OpenFromFd.
+      MemMap::Init();
+      // Open the dex files to look up classes and methods.
+      std::vector<std::unique_ptr<const DexFile>> dex_files;
+      OpenApkFilesFromLocations(&dex_files);
+      // Create a random profile file based on the set of dex files.
+      result = ProfileCompilationInfo::GenerateTestProfile(profile_test_fd,
+                                                           dex_files,
+                                                           test_profile_seed_);
+    }
     close(profile_test_fd);  // ignore close result.
     return result ? 0 : -1;
   }
@@ -674,6 +884,7 @@
   uint16_t test_profile_num_dex_;
   uint16_t test_profile_method_ratio_;
   uint16_t test_profile_class_ratio_;
+  uint32_t test_profile_seed_;
   uint64_t start_ns_;
 };
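
`ProcessLine` above consumes one textual descriptor per line: a bare class, a class with `->*` for all methods, or a method with an optional `+`-separated inline-cache suffix. A small standalone parser for that grammar (a sketch of the format only, not profman's implementation; `ParsedLine` is an invented type):

    #include <cstddef>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // Invented container for one parsed line; the separators ("->", '+', ',',
    // "missing_types", "*") mirror the constants defined above.
    struct ParsedLine {
      std::string klass;                        // e.g. "LTestInline;"
      std::string method;                       // e.g. "inlineMono(LSuper;)I"
      std::vector<std::string> inline_classes;  // e.g. {"LSubA;"}
      bool missing_types = false;
      bool all_methods = false;                 // "Lklass;->*"
    };

    ParsedLine ParseProfileLine(const std::string& line) {
      ParsedLine out;
      size_t sep = line.find("->");
      if (sep == std::string::npos) {
        out.klass = line;  // class-only line, e.g. "Ljava/lang/Math;"
        return out;
      }
      out.klass = line.substr(0, sep);
      std::string method_str = line.substr(sep + 2);
      if (method_str == "*") {
        out.all_methods = true;  // add every method of the class
        return out;
      }
      size_t plus = method_str.find('+');
      out.method = method_str.substr(0, plus);
      if (plus != std::string::npos) {
        std::string caches = method_str.substr(plus + 1);
        if (caches == "missing_types") {
          out.missing_types = true;
        } else {
          std::stringstream ss(caches);
          for (std::string c; std::getline(ss, c, ',');) {
            out.inline_classes.push_back(c);
          }
        }
      }
      return out;
    }

    int main() {
      ParsedLine p = ParseProfileLine(
          "LTestInline;->inlinePolymorphic(LSuper;)I+LSubA;,LSubB;,LSubC;");
      std::cout << p.klass << " / " << p.method << " / "
                << p.inline_classes.size() << " inline-cache classes\n";
    }
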
 
diff --git a/runtime/Android.bp b/runtime/Android.bp
index b4c7b9c..6c3bc04 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -99,6 +99,7 @@
         "intern_table.cc",
         "interpreter/interpreter.cc",
         "interpreter/interpreter_common.cc",
+        "interpreter/interpreter_intrinsics.cc",
         "interpreter/interpreter_switch_impl.cc",
         "interpreter/unstarted_runtime.cc",
         "java_vm_ext.cc",
@@ -148,7 +149,6 @@
         "native/dalvik_system_VMStack.cc",
         "native/dalvik_system_ZygoteHooks.cc",
         "native/java_lang_Class.cc",
-        "native/java_lang_DexCache.cc",
         "native/java_lang_Object.cc",
         "native/java_lang_String.cc",
         "native/java_lang_StringFactory.cc",
@@ -156,6 +156,7 @@
         "native/java_lang_Thread.cc",
         "native/java_lang_Throwable.cc",
         "native/java_lang_VMClassLoader.cc",
+        "native/java_lang_Void.cc",
         "native/java_lang_invoke_MethodHandleImpl.cc",
         "native/java_lang_ref_FinalizerReference.cc",
         "native/java_lang_ref_Reference.cc",
diff --git a/runtime/arch/arm/fault_handler_arm.cc b/runtime/arch/arm/fault_handler_arm.cc
index daa2dff..923ff4f 100644
--- a/runtime/arch/arm/fault_handler_arm.cc
+++ b/runtime/arch/arm/fault_handler_arm.cc
@@ -47,24 +47,6 @@
   return instr_size;
 }
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context) {
-  // Note that in this handler we set up the registers and return to
-  // longjmp directly rather than going through an assembly language stub.  The
-  // reason for this is that longjmp is (currently) in ARM mode and that would
-  // require switching modes in the stub - incurring an unwanted relocation.
-
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  Thread* self = Thread::Current();
-  CHECK(self != nullptr);  // This will cause a SIGABRT if self is null.
-
-  sc->arm_r0 = reinterpret_cast<uintptr_t>(*self->GetNestedSignalState());
-  sc->arm_r1 = 1;
-  sc->arm_pc = reinterpret_cast<uintptr_t>(longjmp);
-  VLOG(signals) << "longjmp address: " << reinterpret_cast<void*>(sc->arm_pc);
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/arm/instruction_set_features_arm_test.cc b/runtime/arch/arm/instruction_set_features_arm_test.cc
index 6d5dd6d..3582351 100644
--- a/runtime/arch/arm/instruction_set_features_arm_test.cc
+++ b/runtime/arch/arm/instruction_set_features_arm_test.cc
@@ -34,6 +34,18 @@
   EXPECT_STREQ("div,atomic_ldrd_strd,-armv8a", krait_features->GetFeatureString().c_str());
   EXPECT_EQ(krait_features->AsBitmap(), 3U);
 
+  // Build features for a 32-bit ARM kryo processor.
+  std::unique_ptr<const InstructionSetFeatures> kryo_features(
+      InstructionSetFeatures::FromVariant(kArm, "kryo", &error_msg));
+  ASSERT_TRUE(kryo_features.get() != nullptr) << error_msg;
+
+  ASSERT_EQ(kryo_features->GetInstructionSet(), kArm);
+  EXPECT_TRUE(kryo_features->Equals(kryo_features.get()));
+  EXPECT_TRUE(kryo_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_TRUE(kryo_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("div,atomic_ldrd_strd,armv8a", kryo_features->GetFeatureString().c_str());
+  EXPECT_EQ(kryo_features->AsBitmap(), 7U);
+
   // Build features for a 32-bit ARM denver processor.
   std::unique_ptr<const InstructionSetFeatures> denver_features(
       InstructionSetFeatures::FromVariant(kArm, "denver", &error_msg));
@@ -86,6 +98,18 @@
   EXPECT_STREQ("div,atomic_ldrd_strd,-armv8a", krait_features->GetFeatureString().c_str());
   EXPECT_EQ(krait_features->AsBitmap(), 3U);
 
+  // Build features for a 32-bit ARM processor with LPAE and div.
+  std::unique_ptr<const InstructionSetFeatures> kryo_features(
+      base_features->AddFeaturesFromString("atomic_ldrd_strd,div", &error_msg));
+  ASSERT_TRUE(kryo_features.get() != nullptr) << error_msg;
+
+  ASSERT_EQ(kryo_features->GetInstructionSet(), kArm);
+  EXPECT_TRUE(kryo_features->Equals(krait_features.get()));
+  EXPECT_TRUE(kryo_features->AsArmInstructionSetFeatures()->HasDivideInstruction());
+  EXPECT_TRUE(kryo_features->AsArmInstructionSetFeatures()->HasAtomicLdrdAndStrd());
+  EXPECT_STREQ("div,atomic_ldrd_strd,-armv8a", kryo_features->GetFeatureString().c_str());
+  EXPECT_EQ(kryo_features->AsBitmap(), 3U);
+
   // Build features for a 32-bit ARM processor with LPAE and div flipped.
   std::unique_ptr<const InstructionSetFeatures> denver_features(
       base_features->AddFeaturesFromString("div,atomic_ldrd_strd,armv8a", &error_msg));
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 8531091..029de46 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -1487,6 +1487,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    mov r0, r12  // Load interface method
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
@@ -1613,6 +1614,11 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
+/*
+ * Called to attempt to execute an obsolete method.
+ */
+ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
+
     /*
      * Routine that intercepts method calls and returns.
      */
diff --git a/runtime/arch/arm64/fault_handler_arm64.cc b/runtime/arch/arm64/fault_handler_arm64.cc
index c02be87..193af58 100644
--- a/runtime/arch/arm64/fault_handler_arm64.cc
+++ b/runtime/arch/arm64/fault_handler_arm64.cc
@@ -39,21 +39,6 @@
 
 namespace art {
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context) {
-  // To match the case used in ARM we return directly to the longjmp function
-  // rather than through a trivial assembly language stub.
-
-  struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
-  struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
-  Thread* self = Thread::Current();
-  CHECK(self != nullptr);       // This will cause a SIGABRT if self is null.
-
-  sc->regs[0] = reinterpret_cast<uintptr_t>(*self->GetNestedSignalState());
-  sc->regs[1] = 1;
-  sc->pc = reinterpret_cast<uintptr_t>(longjmp);
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo ATTRIBUTE_UNUSED, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 7cb50b7..b2bbd0d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -1966,6 +1966,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    mov x0, xIP0  // Load interface method
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
@@ -2151,6 +2152,11 @@
     RETURN_OR_DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
+/*
+ * Called to attempt to execute an obsolete method.
+ */
+ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
+
 
 //
 // Instrumentation-related stubs
@@ -2378,9 +2384,8 @@
     ret
 .Lnot_marked_rb_\name:
     // Check if the top two bits are one; if so, this is a forwarding address.
-    mvn wIP0, wIP0
-    cmp wzr, wIP0, lsr #30
-    beq .Lret_forwarding_address\name
+    tst   wIP0, wIP0, lsl #1
+    bmi   .Lret_forwarding_address\name
 .Lslow_rb_\name:
     /*
      * Allocate 44 stack slots * 8 = 352 bytes:
@@ -2451,10 +2456,9 @@
     DECREASE_FRAME 352
     ret
 .Lret_forwarding_address\name:
-    mvn wIP0, wIP0
     // Shift left by the forwarding address shift. This clears out the state bits since they are
     // in the top 2 bits of the lock word.
-    lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
+    lsl   \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
     ret
 END \name
 .endm
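
The new `tst wIP0, wIP0, lsl #1` / `bmi` pair replaces the old mvn/cmp sequence: ANDing the lock word with itself shifted left by one sets the sign flag exactly when bits 31 and 30 are both one, i.e. when the word encodes a forwarding address. The same check in portable form:

    #include <cassert>
    #include <cstdint>

    // True iff the top two bits of the lock word are both set, which is how
    // the forwarding-address state is encoded (see the comment above).
    bool IsForwardingAddress(uint32_t lock_word) {
      // (lock_word << 1) & lock_word has bit 31 set exactly when bits 31 and
      // 30 of lock_word are both set -- the tst/bmi trick in one expression.
      return static_cast<int32_t>(lock_word & (lock_word << 1)) < 0;
    }

    int main() {
      assert(IsForwardingAddress(0xC0000000u));
      assert(!IsForwardingAddress(0x80000000u));  // only bit 31
      assert(!IsForwardingAddress(0x40000000u));  // only bit 30
      assert(IsForwardingAddress(0xFFFFFFFFu));
      return 0;
    }
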
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index 36f9ea7..2349620 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -32,6 +32,33 @@
 // Cast entrypoints.
 extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regXX uses a non-standard calling
+// convention: it expects its input in register XX+1 and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg14(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -59,9 +86,71 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-// No read barrier entrypoints for marking registers.
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
-                                  bool is_marking ATTRIBUTE_UNUSED) {}
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
+                "Non-direct C stub marked direct.");
+  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
+                "Non-direct C stub marked direct.");
+}
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // Note: MIPS has asserts checking for the type of entrypoint. Don't move it
@@ -287,77 +376,19 @@
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierJni), "Direct C stub not marked direct.");
-  // Read barriers (and these entry points in particular) are not
-  // supported in the compiler on MIPS32.
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  // Cannot use the following registers to pass arguments:
+  // 0(ZERO), 1(AT), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA).
+  // Note that there are only 30 entry points: 00 for register 1(AT), ..., 29 for register 30(S8).
   qpoints->pReadBarrierMarkReg00 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg00),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg01 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg01),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg02 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg02),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg03 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg03),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg04 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg04),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg05 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg05),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg06 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg06),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg07 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg07),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg08 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg08),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg09 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg09),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg10 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg10),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg11 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg11),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg12 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg12),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg13 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg13),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg14 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg14),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierMarkReg15 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg15),
                 "Non-direct C stub marked direct.");
   qpoints->pReadBarrierMarkReg16 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg16),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg17 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg17),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg18 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg18),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg19 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg19),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg20 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg20),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg21 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg21),
-                "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg22 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg22),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierMarkReg23 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg23),
                 "Non-direct C stub marked direct.");
@@ -376,9 +407,6 @@
   qpoints->pReadBarrierMarkReg28 = nullptr;
   static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg28),
                 "Non-direct C stub marked direct.");
-  qpoints->pReadBarrierMarkReg29 = nullptr;
-  static_assert(!IsDirectEntrypoint(kQuickReadBarrierMarkReg29),
-                "Non-direct C stub marked direct.");
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   static_assert(IsDirectEntrypoint(kQuickReadBarrierSlow), "Direct C stub not marked direct.");
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
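
The pattern in the hunks above (install the per-register stub while the collector is marking, clear it otherwise) is worth spelling out. A minimal sketch in ordinary C++, with hypothetical names, since the real QuickEntryPoints struct declares one field per register and cannot be indexed in a loop:

    #include <cstddef>

    // Sketch only: stand-ins for the per-register assembly stubs and the
    // entrypoint table. Slot N serves CPU register N+1, mirroring
    // pReadBarrierMarkRegXX above.
    using MarkStub = void* (*)(void* obj);

    void* FakeMarkStub(void* obj) { return obj; }  // stand-in for art_quick_read_barrier_mark_regXX

    struct EntryPoints {
      MarkStub mark_reg[30];
    };

    // When the concurrent copying GC starts or stops a marking phase, every
    // mark entrypoint is installed or cleared in one pass.
    void UpdateMarkEntrypoints(EntryPoints* points, bool is_marking) {
      for (std::size_t i = 0; i < 30; ++i) {
        points->mark_reg[i] = is_marking ? FakeMarkStub : nullptr;
      }
    }

Compiled code then only has to null-check the slot for the register holding the reference, which is cheaper than querying the collector's state on every read.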
diff --git a/runtime/arch/mips/fault_handler_mips.cc b/runtime/arch/mips/fault_handler_mips.cc
index 1792f31..f9c19e8 100644
--- a/runtime/arch/mips/fault_handler_mips.cc
+++ b/runtime/arch/mips/fault_handler_mips.cc
@@ -35,10 +35,6 @@
 
 namespace art {
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context ATTRIBUTE_UNUSED) {
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index ec8ae85..722a679 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1763,6 +1763,7 @@
 
 .Lconflict_trampoline:
     # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
+    move    $a0, $t7                                         # Load interface method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
@@ -1876,6 +1877,14 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
+    .extern artInvokeObsoleteMethod
+ENTRY art_invoke_obsolete_method_stub
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    la      $t9, artInvokeObsoleteMethod
+    jalr    $t9                                 # (Method* method, Thread* self)
+    move    $a1, rSELF                          # pass Thread::Current
+END art_invoke_obsolete_method_stub
+
     /*
      * Routine that intercepts method calls and returns.
      */
@@ -2048,11 +2057,12 @@
     lw    $t0, MIRROR_STRING_COUNT_OFFSET($a0)    # this.length()
 #endif
     slt   $t1, $a2, $zero # if fromIndex < 0
-#if defined(_MIPS_ARCH_MIPS32R6) || defined(_MIPS_ARCH_MIPS64R6)
+#if defined(_MIPS_ARCH_MIPS32R6)
     seleqz $a2, $a2, $t1  #     fromIndex = 0;
 #else
     movn   $a2, $zero, $t1 #    fromIndex = 0;
 #endif
+
 #if (STRING_COMPRESSION_FEATURE)
     srl   $t0, $a3, 1     # $a3 holds count (with flag) and $t0 holds actual length
 #endif
@@ -2196,6 +2206,151 @@
     subu   $v0, $t0, $t1  # return (this.charAt(i) - anotherString.charAt(i))
 END art_quick_string_compareto
 
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    /* TODO: optimizations: mark bit, forwarding. */
+    addiu   $sp, $sp, -160      # includes 16 bytes of space for argument registers a0-a3
+    .cfi_adjust_cfa_offset 160
+
+    sw      $ra, 156($sp)
+    .cfi_rel_offset 31, 156
+    sw      $t8, 152($sp)
+    .cfi_rel_offset 24, 152
+    sw      $t7, 148($sp)
+    .cfi_rel_offset 15, 148
+    sw      $t6, 144($sp)
+    .cfi_rel_offset 14, 144
+    sw      $t5, 140($sp)
+    .cfi_rel_offset 13, 140
+    sw      $t4, 136($sp)
+    .cfi_rel_offset 12, 136
+    sw      $t3, 132($sp)
+    .cfi_rel_offset 11, 132
+    sw      $t2, 128($sp)
+    .cfi_rel_offset 10, 128
+    sw      $t1, 124($sp)
+    .cfi_rel_offset 9, 124
+    sw      $t0, 120($sp)
+    .cfi_rel_offset 8, 120
+    sw      $a3, 116($sp)
+    .cfi_rel_offset 7, 116
+    sw      $a2, 112($sp)
+    .cfi_rel_offset 6, 112
+    sw      $a1, 108($sp)
+    .cfi_rel_offset 5, 108
+    sw      $a0, 104($sp)
+    .cfi_rel_offset 4, 104
+    sw      $v1, 100($sp)
+    .cfi_rel_offset 3, 100
+    sw      $v0, 96($sp)
+    .cfi_rel_offset 2, 96
+
+    la      $t9, artReadBarrierMark
+
+    sdc1    $f18, 88($sp)
+    sdc1    $f16, 80($sp)
+    sdc1    $f14, 72($sp)
+    sdc1    $f12, 64($sp)
+    sdc1    $f10, 56($sp)
+    sdc1    $f8,  48($sp)
+    sdc1    $f6,  40($sp)
+    sdc1    $f4,  32($sp)
+    sdc1    $f2,  24($sp)
+
+    .ifnc \reg, $a0
+      move  $a0, \reg           # pass obj from `reg` in a0
+    .endif
+    jalr    $t9                 # v0 <- artReadBarrierMark(obj)
+    sdc1    $f0,  16($sp)       # in delay slot
+
+    lw      $ra, 156($sp)
+    .cfi_restore 31
+    lw      $t8, 152($sp)
+    .cfi_restore 24
+    lw      $t7, 148($sp)
+    .cfi_restore 15
+    lw      $t6, 144($sp)
+    .cfi_restore 14
+    lw      $t5, 140($sp)
+    .cfi_restore 13
+    lw      $t4, 136($sp)
+    .cfi_restore 12
+    lw      $t3, 132($sp)
+    .cfi_restore 11
+    lw      $t2, 128($sp)
+    .cfi_restore 10
+    lw      $t1, 124($sp)
+    .cfi_restore 9
+    lw      $t0, 120($sp)
+    .cfi_restore 8
+    lw      $a3, 116($sp)
+    .cfi_restore 7
+    lw      $a2, 112($sp)
+    .cfi_restore 6
+    lw      $a1, 108($sp)
+    .cfi_restore 5
+    lw      $a0, 104($sp)
+    .cfi_restore 4
+    lw      $v1, 100($sp)
+    .cfi_restore 3
+
+    .ifnc \reg, $v0
+      move  \reg, $v0           # `reg` <- v0
+      lw    $v0, 96($sp)
+      .cfi_restore 2
+    .endif
+
+    ldc1    $f18, 88($sp)
+    ldc1    $f16, 80($sp)
+    ldc1    $f14, 72($sp)
+    ldc1    $f12, 64($sp)
+    ldc1    $f10, 56($sp)
+    ldc1    $f8,  48($sp)
+    ldc1    $f6,  40($sp)
+    ldc1    $f4,  32($sp)
+    ldc1    $f2,  24($sp)
+    ldc1    $f0,  16($sp)
+
+    jalr    $zero, $ra
+    addiu   $sp, $sp, 160
+    .cfi_adjust_cfa_offset -160
+END \name
+.endm
+
+// Note that art_quick_read_barrier_mark_regXX corresponds to register XX+1.
+// ZERO (register 0) is reserved.
+// AT (register 1) is reserved as a temporary/scratch register.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, $v0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, $v1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, $a0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, $a1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, $a2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, $a3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, $t0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, $t1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, $t2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, $t3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, $t4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, $t5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, $t6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, $t7
+// S0 and S1 (registers 16 and 17) are reserved as the suspend check and thread registers.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, $s2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, $s3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, $s4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, $s5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, $s6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7
+// T8 and T9 (registers 24 and 25) are reserved as temporary/scratch registers.
+// K0, K1, GP, SP (registers 26 - 29) are reserved.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8
+// RA (register 31) is reserved.
+
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME
diff --git a/runtime/arch/mips64/asm_support_mips64.S b/runtime/arch/mips64/asm_support_mips64.S
index 35f20fb..ef82bd2 100644
--- a/runtime/arch/mips64/asm_support_mips64.S
+++ b/runtime/arch/mips64/asm_support_mips64.S
@@ -70,14 +70,16 @@
 // Macros to poison (negate) the reference for heap poisoning.
 .macro POISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
-    subu \rRef, $zero, \rRef
+    dsubu \rRef, $zero, \rRef
+    dext  \rRef, \rRef, 0, 32
 #endif  // USE_HEAP_POISONING
 .endm
 
 // Macros to unpoison (negate) the reference for heap poisoning.
 .macro UNPOISON_HEAP_REF rRef
 #ifdef USE_HEAP_POISONING
-    subu \rRef, $zero, \rRef
+    dsubu \rRef, $zero, \rRef
+    dext  \rRef, \rRef, 0, 32
 #endif  // USE_HEAP_POISONING
 .endm
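
The switch from subu to dsubu plus dext deserves a note: on MIPS64, a 32-bit subu result is sign-extended into the 64-bit register, but a poisoned 32-bit heap reference must stay zero-extended. A sketch of the computation as I read the macros (plain C++, not ART code):

    #include <cstdint>

    // Poisoning stores the arithmetic negation of the 32-bit reference.
    // dsubu performs the full 64-bit negate; dext ..., 0, 32 keeps only the
    // low 32 bits, zero-extended, which is how a compressed reference must
    // sit in a 64-bit register.
    std::uint64_t PoisonRef(std::uint64_t ref) {
      return (0u - ref) & 0xFFFFFFFFu;
    }

    std::uint64_t UnpoisonRef(std::uint64_t poisoned) {
      return (0u - poisoned) & 0xFFFFFFFFu;  // negation mod 2^32 is its own inverse
    }

Because negation modulo 2^32 is self-inverse, the same two instructions implement both POISON_HEAP_REF and UNPOISON_HEAP_REF.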
 
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index bc17d47..66405cb 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -32,6 +32,32 @@
 // Cast entrypoints.
 extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
+// Read barrier entrypoints.
+// art_quick_read_barrier_mark_regXX uses a non-standard calling
+// convention: it expects its input in register XX+1 and returns its
+// result in that same register, and saves and restores all
+// caller-save registers.
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg01(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg02(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg03(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg04(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg05(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg06(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg07(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg08(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg09(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg10(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg11(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg12(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg13(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg17(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg18(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg19(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg20(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg21(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg22(mirror::Object*);
+extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -60,8 +86,27 @@
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
-// No read barrier entrypoints for marking registers.
-void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
-                                  bool is_marking ATTRIBUTE_UNUSED) {}
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+}
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
@@ -103,38 +149,20 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  // Read barriers (and these entry points in particular) are not
-  // supported in the compiler on MIPS64.
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
+  // Cannot use the following registers to pass arguments:
+  // 0(ZERO), 1(AT), 15(T3), 16(S0), 17(S1), 24(T8), 25(T9), 26(K0), 27(K1), 28(GP), 29(SP), 31(RA).
+  // Note that there are only 30 entry points: 00 for register 1(AT), ..., 29 for register 30(S8).
   qpoints->pReadBarrierMarkReg00 = nullptr;
-  qpoints->pReadBarrierMarkReg01 = nullptr;
-  qpoints->pReadBarrierMarkReg02 = nullptr;
-  qpoints->pReadBarrierMarkReg03 = nullptr;
-  qpoints->pReadBarrierMarkReg04 = nullptr;
-  qpoints->pReadBarrierMarkReg05 = nullptr;
-  qpoints->pReadBarrierMarkReg06 = nullptr;
-  qpoints->pReadBarrierMarkReg07 = nullptr;
-  qpoints->pReadBarrierMarkReg08 = nullptr;
-  qpoints->pReadBarrierMarkReg09 = nullptr;
-  qpoints->pReadBarrierMarkReg10 = nullptr;
-  qpoints->pReadBarrierMarkReg11 = nullptr;
-  qpoints->pReadBarrierMarkReg12 = nullptr;
-  qpoints->pReadBarrierMarkReg13 = nullptr;
   qpoints->pReadBarrierMarkReg14 = nullptr;
   qpoints->pReadBarrierMarkReg15 = nullptr;
   qpoints->pReadBarrierMarkReg16 = nullptr;
-  qpoints->pReadBarrierMarkReg17 = nullptr;
-  qpoints->pReadBarrierMarkReg18 = nullptr;
-  qpoints->pReadBarrierMarkReg19 = nullptr;
-  qpoints->pReadBarrierMarkReg20 = nullptr;
-  qpoints->pReadBarrierMarkReg21 = nullptr;
-  qpoints->pReadBarrierMarkReg22 = nullptr;
   qpoints->pReadBarrierMarkReg23 = nullptr;
   qpoints->pReadBarrierMarkReg24 = nullptr;
   qpoints->pReadBarrierMarkReg25 = nullptr;
   qpoints->pReadBarrierMarkReg26 = nullptr;
   qpoints->pReadBarrierMarkReg27 = nullptr;
   qpoints->pReadBarrierMarkReg28 = nullptr;
-  qpoints->pReadBarrierMarkReg29 = nullptr;
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/mips64/fault_handler_mips64.cc b/runtime/arch/mips64/fault_handler_mips64.cc
index 709cab5..d668d3a 100644
--- a/runtime/arch/mips64/fault_handler_mips64.cc
+++ b/runtime/arch/mips64/fault_handler_mips64.cc
@@ -35,10 +35,6 @@
 
 namespace art {
 
-void FaultManager::HandleNestedSignal(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
-                                      void* context ATTRIBUTE_UNUSED) {
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.cc b/runtime/arch/mips64/instruction_set_features_mips64.cc
index 5757906..08d0bac 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64.cc
@@ -30,22 +30,52 @@
 
 Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
+  bool msa = true;
   if (variant != "default" && variant != "mips64r6") {
     LOG(WARNING) << "Unexpected CPU variant for Mips64 using defaults: " << variant;
   }
-  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures());
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa));
 }
 
-Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromBitmap(uint32_t bitmap ATTRIBUTE_UNUSED) {
-  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures());
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+  bool msa = (bitmap & kMsaBitfield) != 0;
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa));
 }
 
 Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromCppDefines() {
-  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures());
+#if defined(_MIPS_ARCH_MIPS64R6)
+  const bool msa = true;
+#else
+  const bool msa = false;
+#endif
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa));
 }
 
 Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromCpuInfo() {
-  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures());
+  // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
+  // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
+  bool msa = false;
+
+  std::ifstream in("/proc/cpuinfo");
+  if (!in.fail()) {
+    while (!in.eof()) {
+      std::string line;
+      std::getline(in, line);
+      if (!in.eof()) {
+        LOG(INFO) << "cpuinfo line: " << line;
+        if (line.find("ASEs") != std::string::npos) {
+          LOG(INFO) << "found Application Specific Extensions";
+          if (line.find("msa") != std::string::npos) {
+            msa = true;
+          }
+        }
+      }
+    }
+    in.close();
+  } else {
+    LOG(ERROR) << "Failed to open /proc/cpuinfo";
+  }
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(msa));
 }
 
 Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromHwcap() {
@@ -62,28 +92,40 @@
   if (kMips64 != other->GetInstructionSet()) {
     return false;
   }
-  return true;
+  const Mips64InstructionSetFeatures* other_as_mips64 = other->AsMips64InstructionSetFeatures();
+  return msa_ == other_as_mips64->msa_;
 }
 
 uint32_t Mips64InstructionSetFeatures::AsBitmap() const {
-  return 0;
+  return (msa_ ? kMsaBitfield : 0);
 }
 
 std::string Mips64InstructionSetFeatures::GetFeatureString() const {
-  return "default";
+  std::string result;
+  if (msa_) {
+    result += "msa";
+  } else {
+    result += "-msa";
+  }
+  return result;
 }
 
 std::unique_ptr<const InstructionSetFeatures>
 Mips64InstructionSetFeatures::AddFeaturesFromSplitString(
     const std::vector<std::string>& features, std::string* error_msg) const {
-  auto i = features.begin();
-  if (i != features.end()) {
-    // We don't have any features.
+  bool msa = msa_;
+  for (auto i = features.begin(); i != features.end(); i++) {
     std::string feature = android::base::Trim(*i);
-    *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
-    return nullptr;
+    if (feature == "msa") {
+      msa = true;
+    } else if (feature == "-msa") {
+      msa = false;
+    } else {
+      *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
+      return nullptr;
+    }
   }
-  return std::unique_ptr<const InstructionSetFeatures>(new Mips64InstructionSetFeatures());
+  return std::unique_ptr<const InstructionSetFeatures>(new Mips64InstructionSetFeatures(msa));
 }
 
 }  // namespace art
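
For readers wiring this up, a hypothetical usage sketch of the round trip through the new MSA plumbing; it assumes the relevant ART headers are on the include path, and AddFeaturesFromString is the public wrapper that splits the string and dispatches to the AddFeaturesFromSplitString override above:

    #include <memory>
    #include <string>

    void MsaRoundTrip() {
      std::string error_msg;
      std::unique_ptr<const art::InstructionSetFeatures> base(
          art::InstructionSetFeatures::FromVariant(art::kMips64, "default", &error_msg));
      // "default" now implies MSA: GetFeatureString() == "msa", AsBitmap() == 1.
      std::unique_ptr<const art::InstructionSetFeatures> no_msa(
          base->AddFeaturesFromString("-msa", &error_msg));
      // "-msa" clears the bit: GetFeatureString() == "-msa", AsBitmap() == 0.
    }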
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.h b/runtime/arch/mips64/instruction_set_features_mips64.h
index c80c466..d9f30c7 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.h
+++ b/runtime/arch/mips64/instruction_set_features_mips64.h
@@ -58,6 +58,11 @@
 
   std::string GetFeatureString() const OVERRIDE;
 
+  // Does it have MSA (MIPS SIMD Architecture) support?
+  bool HasMsa() const {
+    return msa_;
+  }
+
   virtual ~Mips64InstructionSetFeatures() {}
 
  protected:
@@ -67,9 +72,16 @@
                                  std::string* error_msg) const OVERRIDE;
 
  private:
-  Mips64InstructionSetFeatures() : InstructionSetFeatures() {
+  explicit Mips64InstructionSetFeatures(bool msa) : InstructionSetFeatures(), msa_(msa) {
   }
 
+  // Bitmap positions for encoding features as a bitmap.
+  enum {
+    kMsaBitfield = 1,
+  };
+
+  const bool msa_;
+
   DISALLOW_COPY_AND_ASSIGN(Mips64InstructionSetFeatures);
 };
 
diff --git a/runtime/arch/mips64/instruction_set_features_mips64_test.cc b/runtime/arch/mips64/instruction_set_features_mips64_test.cc
index 380c4e5..0ba0bd4 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64_test.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64_test.cc
@@ -20,15 +20,31 @@
 
 namespace art {
 
-TEST(Mips64InstructionSetFeaturesTest, Mips64Features) {
+TEST(Mips64InstructionSetFeaturesTest, Mips64FeaturesFromDefaultVariant) {
   std::string error_msg;
   std::unique_ptr<const InstructionSetFeatures> mips64_features(
       InstructionSetFeatures::FromVariant(kMips64, "default", &error_msg));
   ASSERT_TRUE(mips64_features.get() != nullptr) << error_msg;
   EXPECT_EQ(mips64_features->GetInstructionSet(), kMips64);
   EXPECT_TRUE(mips64_features->Equals(mips64_features.get()));
-  EXPECT_STREQ("default", mips64_features->GetFeatureString().c_str());
-  EXPECT_EQ(mips64_features->AsBitmap(), 0U);
+  EXPECT_STREQ("msa", mips64_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips64_features->AsBitmap(), 1U);
+}
+
+TEST(Mips64InstructionSetFeaturesTest, Mips64FeaturesFromR6Variant) {
+  std::string error_msg;
+  std::unique_ptr<const InstructionSetFeatures> mips64r6_features(
+      InstructionSetFeatures::FromVariant(kMips64, "mips64r6", &error_msg));
+  ASSERT_TRUE(mips64r6_features.get() != nullptr) << error_msg;
+  EXPECT_EQ(mips64r6_features->GetInstructionSet(), kMips64);
+  EXPECT_TRUE(mips64r6_features->Equals(mips64r6_features.get()));
+  EXPECT_STREQ("msa", mips64r6_features->GetFeatureString().c_str());
+  EXPECT_EQ(mips64r6_features->AsBitmap(), 1U);
+
+  std::unique_ptr<const InstructionSetFeatures> mips64_default_features(
+      InstructionSetFeatures::FromVariant(kMips64, "default", &error_msg));
+  ASSERT_TRUE(mips64_default_features.get() != nullptr) << error_msg;
+  EXPECT_TRUE(mips64r6_features->Equals(mips64_default_features.get()));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 28d7c77..9402232 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1715,6 +1715,7 @@
 
 .Lconflict_trampoline:
     # Call the runtime stub to populate the ImtConflictTable and jump to the resolved method.
+    move   $a0, $t0                                          # Load interface method.
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END art_quick_imt_conflict_trampoline
 
@@ -1817,6 +1818,13 @@
     DELIVER_PENDING_EXCEPTION
 END art_quick_to_interpreter_bridge
 
+    .extern artInvokeObsoleteMethod
+ENTRY art_invoke_obsolete_method_stub
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    jal     artInvokeObsoleteMethod    # (Method* method, Thread* self)
+    move    $a1, rSELF                 # pass Thread::Current
+END art_invoke_obsolete_method_stub
+
     /*
      * Routine that intercepts method calls and returns.
      */
@@ -2052,6 +2060,180 @@
 #endif
 END art_quick_indexof
 
+    /*
+     * Create a function `name` calling the ReadBarrier::Mark routine,
+     * getting its argument and returning its result through register
+     * `reg`, saving and restoring all caller-save registers.
+     */
+.macro READ_BARRIER_MARK_REG name, reg
+ENTRY \name
+    /* TODO: optimizations: mark bit, forwarding. */
+    daddiu  $sp, $sp, -320
+    .cfi_adjust_cfa_offset 320
+
+    sd      $ra, 312($sp)
+    .cfi_rel_offset 31, 312
+    sd      $t8, 304($sp)       # save t8 holding caller's gp
+    .cfi_rel_offset 24, 304
+    sd      $t3, 296($sp)
+    .cfi_rel_offset 15, 296
+    sd      $t2, 288($sp)
+    .cfi_rel_offset 14, 288
+    sd      $t1, 280($sp)
+    .cfi_rel_offset 13, 280
+    sd      $t0, 272($sp)
+    .cfi_rel_offset 12, 272
+    sd      $a7, 264($sp)
+    .cfi_rel_offset 11, 264
+    sd      $a6, 256($sp)
+    .cfi_rel_offset 10, 256
+    sd      $a5, 248($sp)
+    .cfi_rel_offset 9, 248
+    sd      $a4, 240($sp)
+    .cfi_rel_offset 8, 240
+    sd      $a3, 232($sp)
+    .cfi_rel_offset 7, 232
+    sd      $a2, 224($sp)
+    .cfi_rel_offset 6, 224
+    sd      $a1, 216($sp)
+    .cfi_rel_offset 5, 216
+    sd      $a0, 208($sp)
+    .cfi_rel_offset 4, 208
+    sd      $v1, 200($sp)
+    .cfi_rel_offset 3, 200
+    sd      $v0, 192($sp)
+    .cfi_rel_offset 2, 192
+
+    dla     $t9, artReadBarrierMark
+
+    sdc1    $f23, 184($sp)
+    sdc1    $f22, 176($sp)
+    sdc1    $f21, 168($sp)
+    sdc1    $f20, 160($sp)
+    sdc1    $f19, 152($sp)
+    sdc1    $f18, 144($sp)
+    sdc1    $f17, 136($sp)
+    sdc1    $f16, 128($sp)
+    sdc1    $f15, 120($sp)
+    sdc1    $f14, 112($sp)
+    sdc1    $f13, 104($sp)
+    sdc1    $f12,  96($sp)
+    sdc1    $f11,  88($sp)
+    sdc1    $f10,  80($sp)
+    sdc1    $f9,   72($sp)
+    sdc1    $f8,   64($sp)
+    sdc1    $f7,   56($sp)
+    sdc1    $f6,   48($sp)
+    sdc1    $f5,   40($sp)
+    sdc1    $f4,   32($sp)
+    sdc1    $f3,   24($sp)
+    sdc1    $f2,   16($sp)
+    sdc1    $f1,    8($sp)
+
+    .ifnc \reg, $a0
+      move  $a0, \reg           # pass obj from `reg` in a0
+    .endif
+    jalr    $t9                 # v0 <- artReadBarrierMark(obj)
+    sdc1    $f0,    0($sp)      # in delay slot
+
+    ld      $ra, 312($sp)
+    .cfi_restore 31
+    ld      $t8, 304($sp)       # restore t8 holding caller's gp
+    .cfi_restore 24
+    ld      $t3, 296($sp)
+    .cfi_restore 15
+    ld      $t2, 288($sp)
+    .cfi_restore 14
+    ld      $t1, 280($sp)
+    .cfi_restore 13
+    ld      $t0, 272($sp)
+    .cfi_restore 12
+    ld      $a7, 264($sp)
+    .cfi_restore 11
+    ld      $a6, 256($sp)
+    .cfi_restore 10
+    ld      $a5, 248($sp)
+    .cfi_restore 9
+    ld      $a4, 240($sp)
+    .cfi_restore 8
+    ld      $a3, 232($sp)
+    .cfi_restore 7
+    ld      $a2, 224($sp)
+    .cfi_restore 6
+    ld      $a1, 216($sp)
+    .cfi_restore 5
+    ld      $a0, 208($sp)
+    .cfi_restore 4
+    ld      $v1, 200($sp)
+    .cfi_restore 3
+
+    .ifnc \reg, $v0
+      move  \reg, $v0           # `reg` <- v0
+      ld    $v0, 192($sp)
+      .cfi_restore 2
+    .endif
+
+    ldc1    $f23, 184($sp)
+    ldc1    $f22, 176($sp)
+    ldc1    $f21, 168($sp)
+    ldc1    $f20, 160($sp)
+    ldc1    $f19, 152($sp)
+    ldc1    $f18, 144($sp)
+    ldc1    $f17, 136($sp)
+    ldc1    $f16, 128($sp)
+    ldc1    $f15, 120($sp)
+    ldc1    $f14, 112($sp)
+    ldc1    $f13, 104($sp)
+    ldc1    $f12,  96($sp)
+    ldc1    $f11,  88($sp)
+    ldc1    $f10,  80($sp)
+    ldc1    $f9,   72($sp)
+    ldc1    $f8,   64($sp)
+    ldc1    $f7,   56($sp)
+    ldc1    $f6,   48($sp)
+    ldc1    $f5,   40($sp)
+    ldc1    $f4,   32($sp)
+    ldc1    $f3,   24($sp)
+    ldc1    $f2,   16($sp)
+    ldc1    $f1,    8($sp)
+    ldc1    $f0,    0($sp)
+
+    .cpreturn                   # restore caller's gp from t8
+    jalr    $zero, $ra
+    daddiu  $sp, $sp, 320
+    .cfi_adjust_cfa_offset -320
+END \name
+.endm
+
+// Note that art_quick_read_barrier_mark_regXX corresponds to register XX+1.
+// ZERO (register 0) is reserved.
+// AT (register 1) is reserved as a temporary/scratch register.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, $v0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, $v1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, $a0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, $a1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, $a2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, $a3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, $a4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, $a5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, $a6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, $a7
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, $t0
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, $t1
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, $t2
+// T3 (register 15) is reserved as a temporary/scratch register.
+// S0 and S1 (registers 16 and 17) are reserved as the suspend check and thread registers.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, $s2
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, $s3
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, $s4
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, $s5
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, $s6
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, $s7
+// T8 and T9 (registers 24 and 25) are reserved as temporary/scratch registers.
+// K0, K1, GP, SP (registers 26 - 29) are reserved.
+READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, $s8
+// RA (register 31) is reserved.
+
 .extern artInvokePolymorphic
 ENTRY art_quick_invoke_polymorphic
     SETUP_SAVE_REFS_AND_ARGS_FRAME
diff --git a/runtime/arch/mips64/registers_mips64.cc b/runtime/arch/mips64/registers_mips64.cc
index 4959208..1ee2cdd 100644
--- a/runtime/arch/mips64/registers_mips64.cc
+++ b/runtime/arch/mips64/registers_mips64.cc
@@ -46,5 +46,14 @@
   return os;
 }
 
+std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs) {
+  if (rhs >= W0 && rhs < kNumberOfVectorRegisters) {
+    os << "w" << static_cast<int>(rhs);
+  } else {
+    os << "VectorRegister[" << static_cast<int>(rhs) << "]";
+  }
+  return os;
+}
+
 }  // namespace mips64
 }  // namespace art
diff --git a/runtime/arch/mips64/registers_mips64.h b/runtime/arch/mips64/registers_mips64.h
index 81fae72..30de2cc 100644
--- a/runtime/arch/mips64/registers_mips64.h
+++ b/runtime/arch/mips64/registers_mips64.h
@@ -107,6 +107,45 @@
 };
 std::ostream& operator<<(std::ostream& os, const FpuRegister& rhs);
 
+// Values for vector registers.
+enum VectorRegister {
+  W0  =  0,
+  W1  =  1,
+  W2  =  2,
+  W3  =  3,
+  W4  =  4,
+  W5  =  5,
+  W6  =  6,
+  W7  =  7,
+  W8  =  8,
+  W9  =  9,
+  W10 = 10,
+  W11 = 11,
+  W12 = 12,
+  W13 = 13,
+  W14 = 14,
+  W15 = 15,
+  W16 = 16,
+  W17 = 17,
+  W18 = 18,
+  W19 = 19,
+  W20 = 20,
+  W21 = 21,
+  W22 = 22,
+  W23 = 23,
+  W24 = 24,
+  W25 = 25,
+  W26 = 26,
+  W27 = 27,
+  W28 = 28,
+  W29 = 29,
+  W30 = 30,
+  W31 = 31,
+  kNumberOfVectorRegisters = 32,
+  kNoVectorRegister = -1,
+};
+std::ostream& operator<<(std::ostream& os, const VectorRegister& rhs);
+
 }  // namespace mips64
 }  // namespace art
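
A small usage sketch for the new stream operator (assuming the header and the implementation in registers_mips64.cc above are linked in; the printed forms follow that implementation):

    #include <iostream>

    void PrintVectorRegs() {
      std::cout << art::mips64::W7 << "\n";                 // prints "w7"
      std::cout << art::mips64::kNoVectorRegister << "\n";  // prints "VectorRegister[-1]"
    }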
 
diff --git a/runtime/arch/x86/fault_handler_x86.cc b/runtime/arch/x86/fault_handler_x86.cc
index a4d6bb4..f407ebf 100644
--- a/runtime/arch/x86/fault_handler_x86.cc
+++ b/runtime/arch/x86/fault_handler_x86.cc
@@ -75,12 +75,6 @@
 extern "C" void art_quick_throw_stack_overflow();
 extern "C" void art_quick_test_suspend();
 
-// Note this is different from the others (no underscore on 64 bit mac) due to
-// the way the symbol is defined in the .S file.
-// TODO: fix the symbols for 64 bit mac - there is a double underscore prefix for some
-// of them.
-extern "C" void art_nested_signal_return();
-
 // Get the size of an instruction in bytes.
 // Return 0 if the instruction is not handled.
 static uint32_t GetInstructionSize(const uint8_t* pc) {
@@ -247,21 +241,6 @@
   return pc - startpc;
 }
 
-void FaultManager::HandleNestedSignal(int, siginfo_t*, void* context) {
-  // For the Intel architectures we need to go to an assembly language
-  // stub.  This is because the 32 bit call to longjmp is much different
-  // from the 64 bit ABI call and pushing things onto the stack inside this
-  // handler was unwieldy and ugly.  The use of the stub means we can keep
-  // this code the same for both 32 and 64 bit.
-
-  Thread* self = Thread::Current();
-  CHECK(self != nullptr);  // This will cause a SIGABRT if self is null.
-
-  struct ucontext* uc = reinterpret_cast<struct ucontext*>(context);
-  uc->CTX_JMP_BUF = reinterpret_cast<uintptr_t>(*self->GetNestedSignalState());
-  uc->CTX_EIP = reinterpret_cast<uintptr_t>(art_nested_signal_return);
-}
-
 void FaultManager::GetMethodAndReturnPcAndSp(siginfo_t* siginfo, void* context,
                                              ArtMethod** out_method,
                                              uintptr_t* out_return_pc, uintptr_t* out_sp) {
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 8c907e0..6c0bcc9 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1806,6 +1806,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    movl %edi, %eax  // Load interface method
     POP EDI
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 END_FUNCTION art_quick_imt_conflict_trampoline
@@ -1937,6 +1938,11 @@
 END_FUNCTION art_quick_to_interpreter_bridge
 
     /*
+     * Called by managed code; saves callee-save registers and then calls artInvokeObsoleteMethod.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
+
+    /*
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
@@ -2136,19 +2142,6 @@
     ret
 END_FUNCTION art_quick_string_compareto
 
-// Return from a nested signal:
-// Entry:
-//  eax: address of jmp_buf in TLS
-
-DEFINE_FUNCTION art_nested_signal_return
-    SETUP_GOT_NOSAVE ebx            // sets %ebx for call into PLT
-    movl LITERAL(1), %ecx
-    PUSH ecx                        // second arg to longjmp (1)
-    PUSH eax                        // first arg to longjmp (jmp_buf)
-    call PLT_SYMBOL(longjmp)
-    UNREACHABLE
-END_FUNCTION art_nested_signal_return
-
 // Create a function `name` calling the ReadBarrier::Mark routine,
 // getting its argument and returning its result through register
 // `reg`, saving and restoring all caller-save registers.
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index f1be52e..8e2acab 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1662,6 +1662,7 @@
 .Lconflict_trampoline:
     // Call the runtime stub to populate the ImtConflictTable and jump to the
     // resolved method.
+    movq %r10, %rdi  // Load interface method
     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
 #endif  // __APPLE__
 END_FUNCTION art_quick_imt_conflict_trampoline
@@ -1901,6 +1902,12 @@
 END_FUNCTION art_quick_to_interpreter_bridge
 
     /*
+     * Called to catch an attempt to invoke an obsolete method.
+     * RDI = method being called.
+     */
+ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
+
+    /*
      * Routine that intercepts method calls and returns.
      */
 DEFINE_FUNCTION art_quick_instrumentation_entry
@@ -2099,18 +2106,6 @@
     ret
 END_FUNCTION art_quick_instance_of
 
-
-// Return from a nested signal:
-// Entry:
-//  rdi: address of jmp_buf in TLS
-
-DEFINE_FUNCTION art_nested_signal_return
-                                    // first arg to longjmp is already in correct register
-    movq LITERAL(1), %rsi           // second arg to longjmp (1)
-    call PLT_SYMBOL(longjmp)
-    UNREACHABLE
-END_FUNCTION art_nested_signal_return
-
 // Create a function `name` calling the ReadBarrier::Mark routine,
 // getting its argument and returning its result through register
 // `reg`, saving and restoring all caller-save registers.
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 75dd981..666ed8a 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -47,6 +47,10 @@
   void SetDeclaringClass(ObjPtr<mirror::Class> new_declaring_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::CompressedReference<mirror::Object>* GetDeclaringClassAddressWithoutBarrier() {
+    return declaring_class_.AddressWithoutBarrier();
+  }
+
   uint32_t GetAccessFlags() REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetAccessFlags(uint32_t new_access_flags) REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 685e26c..5cf0e0f 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -32,6 +32,7 @@
 #include "mirror/dex_cache-inl.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array.h"
+#include "mirror/string.h"
 #include "oat.h"
 #include "obj_ptr-inl.h"
 #include "quick/quick_method_frame_info.h"
@@ -56,8 +57,10 @@
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
       if (kCheckDeclaringClassState) {
-        CHECK(result->IsIdxLoaded() || result->IsErroneous())
-            << result->GetStatus() << " " << result->PrettyClass();
+        if (!(result->IsIdxLoaded() || result->IsErroneous())) {
+          LOG(FATAL_WITHOUT_ABORT) << "Class status: " << result->GetStatus();
+          LOG(FATAL) << result->PrettyClass();
+        }
       }
     } else {
       CHECK(result == nullptr) << this;
@@ -347,7 +350,11 @@
 
 inline uint16_t ArtMethod::GetClassDefIndex() {
   DCHECK(!IsProxyMethod());
-  return GetDeclaringClass()->GetDexClassDefIndex();
+  if (LIKELY(!IsObsolete())) {
+    return GetDeclaringClass()->GetDexClassDefIndex();
+  } else {
+    return FindObsoleteDexClassDefIndex();
+  }
 }
 
 inline const DexFile::ClassDef& ArtMethod::GetClassDef() {
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 9d74e7c..5a71be6 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -104,6 +104,16 @@
   UNREACHABLE();
 }
 
+uint16_t ArtMethod::FindObsoleteDexClassDefIndex() {
+  DCHECK(!Runtime::Current()->IsAotCompiler()) << PrettyMethod();
+  DCHECK(IsObsolete());
+  const DexFile* dex_file = GetDexFile();
+  const dex::TypeIndex declaring_class_type = dex_file->GetMethodId(GetDexMethodIndex()).class_idx_;
+  const DexFile::ClassDef* class_def = dex_file->FindClassDef(declaring_class_type);
+  CHECK(class_def != nullptr);
+  return dex_file->GetIndexForClassDef(*class_def);
+}
+
 mirror::String* ArtMethod::GetNameAsString(Thread* self) {
   CHECK(!IsProxyMethod());
   StackHandleScope<1> hs(self);
@@ -327,7 +337,8 @@
       // Ensure that we won't be accidentally calling quick compiled code when -Xint.
       if (kIsDebugBuild && runtime->GetInstrumentation()->IsForcedInterpretOnly()) {
         CHECK(!runtime->UseJitCompilation());
-        const void* oat_quick_code = (IsNative() || !IsInvokable() || IsProxyMethod())
+        const void* oat_quick_code =
+            (IsNative() || !IsInvokable() || IsProxyMethod() || IsObsolete())
             ? nullptr
             : GetOatMethodQuickCode(runtime->GetClassLinker()->GetImagePointerSize());
         CHECK(oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode())
diff --git a/runtime/art_method.h b/runtime/art_method.h
index cd1950c..51b6576 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -73,6 +73,10 @@
   ALWAYS_INLINE mirror::Class* GetDeclaringClassUnchecked()
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  mirror::CompressedReference<mirror::Object>* GetDeclaringClassAddressWithoutBarrier() {
+    return declaring_class_.AddressWithoutBarrier();
+  }
+
   void SetDeclaringClass(ObjPtr<mirror::Class> new_declaring_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -691,7 +695,7 @@
 
     // Pointer to JNI function registered to this method, or a function to resolve the JNI function,
     // or the profiling data for non-native methods, or an ImtConflictTable, or the
-    // single-implementation of an abstract method.
+    // single-implementation of an abstract/interface method.
     void* data_;
 
     // Method dispatch from quick compiled code invokes this pointer which may cause bridging into
@@ -700,6 +704,8 @@
   } ptr_sized_fields_;
 
  private:
+  uint16_t FindObsoleteDexClassDefIndex() REQUIRES_SHARED(Locks::mutator_lock_);
+
   bool IsAnnotatedWith(jclass klass, uint32_t visibility);
 
   static constexpr size_t PtrSizedFieldsOffset(PointerSize pointer_size) {
diff --git a/runtime/backtrace_helper.h b/runtime/backtrace_helper.h
new file mode 100644
index 0000000..ace118c
--- /dev/null
+++ b/runtime/backtrace_helper.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_BACKTRACE_HELPER_H_
+#define ART_RUNTIME_BACKTRACE_HELPER_H_
+
+#include <unwind.h>
+
+namespace art {
+
+// Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp.
+class BacktraceCollector {
+ public:
+  BacktraceCollector(uintptr_t* out_frames, size_t max_depth, size_t skip_count)
+      : out_frames_(out_frames), max_depth_(max_depth), skip_count_(skip_count) {}
+
+  size_t NumFrames() const {
+    return num_frames_;
+  }
+
+  // Collect the backtrace, do not call more than once.
+  void Collect() {
+    _Unwind_Backtrace(&Callback, this);
+  }
+
+ private:
+  static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) {
+    auto* const state = reinterpret_cast<BacktraceCollector*>(arg);
+    const uintptr_t ip = _Unwind_GetIP(context);
+    // Skip the first skip_count_ frames, e.g. this collector and its caller.
+    if (ip != 0 && state->skip_count_ > 0) {
+      --state->skip_count_;
+      return _URC_NO_REASON;
+    }
+    // ip may be off for ARM but it shouldn't matter since we only use it for hashing.
+    state->out_frames_[state->num_frames_] = ip;
+    state->num_frames_++;
+    return state->num_frames_ >= state->max_depth_ ? _URC_END_OF_STACK : _URC_NO_REASON;
+  }
+
+  uintptr_t* const out_frames_ = nullptr;
+  size_t num_frames_ = 0u;
+  const size_t max_depth_ = 0u;
+  size_t skip_count_ = 0u;
+};
+
+// A bounded sized backtrace.
+template <size_t kMaxFrames>
+class FixedSizeBacktrace {
+ public:
+  void Collect(size_t skip_count) {
+    BacktraceCollector collector(frames_, kMaxFrames, skip_count);
+    collector.Collect();
+    num_frames_ = collector.NumFrames();
+  }
+
+  uint64_t Hash() const {
+    uint64_t hash = 9314237;
+    for (size_t i = 0; i < num_frames_; ++i) {
+      hash = hash * 2654435761 + frames_[i];
+      hash += (hash >> 13) ^ (hash << 6);
+    }
+    return hash;
+  }
+
+ private:
+  uintptr_t frames_[kMaxFrames];
+  size_t num_frames_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_BACKTRACE_HELPER_H_
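
A hedged usage sketch of the new helper; the frame budget and skip count here are illustrative, not taken from a caller in this change:

    #include "backtrace_helper.h"  // the new header above

    #include <cinttypes>
    #include <cstdio>

    void LogAllocationSite() {
      art::FixedSizeBacktrace<16> trace;   // record at most 16 frames
      trace.Collect(/* skip_count */ 1);   // skip LogAllocationSite itself
      // The 64-bit hash serves as a cheap identity for duplicate-site detection.
      std::printf("allocation site hash: 0x%" PRIx64 "\n", trace.Hash());
    }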
diff --git a/runtime/base/arena_allocator.cc b/runtime/base/arena_allocator.cc
index db43319..e763e43 100644
--- a/runtime/base/arena_allocator.cc
+++ b/runtime/base/arena_allocator.cc
@@ -15,6 +15,7 @@
  */
 
 #include <algorithm>
+#include <cstddef>
 #include <iomanip>
 #include <numeric>
 
@@ -27,7 +28,7 @@
 
 namespace art {
 
-static constexpr size_t kMemoryToolRedZoneBytes = 8;
+constexpr size_t kMemoryToolRedZoneBytes = 8;
 constexpr size_t Arena::kDefaultSize;
 
 template <bool kCount>
@@ -165,26 +166,78 @@
   MEMORY_TOOL_MAKE_NOACCESS(ptr, size);
 }
 
-Arena::Arena() : bytes_allocated_(0), next_(nullptr) {
+Arena::Arena() : bytes_allocated_(0), memory_(nullptr), size_(0), next_(nullptr) {
 }
 
+class MallocArena FINAL : public Arena {
+ public:
+  explicit MallocArena(size_t size = Arena::kDefaultSize);
+  virtual ~MallocArena();
+ private:
+  static constexpr size_t RequiredOverallocation() {
+    return (alignof(std::max_align_t) < ArenaAllocator::kArenaAlignment)
+        ? ArenaAllocator::kArenaAlignment - alignof(std::max_align_t)
+        : 0u;
+  }
+
+  uint8_t* unaligned_memory_;
+};
+
 MallocArena::MallocArena(size_t size) {
-  memory_ = reinterpret_cast<uint8_t*>(calloc(1, size));
-  CHECK(memory_ != nullptr);  // Abort on OOM.
-  DCHECK_ALIGNED(memory_, ArenaAllocator::kAlignment);
+  // We need to guarantee kArenaAlignment aligned allocation for the new arena.
+  // TODO: Use std::aligned_alloc() when it becomes available with C++17.
+  constexpr size_t overallocation = RequiredOverallocation();
+  unaligned_memory_ = reinterpret_cast<uint8_t*>(calloc(1, size + overallocation));
+  CHECK(unaligned_memory_ != nullptr);  // Abort on OOM.
+  DCHECK_ALIGNED(unaligned_memory_, alignof(std::max_align_t));
+  if (overallocation == 0u) {
+    memory_ = unaligned_memory_;
+  } else {
+    memory_ = AlignUp(unaligned_memory_, ArenaAllocator::kArenaAlignment);
+    if (UNLIKELY(RUNNING_ON_MEMORY_TOOL > 0)) {
+      size_t head = memory_ - unaligned_memory_;
+      size_t tail = overallocation - head;
+      MEMORY_TOOL_MAKE_NOACCESS(unaligned_memory_, head);
+      MEMORY_TOOL_MAKE_NOACCESS(memory_ + size, tail);
+    }
+  }
+  DCHECK_ALIGNED(memory_, ArenaAllocator::kArenaAlignment);
   size_ = size;
 }
 
 MallocArena::~MallocArena() {
-  free(reinterpret_cast<void*>(memory_));
+  constexpr size_t overallocation = RequiredOverallocation();
+  if (overallocation != 0u && UNLIKELY(RUNNING_ON_MEMORY_TOOL > 0)) {
+    size_t head = memory_ - unaligned_memory_;
+    size_t tail = overallocation - head;
+    MEMORY_TOOL_MAKE_UNDEFINED(unaligned_memory_, head);
+    MEMORY_TOOL_MAKE_UNDEFINED(memory_ + size_, tail);
+  }
+  free(reinterpret_cast<void*>(unaligned_memory_));
 }
 
+class MemMapArena FINAL : public Arena {
+ public:
+  MemMapArena(size_t size, bool low_4gb, const char* name);
+  virtual ~MemMapArena();
+  void Release() OVERRIDE;
+
+ private:
+  std::unique_ptr<MemMap> map_;
+};
+
 MemMapArena::MemMapArena(size_t size, bool low_4gb, const char* name) {
+  // Round up to a full page as that's the smallest unit of allocation for mmap()
+  // and we want to be able to use all memory that we actually allocate.
+  size = RoundUp(size, kPageSize);
   std::string error_msg;
   map_.reset(MemMap::MapAnonymous(
       name, nullptr, size, PROT_READ | PROT_WRITE, low_4gb, false, &error_msg));
   CHECK(map_.get() != nullptr) << error_msg;
   memory_ = map_->Begin();
+  static_assert(ArenaAllocator::kArenaAlignment <= kPageSize,
+                "Arena should not need stronger alignment than kPageSize.");
+  DCHECK_ALIGNED(memory_, ArenaAllocator::kArenaAlignment);
   size_ = map_->Size();
 }
 
@@ -332,20 +385,7 @@
   ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
   uint8_t* ret;
   if (UNLIKELY(rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
-    ret = AllocFromNewArena(rounded_bytes);
-    uint8_t* noaccess_begin = ret + bytes;
-    uint8_t* noaccess_end;
-    if (ret == arena_head_->Begin()) {
-      DCHECK(ptr_ - rounded_bytes == ret);
-      noaccess_end = end_;
-    } else {
-      // We're still using the old arena but `ret` comes from a new one just after it.
-      DCHECK(arena_head_->next_ != nullptr);
-      DCHECK(ret == arena_head_->next_->Begin());
-      DCHECK_EQ(rounded_bytes, arena_head_->next_->GetBytesAllocated());
-      noaccess_end = arena_head_->next_->End();
-    }
-    MEMORY_TOOL_MAKE_NOACCESS(noaccess_begin, noaccess_end - noaccess_begin);
+    ret = AllocFromNewArenaWithMemoryTool(rounded_bytes);
   } else {
     ret = ptr_;
     ptr_ += rounded_bytes;
@@ -356,6 +396,30 @@
   return ret;
 }
 
+void* ArenaAllocator::AllocWithMemoryToolAlign16(size_t bytes, ArenaAllocKind kind) {
+  // We mark all memory for a newly retrieved arena as inaccessible and then
+  // mark only the actually allocated memory as defined. That leaves red zones
+  // and padding between allocations marked as inaccessible.
+  size_t rounded_bytes = bytes + kMemoryToolRedZoneBytes;
+  DCHECK_ALIGNED(rounded_bytes, 8);  // `bytes` is 16-byte aligned, red zone is 8-byte aligned.
+  uintptr_t padding =
+      RoundUp(reinterpret_cast<uintptr_t>(ptr_), 16) - reinterpret_cast<uintptr_t>(ptr_);
+  ArenaAllocatorStats::RecordAlloc(rounded_bytes, kind);
+  uint8_t* ret;
+  if (UNLIKELY(padding + rounded_bytes > static_cast<size_t>(end_ - ptr_))) {
+    static_assert(kArenaAlignment >= 16, "Expecting sufficient alignment for new Arena.");
+    ret = AllocFromNewArenaWithMemoryTool(rounded_bytes);
+  } else {
+    ptr_ += padding;  // Leave padding inaccessible.
+    ret = ptr_;
+    ptr_ += rounded_bytes;
+  }
+  MEMORY_TOOL_MAKE_DEFINED(ret, bytes);
+  // Check that the memory is already zeroed out.
+  DCHECK(std::all_of(ret, ret + bytes, [](uint8_t val) { return val == 0u; }));
+  return ret;
+}
+
 ArenaAllocator::~ArenaAllocator() {
   // Reclaim all the arenas by giving them back to the thread pool.
   UpdateBytesAllocated();
@@ -386,6 +450,24 @@
   return new_arena->Begin();
 }
 
+uint8_t* ArenaAllocator::AllocFromNewArenaWithMemoryTool(size_t bytes) {
+  uint8_t* ret = AllocFromNewArena(bytes);
+  uint8_t* noaccess_begin = ret + bytes;
+  uint8_t* noaccess_end;
+  if (ret == arena_head_->Begin()) {
+    DCHECK(ptr_ - bytes == ret);
+    noaccess_end = end_;
+  } else {
+    // We're still using the old arena but `ret` comes from a new one just after it.
+    DCHECK(arena_head_->next_ != nullptr);
+    DCHECK(ret == arena_head_->next_->Begin());
+    DCHECK_EQ(bytes, arena_head_->next_->GetBytesAllocated());
+    noaccess_end = arena_head_->next_->End();
+  }
+  MEMORY_TOOL_MAKE_NOACCESS(noaccess_begin, noaccess_end - noaccess_begin);
+  return ret;
+}
+
 bool ArenaAllocator::Contains(const void* ptr) const {
   if (ptr >= begin_ && ptr < end_) {
     return true;
@@ -398,7 +480,9 @@
   return false;
 }
 
-MemStats::MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena,
+MemStats::MemStats(const char* name,
+                   const ArenaAllocatorStats* stats,
+                   const Arena* first_arena,
                    ssize_t lost_bytes_adjustment)
     : name_(name),
       stats_(stats),
diff --git a/runtime/base/arena_allocator.h b/runtime/base/arena_allocator.h
index 245ab3b..c39429c 100644
--- a/runtime/base/arena_allocator.h
+++ b/runtime/base/arena_allocator.h
@@ -34,7 +34,6 @@
 class ArenaAllocator;
 class ArenaStack;
 class ScopedArenaAllocator;
-class MemMap;
 class MemStats;
 
 template <typename T>
@@ -89,6 +88,7 @@
   kArenaAllocRegisterAllocator,
   kArenaAllocRegisterAllocatorValidate,
   kArenaAllocStackMapStream,
+  kArenaAllocVectorNode,
   kArenaAllocCodeGenerator,
   kArenaAllocAssembler,
   kArenaAllocParallelMoveResolver,
@@ -243,22 +243,6 @@
   DISALLOW_COPY_AND_ASSIGN(Arena);
 };
 
-class MallocArena FINAL : public Arena {
- public:
-  explicit MallocArena(size_t size = Arena::kDefaultSize);
-  virtual ~MallocArena();
-};
-
-class MemMapArena FINAL : public Arena {
- public:
-  MemMapArena(size_t size, bool low_4gb, const char* name);
-  virtual ~MemMapArena();
-  void Release() OVERRIDE;
-
- private:
-  std::unique_ptr<MemMap> map_;
-};
-
 class ArenaPool {
  public:
   explicit ArenaPool(bool use_malloc = true,
@@ -318,8 +302,31 @@
     return ret;
   }
 
+  // Returns zeroed memory.
+  void* AllocAlign16(size_t bytes, ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE {
+    // It is an error to request 16-byte aligned allocation of unaligned size.
+    DCHECK_ALIGNED(bytes, 16);
+    if (UNLIKELY(IsRunningOnMemoryTool())) {
+      return AllocWithMemoryToolAlign16(bytes, kind);
+    }
+    uintptr_t padding =
+        RoundUp(reinterpret_cast<uintptr_t>(ptr_), 16) - reinterpret_cast<uintptr_t>(ptr_);
+    ArenaAllocatorStats::RecordAlloc(bytes, kind);
+    if (UNLIKELY(padding + bytes > static_cast<size_t>(end_ - ptr_))) {
+      static_assert(kArenaAlignment >= 16, "Expecting sufficient alignment for new Arena.");
+      return AllocFromNewArena(bytes);
+    }
+    ptr_ += padding;
+    uint8_t* ret = ptr_;
+    DCHECK_ALIGNED(ret, 16);
+    ptr_ += bytes;
+    return ret;
+  }
+
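The padding above is computed with RoundUp rather than an open-coded mask because ~15u has type unsigned int: converted to a 64-bit uintptr_t it zero-extends and would wipe the upper half of the address. A minimal sketch of the pitfall, assuming 64-bit pointers:

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t p = 0x100000008ull;  // An address above 4 GiB.
  // ~15u is a 32-bit constant; in the AND it becomes 0x00000000FFFFFFF0
  // and clears the high bits of the rounded address.
  uintptr_t bad = (p + 15u) & ~15u;
  assert(bad == 0x10u);
  // A mask of the pointer's own width (what RoundUp effectively uses) is fine.
  uintptr_t good = (p + 15u) & ~static_cast<uintptr_t>(15u);
  assert(good == 0x100000010ull);
  return 0;
}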
   // Realloc never frees the input pointer, it is the caller's job to do this if necessary.
-  void* Realloc(void* ptr, size_t ptr_size, size_t new_size,
+  void* Realloc(void* ptr,
+                size_t ptr_size,
+                size_t new_size,
                 ArenaAllocKind kind = kArenaAllocMisc) ALWAYS_INLINE {
     DCHECK_GE(new_size, ptr_size);
     DCHECK_EQ(ptr == nullptr, ptr_size == 0u);
@@ -370,12 +377,17 @@
 
   bool Contains(const void* ptr) const;
 
-  static constexpr size_t kAlignment = 8;
+  // The alignment guaranteed for individual allocations.
+  static constexpr size_t kAlignment = 8u;
+
+  // The alignment required for the whole Arena rather than individual allocations.
+  static constexpr size_t kArenaAlignment = 16u;
 
  private:
   void* AllocWithMemoryTool(size_t bytes, ArenaAllocKind kind);
+  void* AllocWithMemoryToolAlign16(size_t bytes, ArenaAllocKind kind);
   uint8_t* AllocFromNewArena(size_t bytes);
-
+  uint8_t* AllocFromNewArenaWithMemoryTool(size_t bytes);
 
   void UpdateBytesAllocated();
 
@@ -395,7 +407,9 @@
 
 class MemStats {
  public:
-  MemStats(const char* name, const ArenaAllocatorStats* stats, const Arena* first_arena,
+  MemStats(const char* name,
+           const ArenaAllocatorStats* stats,
+           const Arena* first_arena,
            ssize_t lost_bytes_adjustment = 0);
   void Dump(std::ostream& os) const;
 
diff --git a/runtime/base/bit_utils.h b/runtime/base/bit_utils.h
index f0811b0..f536c72 100644
--- a/runtime/base/bit_utils.h
+++ b/runtime/base/bit_utils.h
@@ -27,6 +27,22 @@
 
 namespace art {
 
+// Like sizeof, but count how many bits a type takes. Pass type explicitly.
+template <typename T>
+constexpr size_t BitSizeOf() {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  static_assert(sizeof(T) == sizeof(unsigned_type), "Unexpected type size mismatch!");
+  static_assert(std::numeric_limits<unsigned_type>::radix == 2, "Unexpected radix!");
+  return std::numeric_limits<unsigned_type>::digits;
+}
+
+// Like sizeof, but count how many bits a type takes. Infers type from parameter.
+template <typename T>
+constexpr size_t BitSizeOf(T /*x*/) {
+  return BitSizeOf<T>();
+}
+
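BitSizeOf() is constexpr, so it composes with static_assert. A few example instantiations, assuming the two templates above (and <cstdint>) are in scope:

static_assert(BitSizeOf<int8_t>() == 8, "signed types report their full width");
static_assert(BitSizeOf<uint64_t>() == 64, "uint64_t is 64 bits");
static_assert(BitSizeOf(0) == 32, "the second overload infers T = int");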
 template<typename T>
 constexpr int CLZ(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
@@ -37,6 +53,14 @@
   return (sizeof(T) == sizeof(uint32_t)) ? __builtin_clz(x) : __builtin_clzll(x);
 }
 
+// Similar to CLZ, but returns the bit width on zero input and supports signed integers.
+template<typename T>
+constexpr int JAVASTYLE_CLZ(T x) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  return (x == 0) ? BitSizeOf<T>() : CLZ(static_cast<unsigned_type>(x));
+}
+
 template<typename T>
 constexpr int CTZ(T x) {
   static_assert(std::is_integral<T>::value, "T must be integral");
@@ -48,12 +72,32 @@
   return (sizeof(T) == sizeof(uint32_t)) ? __builtin_ctz(x) : __builtin_ctzll(x);
 }
 
+// Similar to CTZ, but returns the bit width on zero input and supports signed integers.
+template<typename T>
+constexpr int JAVASTYLE_CTZ(T x) {
+  static_assert(std::is_integral<T>::value, "T must be integral");
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  return (x == 0) ? BitSizeOf<T>() : CTZ(static_cast<unsigned_type>(x));
+}
+
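A quick illustration of the zero-input and signed-input behavior, again assuming the definitions above are in scope:

static_assert(JAVASTYLE_CLZ(0) == 32, "zero input yields the full bit width");
static_assert(JAVASTYLE_CTZ(int64_t{0}) == 64, "likewise for 64-bit types");
static_assert(JAVASTYLE_CLZ(int32_t{1}) == 31, "");
static_assert(JAVASTYLE_CTZ(int32_t{-2}) == 1, "negative inputs are cast to unsigned");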
 // Return the number of 1-bits in `x`.
 template<typename T>
 constexpr int POPCOUNT(T x) {
   return (sizeof(T) == sizeof(uint32_t)) ? __builtin_popcount(x) : __builtin_popcountll(x);
 }
 
+// Swap bytes.
+template<typename T>
+constexpr T BSWAP(T x) {
+  if (sizeof(T) == sizeof(uint16_t)) {
+    return __builtin_bswap16(x);
+  } else if (sizeof(T) == sizeof(uint32_t)) {
+    return __builtin_bswap32(x);
+  } else {
+    return __builtin_bswap64(x);
+  }
+}
+
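For instance, these fold at compile time with GCC and Clang, whose byte-swap builtins are usable in constant expressions:

static_assert(BSWAP<uint16_t>(0x1234) == 0x3412, "");
static_assert(BSWAP<uint32_t>(0x12345678u) == 0x78563412u, "");
static_assert(BSWAP<uint64_t>(0x0102030405060708ull) == 0x0807060504030201ull, "");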
 // Find the bit position of the most significant bit (0-based), or -1 if there were no bits set.
 template <typename T>
 constexpr ssize_t MostSignificantBit(T value) {
@@ -152,6 +196,11 @@
   return (x & (n - 1)) == 0;
 }
 
+template<typename T>
+inline bool IsAlignedParam(T* x, int n) {
+  return IsAlignedParam(reinterpret_cast<const uintptr_t>(x), n);
+}
+
 #define CHECK_ALIGNED(value, alignment) \
   CHECK(::art::IsAligned<alignment>(value)) << reinterpret_cast<const void*>(value)
 
@@ -164,22 +213,6 @@
 #define DCHECK_ALIGNED_PARAM(value, alignment) \
   DCHECK(::art::IsAlignedParam(value, alignment)) << reinterpret_cast<const void*>(value)
 
-// Like sizeof, but count how many bits a type takes. Pass type explicitly.
-template <typename T>
-constexpr size_t BitSizeOf() {
-  static_assert(std::is_integral<T>::value, "T must be integral");
-  using unsigned_type = typename std::make_unsigned<T>::type;
-  static_assert(sizeof(T) == sizeof(unsigned_type), "Unexpected type size mismatch!");
-  static_assert(std::numeric_limits<unsigned_type>::radix == 2, "Unexpected radix!");
-  return std::numeric_limits<unsigned_type>::digits;
-}
-
-// Like sizeof, but count how many bits a type takes. Infers type from parameter.
-template <typename T>
-constexpr size_t BitSizeOf(T /*x*/) {
-  return BitSizeOf<T>();
-}
-
 inline uint16_t Low16Bits(uint32_t value) {
   return static_cast<uint16_t>(value);
 }
@@ -358,6 +391,59 @@
       HighToLowBitIterator<T>(bits), HighToLowBitIterator<T>());
 }
 
+// Returns the value with only the lowest one-bit set, or 0 for a zero input (java.lang.X.lowestOneBit).
+template <typename kind>
+inline static kind LowestOneBitValue(kind opnd) {
+  // Hacker's Delight, Section 2-1
+  return opnd & -opnd;
+}
+
+// Returns the value with only the highest one-bit set, or 0 for a zero input (java.lang.X.highestOneBit).
+template <typename T>
+inline static T HighestOneBitValue(T opnd) {
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  T res;
+  if (opnd == 0) {
+    res = 0;
+  } else {
+    int bit_position = BitSizeOf<T>() - (CLZ(static_cast<unsigned_type>(opnd)) + 1);
+    res = static_cast<T>(UINT64_C(1) << bit_position);
+  }
+  return res;
+}
+
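Concretely, LowestOneBitValue relies on two's-complement negation isolating the lowest set bit, while HighestOneBitValue leans on CLZ. A few spot checks, assuming both helpers are in scope:

#include <cassert>
#include <cstdint>

int main() {
  assert(LowestOneBitValue(12) == 4);   // 0b1100 -> 0b0100
  assert(HighestOneBitValue(12) == 8);  // 0b1100 -> 0b1000
  assert(LowestOneBitValue(0) == 0 && HighestOneBitValue(0) == 0);
  assert(HighestOneBitValue(uint64_t{1} << 40) == uint64_t{1} << 40);
  return 0;
}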
+// Rotate bits.
+template <typename T, bool left>
+inline static T Rot(T opnd, int distance) {
+  int mask = BitSizeOf<T>() - 1;
+  int unsigned_right_shift = left ? (-distance & mask) : (distance & mask);
+  int signed_left_shift = left ? (distance & mask) : (-distance & mask);
+  using unsigned_type = typename std::make_unsigned<T>::type;
+  return (static_cast<unsigned_type>(opnd) >> unsigned_right_shift) | (opnd << signed_left_shift);
+}
+
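Masking the distance to the bit width makes negative and oversized rotations behave like Java's rotateLeft/rotateRight. A couple of spot checks, assuming Rot from above:

#include <cassert>
#include <cstdint>

int main() {
  // Rotating left by 1 equals rotating right by width - 1.
  assert((Rot<uint8_t, /*left=*/ true>(0x81, 1)) == 0x03);
  assert((Rot<uint8_t, /*left=*/ false>(0x81, 7)) == 0x03);
  // Distances are taken modulo the bit width.
  assert((Rot<uint32_t, true>(0x12345678u, 36)) == (Rot<uint32_t, true>(0x12345678u, 4)));
  return 0;
}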
+// TUNING: use rbit for arm/arm64
+inline static uint32_t ReverseBits32(uint32_t opnd) {
+  // Hacker's Delight 7-1
+  opnd = ((opnd >>  1) & 0x55555555) | ((opnd & 0x55555555) <<  1);
+  opnd = ((opnd >>  2) & 0x33333333) | ((opnd & 0x33333333) <<  2);
+  opnd = ((opnd >>  4) & 0x0F0F0F0F) | ((opnd & 0x0F0F0F0F) <<  4);
+  opnd = ((opnd >>  8) & 0x00FF00FF) | ((opnd & 0x00FF00FF) <<  8);
+  opnd = ((opnd >> 16)) | ((opnd) << 16);
+  return opnd;
+}
+
+// TUNING: use rbit for arm/arm64
+inline static uint64_t ReverseBits64(uint64_t opnd) {
+  // Hacker's Delight 7-1
+  opnd = (opnd & 0x5555555555555555L) << 1 | ((opnd >> 1) & 0x5555555555555555L);
+  opnd = (opnd & 0x3333333333333333L) << 2 | ((opnd >> 2) & 0x3333333333333333L);
+  opnd = (opnd & 0x0f0f0f0f0f0f0f0fL) << 4 | ((opnd >> 4) & 0x0f0f0f0f0f0f0f0fL);
+  opnd = (opnd & 0x00ff00ff00ff00ffL) << 8 | ((opnd >> 8) & 0x00ff00ff00ff00ffL);
+  opnd = (opnd << 48) | ((opnd & 0xffff0000L) << 16) | ((opnd >> 16) & 0xffff0000L) | (opnd >> 48);
+  return opnd;
+}
+
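Two spot checks for the divide-and-conquer reversal above, assuming both functions are in scope:

#include <cassert>
#include <cstdint>

int main() {
  assert(ReverseBits32(0x00000001u) == 0x80000000u);
  assert(ReverseBits32(0x12345678u) == 0x1e6a2c48u);
  // Reversal is an involution: applying it twice round-trips.
  assert(ReverseBits64(ReverseBits64(0x0123456789abcdefull)) == 0x0123456789abcdefull);
  return 0;
}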
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_BIT_UTILS_H_
diff --git a/runtime/base/histogram-inl.h b/runtime/base/histogram-inl.h
index ca9a694..b28eb72 100644
--- a/runtime/base/histogram-inl.h
+++ b/runtime/base/histogram-inl.h
@@ -48,7 +48,8 @@
     : kAdjust(0),
       kInitialBucketCount(0),
       name_(name),
-      max_buckets_(0) {
+      max_buckets_(0),
+      sample_size_(0) {
 }
 
 template <class Value>
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 24846e5..b0394a5 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -46,7 +46,6 @@
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::instrument_entrypoints_lock_ = nullptr;
 Mutex* Locks::intern_table_lock_ = nullptr;
-Mutex* Locks::jdwp_event_list_lock_ = nullptr;
 Mutex* Locks::jni_function_table_lock_ = nullptr;
 Mutex* Locks::jni_libraries_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
@@ -74,6 +73,7 @@
 Mutex* Locks::jni_weak_globals_lock_ = nullptr;
 ReaderWriterMutex* Locks::dex_lock_ = nullptr;
 std::vector<BaseMutex*> Locks::expected_mutexes_on_weak_ref_access_;
+Atomic<const BaseMutex*> Locks::expected_mutexes_on_weak_ref_access_guard_;
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
@@ -118,6 +118,26 @@
   const BaseMutex* const mutex_;
 };
 
+class Locks::ScopedExpectedMutexesOnWeakRefAccessLock FINAL {
+ public:
+  explicit ScopedExpectedMutexesOnWeakRefAccessLock(const BaseMutex* mutex) : mutex_(mutex) {
+    while (!Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakAcquire(0,
+                                                                                         mutex)) {
+      NanoSleep(100);
+    }
+  }
+
+  ~ScopedExpectedMutexesOnWeakRefAccessLock() {
+    while (!Locks::expected_mutexes_on_weak_ref_access_guard_.CompareExchangeWeakRelease(mutex_,
+                                                                                         0)) {
+      NanoSleep(100);
+    }
+  }
+
+ private:
+  const BaseMutex* const mutex_;
+};
+
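The guard above is a hand-rolled spin lock: the atomic pointer doubles as the lock word, acquired by CAS-ing it from null to the caller's mutex and released by CAS-ing it back, sleeping briefly between retries. A generic sketch of the same pattern with std::atomic (illustrative only; ART's Atomic wrapper and its lock-level rules are not modeled):

#include <atomic>
#include <thread>

class SpinGuard {
 public:
  SpinGuard(std::atomic<const void*>& word, const void* owner)
      : word_(word), owner_(owner) {
    const void* expected = nullptr;
    // Acquire: install ourselves as the owner, retrying on contention.
    while (!word_.compare_exchange_weak(expected, owner_, std::memory_order_acquire)) {
      expected = nullptr;         // compare_exchange rewrites `expected` on failure.
      std::this_thread::yield();  // The code above NanoSleeps instead.
    }
  }

  ~SpinGuard() {
    // Release: the original CASes owner -> null as a sanity check;
    // a plain release store suffices for the sketch.
    word_.store(nullptr, std::memory_order_release);
  }

 private:
  std::atomic<const void*>& word_;
  const void* const owner_;
};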
 // Scoped class that generates events at the beginning and end of lock contention.
 class ScopedContentionRecorder FINAL : public ValueObject {
  public:
@@ -999,7 +1019,6 @@
     DCHECK(verifier_deps_lock_ != nullptr);
     DCHECK(host_dlopen_handles_lock_ != nullptr);
     DCHECK(intern_table_lock_ != nullptr);
-    DCHECK(jdwp_event_list_lock_ != nullptr);
     DCHECK(jni_function_table_lock_ != nullptr);
     DCHECK(jni_libraries_lock_ != nullptr);
     DCHECK(logging_lock_ != nullptr);
@@ -1042,10 +1061,6 @@
     DCHECK(runtime_shutdown_lock_ == nullptr);
     runtime_shutdown_lock_ = new Mutex("runtime shutdown lock", current_lock_level);
 
-    UPDATE_CURRENT_LOCK_LEVEL(kJdwpEventListLock);
-    DCHECK(jdwp_event_list_lock_ == nullptr);
-    jdwp_event_list_lock_ = new Mutex("JDWP event list lock", current_lock_level);
-
     UPDATE_CURRENT_LOCK_LEVEL(kProfilerLock);
     DCHECK(profiler_lock_ == nullptr);
     profiler_lock_ = new Mutex("profiler lock", current_lock_level);
@@ -1169,14 +1184,9 @@
     #undef UPDATE_CURRENT_LOCK_LEVEL
 
     // List of mutexes that we may hold when accessing a weak ref.
-    dex_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(dex_lock_);
-    classlinker_classes_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(classlinker_classes_lock_);
-    jdwp_event_list_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(jdwp_event_list_lock_);
-    jni_libraries_lock_->SetShouldRespondToEmptyCheckpointRequest(true);
-    expected_mutexes_on_weak_ref_access_.push_back(jni_libraries_lock_);
+    AddToExpectedMutexesOnWeakRefAccess(dex_lock_, /*need_lock*/ false);
+    AddToExpectedMutexesOnWeakRefAccess(classlinker_classes_lock_, /*need_lock*/ false);
+    AddToExpectedMutexesOnWeakRefAccess(jni_libraries_lock_, /*need_lock*/ false);
 
     InitConditions();
   }
@@ -1196,4 +1206,38 @@
   return safe_to_call_abort_cb != nullptr && safe_to_call_abort_cb();
 }
 
+void Locks::AddToExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock) {
+  if (need_lock) {
+    ScopedExpectedMutexesOnWeakRefAccessLock mu(mutex);
+    mutex->SetShouldRespondToEmptyCheckpointRequest(true);
+    expected_mutexes_on_weak_ref_access_.push_back(mutex);
+  } else {
+    mutex->SetShouldRespondToEmptyCheckpointRequest(true);
+    expected_mutexes_on_weak_ref_access_.push_back(mutex);
+  }
+}
+
+void Locks::RemoveFromExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock) {
+  if (need_lock) {
+    ScopedExpectedMutexesOnWeakRefAccessLock mu(mutex);
+    mutex->SetShouldRespondToEmptyCheckpointRequest(false);
+    std::vector<BaseMutex*>& list = expected_mutexes_on_weak_ref_access_;
+    auto it = std::find(list.begin(), list.end(), mutex);
+    DCHECK(it != list.end());
+    list.erase(it);
+  } else {
+    mutex->SetShouldRespondToEmptyCheckpointRequest(false);
+    std::vector<BaseMutex*>& list = expected_mutexes_on_weak_ref_access_;
+    auto it = std::find(list.begin(), list.end(), mutex);
+    DCHECK(it != list.end());
+    list.erase(it);
+  }
+}
+
+bool Locks::IsExpectedOnWeakRefAccess(BaseMutex* mutex) {
+  ScopedExpectedMutexesOnWeakRefAccessLock mu(mutex);
+  std::vector<BaseMutex*>& list = expected_mutexes_on_weak_ref_access_;
+  return std::find(list.begin(), list.end(), mutex) != list.end();
+}
+
 }  // namespace art
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index c59664b..2414b5f 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -62,10 +62,11 @@
   kJdwpAdbStateLock,
   kJdwpSocketLock,
   kRegionSpaceRegionLock,
+  kMarkSweepMarkStackLock,
   kRosAllocGlobalLock,
   kRosAllocBracketLock,
   kRosAllocBulkFreeLock,
-  kMarkSweepMarkStackLock,
+  kTaggingLockLevel,
   kTransactionLogLock,
   kJniFunctionTableLock,
   kJniWeakGlobalsLock,
@@ -516,12 +517,12 @@
 // construction and releases it upon destruction.
 class SCOPED_CAPABILITY ReaderMutexLock {
  public:
-  ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) :
+  ReaderMutexLock(Thread* self, ReaderWriterMutex& mu) ACQUIRE(mu) ALWAYS_INLINE :
       self_(self), mu_(mu) {
     mu_.SharedLock(self_);
   }
 
-  ~ReaderMutexLock() RELEASE() {
+  ~ReaderMutexLock() RELEASE() ALWAYS_INLINE {
     mu_.SharedUnlock(self_);
   }
 
@@ -583,6 +584,12 @@
   // Checks for whether it is safe to call Abort() without using locks.
   static bool IsSafeToCallAbortRacy() NO_THREAD_SAFETY_ANALYSIS;
 
+  // Add a mutex to expected_mutexes_on_weak_ref_access_.
+  static void AddToExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock = true);
+  // Remove a mutex from expected_mutexes_on_weak_ref_access_.
+  static void RemoveFromExpectedMutexesOnWeakRefAccess(BaseMutex* mutex, bool need_lock = true);
+  // Check if the given mutex is in expected_mutexes_on_weak_ref_access_.
+  static bool IsExpectedOnWeakRefAccess(BaseMutex* mutex);
 
   // Guards allocation entrypoint instrumenting.
   static Mutex* instrument_entrypoints_lock_;
@@ -630,12 +637,8 @@
   // Guards shutdown of the runtime.
   static Mutex* runtime_shutdown_lock_ ACQUIRED_AFTER(heap_bitmap_lock_);
 
-  static Mutex* jdwp_event_list_lock_
-      ACQUIRED_AFTER(runtime_shutdown_lock_)
-      ACQUIRED_BEFORE(breakpoint_lock_);
-
   // Guards background profiler global state.
-  static Mutex* profiler_lock_ ACQUIRED_AFTER(jdwp_event_list_lock_);
+  static Mutex* profiler_lock_ ACQUIRED_AFTER(runtime_shutdown_lock_);
 
   // Guards trace (ie traceview) requests.
   static Mutex* trace_lock_ ACQUIRED_AFTER(profiler_lock_);
@@ -738,6 +741,8 @@
   // encounter an unexpected mutex on accessing weak refs,
   // Thread::CheckEmptyCheckpointFromWeakRefAccess will detect it.
   static std::vector<BaseMutex*> expected_mutexes_on_weak_ref_access_;
+  static Atomic<const BaseMutex*> expected_mutexes_on_weak_ref_access_guard_;
+  class ScopedExpectedMutexesOnWeakRefAccessLock;
 };
 
 class Roles {
diff --git a/runtime/base/scoped_arena_allocator.h b/runtime/base/scoped_arena_allocator.h
index 55044b3..1a0eb5e 100644
--- a/runtime/base/scoped_arena_allocator.h
+++ b/runtime/base/scoped_arena_allocator.h
@@ -39,8 +39,6 @@
   kFree,
 };
 
-static constexpr size_t kArenaAlignment = 8;
-
 // Holds a list of Arenas for use by ScopedArenaAllocator stack.
 // The memory is returned to the ArenaPool when the ArenaStack is destroyed.
 class ArenaStack : private DebugStackRefCounter, private ArenaAllocatorMemoryTool {
@@ -67,6 +65,9 @@
     return *(reinterpret_cast<ArenaFreeTag*>(ptr) - 1);
   }
 
+  // The alignment guaranteed for individual allocations.
+  static constexpr size_t kAlignment = 8u;
+
  private:
   struct Peak;
   struct Current;
@@ -89,8 +90,8 @@
     if (UNLIKELY(IsRunningOnMemoryTool())) {
       return AllocWithMemoryTool(bytes, kind);
     }
-    // Add kArenaAlignment for the free or used tag. Required to preserve alignment.
-    size_t rounded_bytes = RoundUp(bytes + (kIsDebugBuild ? kArenaAlignment : 0u), kArenaAlignment);
+    // Add kAlignment for the free or used tag. Required to preserve alignment.
+    size_t rounded_bytes = RoundUp(bytes + (kIsDebugBuild ? kAlignment : 0u), kAlignment);
     uint8_t* ptr = top_ptr_;
     if (UNLIKELY(static_cast<size_t>(top_end_ - ptr) < rounded_bytes)) {
       ptr = AllocateFromNextArena(rounded_bytes);
@@ -98,7 +99,7 @@
     CurrentStats()->RecordAlloc(bytes, kind);
     top_ptr_ = ptr + rounded_bytes;
     if (kIsDebugBuild) {
-      ptr += kArenaAlignment;
+      ptr += kAlignment;
       ArenaTagForAllocation(ptr) = ArenaFreeTag::kUsed;
     }
     return ptr;
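In debug builds each allocation reserves one extra kAlignment-sized slot in front of the returned pointer for the free/used tag, and the total is rounded up so the next allocation stays aligned. A standalone sketch of that size math, with kAlignment == 8 as above:

#include <cassert>
#include <cstddef>

size_t DebugRoundedSize(size_t bytes) {
  constexpr size_t kAlignment = 8u;
  // One aligned tag slot in front, then round the total up to kAlignment.
  return (bytes + kAlignment + kAlignment - 1u) & ~(kAlignment - 1u);
}

int main() {
  assert(DebugRoundedSize(1) == 16);   // 8-byte tag + 1 byte -> 16.
  assert(DebugRoundedSize(8) == 16);   // 8-byte tag + 8 bytes, already aligned.
  assert(DebugRoundedSize(13) == 24);  // 8-byte tag + 13 bytes -> 24.
  return 0;
}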
diff --git a/runtime/base/scoped_flock.cc b/runtime/base/scoped_flock.cc
index d4bb56b..5394e53 100644
--- a/runtime/base/scoped_flock.cc
+++ b/runtime/base/scoped_flock.cc
@@ -116,7 +116,10 @@
 ScopedFlock::~ScopedFlock() {
   if (file_.get() != nullptr) {
     int flock_result = TEMP_FAILURE_RETRY(flock(file_->Fd(), LOCK_UN));
-    CHECK_EQ(0, flock_result);
+    if (flock_result != 0) {
+      PLOG(FATAL) << "Unable to unlock file " << file_->GetPath();
+      UNREACHABLE();
+    }
     int close_result = -1;
     if (file_->ReadOnlyMode()) {
       close_result = file_->Close();
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index ff2dd1b..03fc959 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -73,7 +73,7 @@
   }
   if (auto_close_ && fd_ != -1) {
     if (Close() != 0) {
-      PLOG(WARNING) << "Failed to close file " << file_path_;
+      PLOG(WARNING) << "Failed to close file with fd=" << fd_ << " path=" << file_path_;
     }
   }
 }
diff --git a/compiler/optimizing/bytecode_utils.h b/runtime/bytecode_utils.h
similarity index 96%
rename from compiler/optimizing/bytecode_utils.h
rename to runtime/bytecode_utils.h
index 133afa4..fa87b1d 100644
--- a/compiler/optimizing/bytecode_utils.h
+++ b/runtime/bytecode_utils.h
@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
-#define ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#ifndef ART_RUNTIME_BYTECODE_UTILS_H_
+#define ART_RUNTIME_BYTECODE_UTILS_H_
 
 #include "base/arena_object.h"
 #include "dex_file.h"
@@ -177,4 +177,4 @@
 
 }  // namespace art
 
-#endif  // ART_COMPILER_OPTIMIZING_BYTECODE_UTILS_H_
+#endif  // ART_RUNTIME_BYTECODE_UTILS_H_
diff --git a/runtime/cha.cc b/runtime/cha.cc
index eaba01b..7948c29 100644
--- a/runtime/cha.cc
+++ b/runtime/cha.cc
@@ -210,7 +210,7 @@
   }
 }
 
-void ClassHierarchyAnalysis::CheckSingleImplementationInfo(
+void ClassHierarchyAnalysis::CheckVirtualMethodSingleImplementationInfo(
     Handle<mirror::Class> klass,
     ArtMethod* virtual_method,
     ArtMethod* method_in_super,
@@ -290,8 +290,9 @@
       // A non-abstract method overrides an abstract method.
       if (method_in_super->GetSingleImplementation(pointer_size) == nullptr) {
         // Abstract method_in_super has no implementation yet.
-        // We need to grab cha_lock_ for further checking/updating due to possible
-        // races.
+        // We need to grab cha_lock_ since multiple class-linking operations
+        // may be going on concurrently, each of which can check/modify the
+        // single-implementation flag/method of method_in_super.
         MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
         if (!method_in_super->HasSingleImplementation()) {
           return;
@@ -362,6 +363,55 @@
   }
 }
 
+void ClassHierarchyAnalysis::CheckInterfaceMethodSingleImplementationInfo(
+    Handle<mirror::Class> klass,
+    ArtMethod* interface_method,
+    ArtMethod* implementation_method,
+    std::unordered_set<ArtMethod*>& invalidated_single_impl_methods,
+    PointerSize pointer_size) {
+  DCHECK(klass->IsInstantiable());
+  DCHECK(interface_method->IsAbstract() || interface_method->IsDefault());
+
+  if (!interface_method->HasSingleImplementation()) {
+    return;
+  }
+
+  if (implementation_method->IsAbstract()) {
+    // An instantiable class doesn't supply an implementation for
+    // interface_method. Invoking the interface method on the class will throw
+    // AbstractMethodError. This is an uncommon case, so we simply treat
+    // interface_method as not having a single implementation.
+    invalidated_single_impl_methods.insert(interface_method);
+    return;
+  }
+
+  // We need to grab cha_lock_ since multiple class-linking operations may be
+  // going on concurrently, each of which can check/modify the
+  // single-implementation flag/method of interface_method.
+  MutexLock cha_mu(Thread::Current(), *Locks::cha_lock_);
+  // Do this check again after we grab cha_lock_.
+  if (!interface_method->HasSingleImplementation()) {
+    return;
+  }
+
+  ArtMethod* single_impl = interface_method->GetSingleImplementation(pointer_size);
+  if (single_impl == nullptr) {
+    // implementation_method becomes the first implementation for
+    // interface_method.
+    interface_method->SetSingleImplementation(implementation_method, pointer_size);
+    // Keep interface_method's single-implementation status.
+    return;
+  }
+  DCHECK(!single_impl->IsAbstract());
+  if (single_impl->GetDeclaringClass() == implementation_method->GetDeclaringClass()) {
+    // Same implementation. Since implementation_method may be a copy of a default
+    // method, we need to check the declaring class for equality.
+    return;
+  }
+  // Another implementation for interface_method.
+  invalidated_single_impl_methods.insert(interface_method);
+}
+
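The interface bookkeeping above boils down to a three-state transition per interface method: no implementation recorded, exactly one, then permanently invalidated once a second implementation with a different declaring class appears. A standalone model of that transition (illustrative only; ART keeps this state on ArtMethod and guards it with cha_lock_):

#include <cassert>
#include <unordered_map>
#include <unordered_set>

// Per-interface-method state, keyed by a method id in this toy model.
// has_single_impl == true with impl_class == 0 means "abstract, none yet".
struct SingleImplInfo {
  bool has_single_impl = true;
  int impl_class = 0;  // 0 = no implementation recorded yet.
};

void RecordImplementation(std::unordered_map<int, SingleImplInfo>& cha,
                          std::unordered_set<int>& invalidated,
                          int interface_method, int impl_class) {
  SingleImplInfo& info = cha[interface_method];
  if (!info.has_single_impl) {
    return;  // Already invalidated; nothing to update.
  }
  if (info.impl_class == 0) {
    info.impl_class = impl_class;   // First implementation: keep the flag.
  } else if (info.impl_class != impl_class) {
    info.has_single_impl = false;   // Second distinct implementation.
    invalidated.insert(interface_method);
  }  // Same declaring class: a copied default method, still one implementation.
}

int main() {
  std::unordered_map<int, SingleImplInfo> cha;
  std::unordered_set<int> invalidated;
  RecordImplementation(cha, invalidated, /*method=*/ 1, /*class=*/ 10);
  assert(cha[1].has_single_impl && invalidated.empty());
  RecordImplementation(cha, invalidated, 1, 10);  // Copy of the same default method.
  assert(cha[1].has_single_impl);
  RecordImplementation(cha, invalidated, 1, 20);  // A second implementor.
  assert(!cha[1].has_single_impl && invalidated.count(1) == 1);
  return 0;
}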
 void ClassHierarchyAnalysis::InitSingleImplementationFlag(Handle<mirror::Class> klass,
                                                           ArtMethod* method,
                                                           PointerSize pointer_size) {
@@ -382,6 +432,7 @@
       // Rare case, but we do accept it (such as 800-smali/smali/b_26143249.smali).
       // Do not attempt to devirtualize it.
       method->SetHasSingleImplementation(false);
+      DCHECK(method->GetSingleImplementation(pointer_size) == nullptr);
     } else {
       // Abstract method starts with single-implementation flag set and null
       // implementation method.
@@ -396,9 +447,15 @@
 }
 
 void ClassHierarchyAnalysis::UpdateAfterLoadingOf(Handle<mirror::Class> klass) {
+  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   if (klass->IsInterface()) {
+    for (ArtMethod& method : klass->GetDeclaredVirtualMethods(image_pointer_size)) {
+      DCHECK(method.IsAbstract() || method.IsDefault());
+      InitSingleImplementationFlag(klass, &method, image_pointer_size);
+    }
     return;
   }
+
   mirror::Class* super_class = klass->GetSuperClass();
   if (super_class == nullptr) {
     return;
@@ -408,7 +465,6 @@
   // is invalidated by linking `klass`.
   std::unordered_set<ArtMethod*> invalidated_single_impl_methods;
 
-  PointerSize image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
   // Do an entry-by-entry comparison of vtable contents with super's vtable.
   for (int32_t i = 0; i < super_class->GetVTableLength(); ++i) {
     ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
@@ -418,33 +474,59 @@
       if (method->IsAbstract() && klass->IsInstantiable()) {
         // An instantiable class that inherits an abstract method is treated as
         // supplying an implementation that throws AbstractMethodError.
-        CheckSingleImplementationInfo(klass,
-                                      method,
-                                      method_in_super,
-                                      invalidated_single_impl_methods,
-                                      image_pointer_size);
+        CheckVirtualMethodSingleImplementationInfo(klass,
+                                                   method,
+                                                   method_in_super,
+                                                   invalidated_single_impl_methods,
+                                                   image_pointer_size);
       }
       continue;
     }
     InitSingleImplementationFlag(klass, method, image_pointer_size);
-    CheckSingleImplementationInfo(klass,
-                                  method,
-                                  method_in_super,
-                                  invalidated_single_impl_methods,
-                                  image_pointer_size);
+    CheckVirtualMethodSingleImplementationInfo(klass,
+                                               method,
+                                               method_in_super,
+                                               invalidated_single_impl_methods,
+                                               image_pointer_size);
   }
-
   // For new virtual methods that don't override.
   for (int32_t i = super_class->GetVTableLength(); i < klass->GetVTableLength(); ++i) {
     ArtMethod* method = klass->GetVTableEntry(i, image_pointer_size);
     InitSingleImplementationFlag(klass, method, image_pointer_size);
   }
 
-  Runtime* const runtime = Runtime::Current();
+  if (klass->IsInstantiable()) {
+    auto* iftable = klass->GetIfTable();
+    const size_t ifcount = klass->GetIfTableCount();
+    for (size_t i = 0; i < ifcount; ++i) {
+      mirror::Class* interface = iftable->GetInterface(i);
+      for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
+        ArtMethod* interface_method = interface->GetVirtualMethod(j, image_pointer_size);
+        mirror::PointerArray* method_array = iftable->GetMethodArray(i);
+        ArtMethod* implementation_method =
+            method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size);
+        DCHECK(implementation_method != nullptr) << klass->PrettyClass();
+        CheckInterfaceMethodSingleImplementationInfo(klass,
+                                                     interface_method,
+                                                     implementation_method,
+                                                     invalidated_single_impl_methods,
+                                                     image_pointer_size);
+      }
+    }
+  }
+
+  InvalidateSingleImplementationMethods(invalidated_single_impl_methods);
+}
+
+void ClassHierarchyAnalysis::InvalidateSingleImplementationMethods(
+    std::unordered_set<ArtMethod*>& invalidated_single_impl_methods) {
   if (!invalidated_single_impl_methods.empty()) {
+    Runtime* const runtime = Runtime::Current();
     Thread *self = Thread::Current();
     // Method headers for compiled code to be invalidated.
     std::unordered_set<OatQuickMethodHeader*> dependent_method_headers;
+    PointerSize image_pointer_size =
+        Runtime::Current()->GetClassLinker()->GetImagePointerSize();
 
     {
       // We do this under cha_lock_. Committing code also grabs this lock to
diff --git a/runtime/cha.h b/runtime/cha.h
index a56a752..99c49d2 100644
--- a/runtime/cha.h
+++ b/runtime/cha.h
@@ -117,11 +117,13 @@
                                     PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Check/update single-implementation info when one virtual method
+  // overrides another.
   // `virtual_method` in `klass` overrides `method_in_super`.
-  // This will invalidate some assumptions on single-implementation.
+  // This may invalidate some assumptions on single-implementation.
   // Append methods that should have their single-implementation flag invalidated
   // to `invalidated_single_impl_methods`.
-  void CheckSingleImplementationInfo(
+  void CheckVirtualMethodSingleImplementationInfo(
       Handle<mirror::Class> klass,
       ArtMethod* virtual_method,
       ArtMethod* method_in_super,
@@ -129,6 +131,23 @@
       PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Check/update single-implementation info when one method
+  // implements an interface method.
+  // `implementation_method` in `klass` implements `interface_method`.
+  // Append `interface_method` to `invalidated_single_impl_methods`
+  // if `interface_method` gets a new implementation.
+  void CheckInterfaceMethodSingleImplementationInfo(
+      Handle<mirror::Class> klass,
+      ArtMethod* interface_method,
+      ArtMethod* implementation_method,
+      std::unordered_set<ArtMethod*>& invalidated_single_impl_methods,
+      PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  void InvalidateSingleImplementationMethods(
+      std::unordered_set<ArtMethod*>& invalidated_single_impl_methods)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // For all methods in vtable slot at `verify_index` of `verify_class` and its
   // superclasses, single-implementation status should be false, except if the
   // method is `excluded_method`.
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index bd510ca..9ddc6cf 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -161,9 +161,15 @@
   return resolved_method;
 }
 
-inline ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
-                                               ObjPtr<mirror::DexCache> dex_cache) {
-  return dex_cache->GetResolvedField(field_idx, image_pointer_size_);
+inline ArtField* ClassLinker::LookupResolvedField(uint32_t field_idx,
+                                                  ArtMethod* referrer,
+                                                  bool is_static) {
+  ObjPtr<mirror::DexCache> dex_cache = referrer->GetDexCache();
+  ArtField* field = dex_cache->GetResolvedField(field_idx, image_pointer_size_);
+  if (field == nullptr) {
+    field = LookupResolvedField(field_idx, dex_cache, referrer->GetClassLoader(), is_static);
+  }
+  return field;
 }
 
 inline ArtField* ClassLinker::ResolveField(uint32_t field_idx,
@@ -171,7 +177,8 @@
                                            bool is_static) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
-  ArtField* resolved_field = GetResolvedField(field_idx, referrer->GetDexCache());
+  ArtField* resolved_field =
+      referrer->GetDexCache()->GetResolvedField(field_idx, image_pointer_size_);
   if (UNLIKELY(resolved_field == nullptr)) {
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 9b0ffaf..4bc8e8e 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -752,22 +752,6 @@
                FindSystemClass(self, "[Ljava/lang/StackTraceElement;"));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
 
-  // Ensure void type is resolved in the core's dex cache so java.lang.Void is correctly
-  // initialized.
-  {
-    const DexFile& dex_file = java_lang_Object->GetDexFile();
-    const DexFile::TypeId* void_type_id = dex_file.FindTypeId("V");
-    CHECK(void_type_id != nullptr);
-    dex::TypeIndex void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
-    // Now we resolve void type so the dex cache contains it. We use java.lang.Object class
-    // as referrer so the used dex cache is core's one.
-    ObjPtr<mirror::Class> resolved_type = ResolveType(dex_file,
-                                                      void_type_idx,
-                                                      java_lang_Object.Get());
-    CHECK_EQ(resolved_type, GetClassRoot(kPrimitiveVoid));
-    self->AssertNoPendingException();
-  }
-
   // Create conflict tables that depend on the class linker.
   runtime->FixupConflictTables();
 
@@ -922,7 +906,6 @@
       runtime->GetOatFileManager().RegisterImageOatFiles(spaces);
   DCHECK(!oat_files.empty());
   const OatHeader& default_oat_header = oat_files[0]->GetOatHeader();
-  CHECK_EQ(default_oat_header.GetImageFileLocationOatChecksum(), 0U);
   CHECK_EQ(default_oat_header.GetImageFileLocationOatDataBegin(), 0U);
   const char* image_file_location = oat_files[0]->GetOatHeader().
       GetStoreValueByKey(OatHeader::kImageLocationKey);
@@ -1041,7 +1024,8 @@
            class_loader->GetClass();
 }
 
-static mirror::String* GetDexPathListElementName(ObjPtr<mirror::Object> element)
+static bool GetDexPathListElementName(ObjPtr<mirror::Object> element,
+                                      ObjPtr<mirror::String>* out_name)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtField* const dex_file_field =
       jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
@@ -1053,17 +1037,20 @@
   CHECK_EQ(dex_file_field->GetDeclaringClass(), element->GetClass()) << element->PrettyTypeOf();
   ObjPtr<mirror::Object> dex_file = dex_file_field->GetObject(element);
   if (dex_file == nullptr) {
-    return nullptr;
+    // A null dex file means it was probably a jar with no dex files; return a null string.
+    *out_name = nullptr;
+    return true;
   }
   ObjPtr<mirror::Object> name_object = dex_file_name_field->GetObject(dex_file);
   if (name_object != nullptr) {
-    return name_object->AsString();
+    *out_name = name_object->AsString();
+    return true;
   }
-  return nullptr;
+  return false;
 }
 
 static bool FlattenPathClassLoader(ObjPtr<mirror::ClassLoader> class_loader,
-                                   std::list<mirror::String*>* out_dex_file_names,
+                                   std::list<ObjPtr<mirror::String>>* out_dex_file_names,
                                    std::string* error_msg)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(out_dex_file_names != nullptr);
@@ -1099,12 +1086,14 @@
             *error_msg = StringPrintf("Null dex element at index %d", i);
             return false;
           }
-          ObjPtr<mirror::String> const name = GetDexPathListElementName(element);
-          if (name == nullptr) {
-            *error_msg = StringPrintf("Null name for dex element at index %d", i);
+          ObjPtr<mirror::String> name;
+          if (!GetDexPathListElementName(element, &name)) {
+            *error_msg = StringPrintf("Invalid dex path list element at index %d", i);
             return false;
           }
-          out_dex_file_names->push_front(name.Ptr());
+          if (name != nullptr) {
+            out_dex_file_names->push_front(name.Ptr());
+          }
         }
       }
     }
@@ -1173,9 +1162,7 @@
     for (ObjPtr<mirror::Class> klass : classes_) {
       for (uint32_t i = 0, num = klass->NumDirectInterfaces(); i != num; ++i) {
         CHECK(klass->GetDirectInterface(self_, klass, i) != nullptr)
-            << klass->PrettyDescriptor() << " iface #" << i
-            << klass->GetDexFile().StringByTypeIdx(klass->GetDirectInterfaceTypeIdx(i))
-            << " Bug: 34839984";
+            << klass->PrettyDescriptor() << " iface #" << i;
       }
     }
   }
@@ -1292,7 +1279,10 @@
           num_types = dex_file->NumTypeIds();
         }
         const size_t num_methods = dex_file->NumMethodIds();
-        const size_t num_fields = dex_file->NumFieldIds();
+        size_t num_fields = mirror::DexCache::kDexCacheFieldCacheSize;
+        if (dex_file->NumFieldIds() < num_fields) {
+          num_fields = dex_file->NumFieldIds();
+        }
         size_t num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
         if (dex_file->NumProtoIds() < num_method_types) {
           num_method_types = dex_file->NumProtoIds();
@@ -1336,17 +1326,22 @@
           dex_cache->SetResolvedMethods(methods);
         }
         if (num_fields != 0u) {
-          ArtField** const fields =
-              reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
-          for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
-            DCHECK(fields[j] == nullptr);
+          mirror::FieldDexCacheType* const image_resolved_fields = dex_cache->GetResolvedFields();
+          mirror::FieldDexCacheType* const fields =
+              reinterpret_cast<mirror::FieldDexCacheType*>(raw_arrays + layout.FieldsOffset());
+          for (size_t j = 0; j < num_fields; ++j) {
+            DCHECK_EQ(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size_).index,
+                      0u);
+            DCHECK(mirror::DexCache::GetNativePairPtrSize(fields, j, image_pointer_size_).object ==
+                   nullptr);
+            mirror::DexCache::SetNativePairPtrSize(
+                fields,
+                j,
+                mirror::DexCache::GetNativePairPtrSize(image_resolved_fields,
+                                                       j,
+                                                       image_pointer_size_),
+                image_pointer_size_);
           }
-          CopyNonNull(dex_cache->GetResolvedFields(),
-                      num_fields,
-                      fields,
-                      [] (const ArtField* field) {
-                          return field == nullptr;
-                      });
           dex_cache->SetResolvedFields(fields);
         }
         if (num_method_types != 0u) {
@@ -1785,14 +1780,14 @@
       *error_msg = "Unexpected BootClassLoader in app image";
       return false;
     }
-    std::list<mirror::String*> image_dex_file_names;
+    std::list<ObjPtr<mirror::String>> image_dex_file_names;
     std::string temp_error_msg;
     if (!FlattenPathClassLoader(image_class_loader.Get(), &image_dex_file_names, &temp_error_msg)) {
       *error_msg = StringPrintf("Failed to flatten image class loader hierarchy '%s'",
                                 temp_error_msg.c_str());
       return false;
     }
-    std::list<mirror::String*> loader_dex_file_names;
+    std::list<ObjPtr<mirror::String>> loader_dex_file_names;
     if (!FlattenPathClassLoader(class_loader.Get(), &loader_dex_file_names, &temp_error_msg)) {
       *error_msg = StringPrintf("Failed to flatten class loader hierarchy '%s'",
                                 temp_error_msg.c_str());
@@ -1804,7 +1799,10 @@
       ObjPtr<mirror::Object> element = elements->GetWithoutChecks(i);
       if (element != nullptr) {
         // If we are somewhere in the middle of the array, there may be nulls at the end.
-        loader_dex_file_names.push_back(GetDexPathListElementName(element));
+        ObjPtr<mirror::String> name;
+        if (GetDexPathListElementName(element, &name) && name != nullptr) {
+          loader_dex_file_names.push_back(name);
+        }
       }
     }
     // Ignore the number of image dex files since we are adding those to the class loader anyways.
@@ -1920,12 +1918,22 @@
     // Since it ensures classes are in the class table.
     VerifyClassInTableArtMethodVisitor visitor2(class_table);
     header.VisitPackedArtMethods(&visitor2, space->Begin(), kRuntimePointerSize);
-  }
-  if (app_image) {
-    // TODO: Restrict this check to debug builds. Bug: 34839984
+    // Verify that all direct interfaces of classes in the class table are also resolved.
     VerifyDirectInterfacesInTableClassVisitor visitor(class_loader.Get());
     class_table->Visit(visitor);
     visitor.Check();
+    // Check that all non-primitive classes in dex caches are also in the class table.
+    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
+      ObjPtr<mirror::DexCache> dex_cache = dex_caches->Get(i);
+      mirror::TypeDexCacheType* const types = dex_cache->GetResolvedTypes();
+      for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
+        ObjPtr<mirror::Class> klass = types[j].load(std::memory_order_relaxed).object.Read();
+        if (klass != nullptr && !klass->IsPrimitive()) {
+          CHECK(class_table->Contains(klass)) << klass->PrettyDescriptor()
+              << " " << dex_cache->GetDexFile()->GetLocation();
+        }
+      }
+    }
   }
   VLOG(class_linker) << "Adding image space took " << PrettyDuration(NanoTime() - start_time);
   return true;
@@ -2774,7 +2782,7 @@
   }
   CHECK(klass->IsLoaded());
 
-  // At this point the class is loaded. Publish a ClassLoad even.
+  // At this point the class is loaded. Publish a ClassLoad event.
   // Note: this may be a temporary class. It is a listener's responsibility to handle this.
   Runtime::Current()->GetRuntimeCallbacks()->ClassLoad(klass);
 
@@ -3497,13 +3505,12 @@
     return dex_cache;
   }
   // Failure, dump diagnostic and abort.
-  std::string location(dex_file.GetLocation());
   for (const DexCacheData& data : dex_caches_) {
     if (DecodeDexCache(self, data) != nullptr) {
-      LOG(ERROR) << "Registered dex file " << data.dex_file->GetLocation();
+      LOG(FATAL_WITHOUT_ABORT) << "Registered dex file " << data.dex_file->GetLocation();
     }
   }
-  LOG(FATAL) << "Failed to find DexCache for DexFile " << location;
+  LOG(FATAL) << "Failed to find DexCache for DexFile " << dex_file.GetLocation();
   UNREACHABLE();
 }
 
@@ -3725,6 +3732,12 @@
 
   ObjPtr<mirror::Class> existing = InsertClass(descriptor, new_class.Get(), hash);
   if (existing == nullptr) {
+    // We postpone ClassLoad and ClassPrepare events to this point in time to avoid
+    // duplicate events in case of races. Array classes don't really have dedicated
+    // load and prepare phases, anyway.
+    Runtime::Current()->GetRuntimeCallbacks()->ClassLoad(new_class);
+    Runtime::Current()->GetRuntimeCallbacks()->ClassPrepare(new_class, new_class);
+
     jit::Jit::NewTypeLoadedIfUsingJit(new_class.Get());
     return new_class.Get();
   }
@@ -3907,8 +3920,10 @@
   if (!supertype->IsVerified() && !supertype->IsErroneous()) {
     VerifyClass(self, supertype);
   }
-  if (supertype->IsCompileTimeVerified()) {
-    // Either we are verified or we soft failed and need to retry at runtime.
+
+  if (supertype->IsVerified() || supertype->ShouldVerifyAtRuntime()) {
+    // The supertype is either verified, or we soft failed at AOT time.
+    DCHECK(supertype->IsVerified() || Runtime::Current()->IsAotCompiler());
     return true;
   }
   // If we got this far then we have a hard failure.
@@ -3974,13 +3989,16 @@
       return verifier::MethodVerifier::kHardFailure;
     }
 
-    // Don't attempt to re-verify if already sufficiently verified.
+    // Don't attempt to re-verify if already verified.
     if (klass->IsVerified()) {
       EnsureSkipAccessChecksMethods(klass, image_pointer_size_);
       return verifier::MethodVerifier::kNoFailure;
     }
-    if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
-      return verifier::MethodVerifier::kNoFailure;
+
+    // For AOT, don't attempt to re-verify if we have already found we should
+    // verify at runtime.
+    if (Runtime::Current()->IsAotCompiler() && klass->ShouldVerifyAtRuntime()) {
+      return verifier::MethodVerifier::kSoftFailure;
     }
 
     if (klass->GetStatus() == mirror::Class::kStatusResolved) {
@@ -4162,19 +4180,6 @@
     return false;
   }
 
-  // We may be running with a preopted oat file but without image. In this case,
-  // we don't skip verification of skip_access_checks classes to ensure we initialize
-  // dex caches with all types resolved during verification.
-  // We need to trust image classes, as these might be coming out of a pre-opted, quickened boot
-  // image (that we just failed loading), and the verifier can't be run on quickened opcodes when
-  // the runtime isn't started. On the other hand, app classes can be re-verified even if they are
-  // already pre-opted, as then the runtime is started.
-  if (!Runtime::Current()->IsAotCompiler() &&
-      !Runtime::Current()->GetHeap()->HasBootImageSpace() &&
-      klass->GetClassLoader() != nullptr) {
-    return false;
-  }
-
   uint16_t class_def_index = klass->GetDexClassDefIndex();
   oat_file_class_status = oat_dex_file->GetOatClass(class_def_index).GetStatus();
   if (oat_file_class_status == mirror::Class::kStatusVerified ||
@@ -4268,53 +4273,53 @@
                                              jobjectArray throws) {
   Thread* self = soa.Self();
   StackHandleScope<10> hs(self);
-  MutableHandle<mirror::Class> klass(hs.NewHandle(
+  MutableHandle<mirror::Class> temp_klass(hs.NewHandle(
       AllocClass(self, GetClassRoot(kJavaLangClass), sizeof(mirror::Class))));
-  if (klass == nullptr) {
+  if (temp_klass == nullptr) {
     CHECK(self->IsExceptionPending());  // OOME.
     return nullptr;
   }
-  DCHECK(klass->GetClass() != nullptr);
-  klass->SetObjectSize(sizeof(mirror::Proxy));
+  DCHECK(temp_klass->GetClass() != nullptr);
+  temp_klass->SetObjectSize(sizeof(mirror::Proxy));
   // Set the class access flags incl. VerificationAttempted, so we do not try to set the flag on
   // the methods.
-  klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccVerificationAttempted);
-  klass->SetClassLoader(soa.Decode<mirror::ClassLoader>(loader));
-  DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
-  klass->SetName(soa.Decode<mirror::String>(name));
-  klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
+  temp_klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccVerificationAttempted);
+  temp_klass->SetClassLoader(soa.Decode<mirror::ClassLoader>(loader));
+  DCHECK_EQ(temp_klass->GetPrimitiveType(), Primitive::kPrimNot);
+  temp_klass->SetName(soa.Decode<mirror::String>(name));
+  temp_klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
   // Object has an empty iftable, copy it for that reason.
-  klass->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
-  mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
-  std::string descriptor(GetDescriptorForProxy(klass.Get()));
+  temp_klass->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
+  mirror::Class::SetStatus(temp_klass, mirror::Class::kStatusIdx, self);
+  std::string descriptor(GetDescriptorForProxy(temp_klass.Get()));
   const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
 
   // Needs to be before we insert the class so that the allocator field is set.
-  LinearAlloc* const allocator = GetOrCreateAllocatorForClassLoader(klass->GetClassLoader());
+  LinearAlloc* const allocator = GetOrCreateAllocatorForClassLoader(temp_klass->GetClassLoader());
 
   // Insert the class before loading the fields as the field roots
   // (ArtField::declaring_class_) are only visited from the class
   // table. There can't be any suspend points between inserting the
   // class and setting the field arrays below.
-  ObjPtr<mirror::Class> existing = InsertClass(descriptor.c_str(), klass.Get(), hash);
+  ObjPtr<mirror::Class> existing = InsertClass(descriptor.c_str(), temp_klass.Get(), hash);
   CHECK(existing == nullptr);
 
   // Instance fields are inherited, but we add a couple of static fields...
   const size_t num_fields = 2;
   LengthPrefixedArray<ArtField>* sfields = AllocArtFieldArray(self, allocator, num_fields);
-  klass->SetSFieldsPtr(sfields);
+  temp_klass->SetSFieldsPtr(sfields);
 
   // 1. Create a static field 'interfaces' that holds the _declared_ interfaces implemented by
   // our proxy, so Class.getInterfaces doesn't return the flattened set.
   ArtField& interfaces_sfield = sfields->At(0);
   interfaces_sfield.SetDexFieldIndex(0);
-  interfaces_sfield.SetDeclaringClass(klass.Get());
+  interfaces_sfield.SetDeclaringClass(temp_klass.Get());
   interfaces_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // 2. Create a static field 'throws' that holds exceptions thrown by our methods.
   ArtField& throws_sfield = sfields->At(1);
   throws_sfield.SetDexFieldIndex(1);
-  throws_sfield.SetDeclaringClass(klass.Get());
+  throws_sfield.SetDeclaringClass(temp_klass.Get());
   throws_sfield.SetAccessFlags(kAccStatic | kAccPublic | kAccFinal);
 
   // Proxies have 1 direct method, the constructor
@@ -4335,43 +4340,46 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  klass->SetMethodsPtr(proxy_class_methods, num_direct_methods, num_virtual_methods);
+  temp_klass->SetMethodsPtr(proxy_class_methods, num_direct_methods, num_virtual_methods);
 
   // Create the single direct method.
-  CreateProxyConstructor(klass, klass->GetDirectMethodUnchecked(0, image_pointer_size_));
+  CreateProxyConstructor(temp_klass, temp_klass->GetDirectMethodUnchecked(0, image_pointer_size_));
 
   // Create virtual method using specified prototypes.
   // TODO These should really use the iterators.
   for (size_t i = 0; i < num_virtual_methods; ++i) {
-    auto* virtual_method = klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
+    auto* virtual_method = temp_klass->GetVirtualMethodUnchecked(i, image_pointer_size_);
     auto* prototype = h_methods->Get(i)->GetArtMethod();
-    CreateProxyMethod(klass, prototype, virtual_method);
+    CreateProxyMethod(temp_klass, prototype, virtual_method);
     DCHECK(virtual_method->GetDeclaringClass() != nullptr);
     DCHECK(prototype->GetDeclaringClass() != nullptr);
   }
 
   // The super class is java.lang.reflect.Proxy
-  klass->SetSuperClass(GetClassRoot(kJavaLangReflectProxy));
+  temp_klass->SetSuperClass(GetClassRoot(kJavaLangReflectProxy));
   // Now effectively in the loaded state.
-  mirror::Class::SetStatus(klass, mirror::Class::kStatusLoaded, self);
+  mirror::Class::SetStatus(temp_klass, mirror::Class::kStatusLoaded, self);
   self->AssertNoPendingException();
 
-  MutableHandle<mirror::Class> new_class = hs.NewHandle<mirror::Class>(nullptr);
+  // At this point the class is loaded. Publish a ClassLoad event.
+  // Note: this may be a temporary class. It is a listener's responsibility to handle this.
+  Runtime::Current()->GetRuntimeCallbacks()->ClassLoad(temp_klass);
+
+  MutableHandle<mirror::Class> klass = hs.NewHandle<mirror::Class>(nullptr);
   {
     // Must hold lock on object when resolved.
-    ObjectLock<mirror::Class> resolution_lock(self, klass);
+    ObjectLock<mirror::Class> resolution_lock(self, temp_klass);
     // Link the fields and virtual methods, creating vtable and iftables.
     // The new class will replace the old one in the class table.
     Handle<mirror::ObjectArray<mirror::Class>> h_interfaces(
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Class>>(interfaces)));
-    if (!LinkClass(self, descriptor.c_str(), klass, h_interfaces, &new_class)) {
-      mirror::Class::SetStatus(klass, mirror::Class::kStatusErrorUnresolved, self);
+    if (!LinkClass(self, descriptor.c_str(), temp_klass, h_interfaces, &klass)) {
+      mirror::Class::SetStatus(temp_klass, mirror::Class::kStatusErrorUnresolved, self);
       return nullptr;
     }
   }
-  CHECK(klass->IsRetired());
-  CHECK_NE(klass.Get(), new_class.Get());
-  klass.Assign(new_class.Get());
+  CHECK(temp_klass->IsRetired());
+  CHECK_NE(temp_klass.Get(), klass.Get());
 
   CHECK_EQ(interfaces_sfield.GetDeclaringClass(), klass.Get());
   interfaces_sfield.SetObject<false>(
@@ -4382,6 +4390,8 @@
       klass.Get(),
       soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>>(throws));
 
+  Runtime::Current()->GetRuntimeCallbacks()->ClassPrepare(temp_klass, klass);
+
   {
     // Lock on klass is released. Lock new class object.
     ObjectLock<mirror::Class> initialization_lock(self, klass);
@@ -4409,9 +4419,9 @@
                                                decoded_name->ToModifiedUtf8().c_str()));
     CHECK_EQ(ArtField::PrettyField(klass->GetStaticField(1)), throws_field_name);
 
-    CHECK_EQ(klass.Get()->GetInterfaces(),
+    CHECK_EQ(klass.Get()->GetProxyInterfaces(),
              soa.Decode<mirror::ObjectArray<mirror::Class>>(interfaces));
-    CHECK_EQ(klass.Get()->GetThrows(),
+    CHECK_EQ(klass.Get()->GetProxyThrows(),
              soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>>(throws));
   }
   return klass.Get();
@@ -4428,9 +4438,15 @@
   // Create constructor for Proxy that must initialize the method.
   CHECK_EQ(GetClassRoot(kJavaLangReflectProxy)->NumDirectMethods(), 23u);
 
-  ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->GetDirectMethodUnchecked(
-      8, image_pointer_size_);
-  DCHECK_EQ(std::string(proxy_constructor->GetName()), "<init>");
+  // Find the <init>(InvocationHandler)V method. The exact method offset varies depending
+  // on which front-end compiler was used to build the libcore DEX files.
+  ArtMethod* proxy_constructor = GetClassRoot(kJavaLangReflectProxy)->
+      FindDeclaredDirectMethod("<init>",
+                               "(Ljava/lang/reflect/InvocationHandler;)V",
+                               image_pointer_size_);
+  DCHECK(proxy_constructor != nullptr)
+      << "Could not find <init> method in java.lang.reflect.Proxy";
+
   // Ensure constructor is in dex cache so that we can use the dex cache to look up the overridden
   // constructor method.
   GetClassRoot(kJavaLangReflectProxy)->GetDexCache()->SetResolvedMethod(
@@ -4547,108 +4563,6 @@
   return CanWeInitializeClass(super_class, can_init_statics, can_init_parents);
 }
 
-std::string DescribeSpace(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
-  std::ostringstream oss;
-  gc::Heap* heap = Runtime::Current()->GetHeap();
-  gc::space::ContinuousSpace* cs = heap->FindContinuousSpaceFromAddress(klass.Ptr());
-  if (cs != nullptr) {
-    if (cs->IsImageSpace()) {
-      oss << "image/" << cs->GetName() << "/" << cs->AsImageSpace()->GetImageFilename();
-    } else {
-      oss << "continuous/" << cs->GetName();
-    }
-  } else {
-    gc::space::DiscontinuousSpace* ds =
-        heap->FindDiscontinuousSpaceFromObject(klass, /* fail_ok */ true);
-    if (ds != nullptr) {
-      oss << "discontinuous/" << ds->GetName();
-    } else {
-      oss << "invalid";
-    }
-  }
-  return oss.str();
-}
-
-std::string DescribeLoaders(ObjPtr<mirror::Class> klass, const char* iface_descriptor)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  std::ostringstream oss;
-  uint32_t hash = ComputeModifiedUtf8Hash(iface_descriptor);
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  ObjPtr<mirror::Class> path_class_loader =
-      soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader);
-  ObjPtr<mirror::Class> dex_class_loader =
-      soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_DexClassLoader);
-
-  // Print the class loader chain.
-  bool found_iface;
-  const char* loader_separator = "";
-  for (ObjPtr<mirror::ClassLoader> loader = klass->GetClassLoader();
-       loader != nullptr;
-       loader = loader->GetParent()) {
-    oss << loader_separator << loader->GetClass()->PrettyDescriptor();
-    loader_separator = ";";
-    // If we didn't find the interface yet, try to find it in the current class loader.
-    if (!found_iface) {
-      ClassTable* table = Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(loader);
-      ObjPtr<mirror::Class> iface =
-          (table != nullptr) ? table->Lookup(iface_descriptor, hash) : nullptr;
-      if (iface != nullptr) {
-        found_iface = true;
-        oss << "[hit:" << DescribeSpace(iface) << "]";
-      }
-    }
-
-    // For PathClassLoader or DexClassLoader also dump the dex file locations.
-    if (loader->GetClass() == path_class_loader || loader->GetClass() == dex_class_loader) {
-      ArtField* const cookie_field =
-          jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
-      ArtField* const dex_file_field =
-          jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
-      ObjPtr<mirror::Object> dex_path_list =
-          jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
-              GetObject(loader);
-      if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
-        ObjPtr<mirror::Object> dex_elements_obj =
-            jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-            GetObject(dex_path_list);
-        if (dex_elements_obj != nullptr) {
-          ObjPtr<mirror::ObjectArray<mirror::Object>> dex_elements =
-              dex_elements_obj->AsObjectArray<mirror::Object>();
-          oss << "(";
-          const char* path_separator = "";
-          for (int32_t i = 0; i != dex_elements->GetLength(); ++i) {
-            ObjPtr<mirror::Object> element = dex_elements->GetWithoutChecks(i);
-            ObjPtr<mirror::Object> dex_file =
-                (element != nullptr) ? dex_file_field->GetObject(element) : nullptr;
-            ObjPtr<mirror::LongArray> long_array =
-                (dex_file != nullptr) ? cookie_field->GetObject(dex_file)->AsLongArray() : nullptr;
-            if (long_array != nullptr) {
-              int32_t long_array_size = long_array->GetLength();
-              // First element is the oat file.
-              for (int32_t j = kDexFileIndexStart; j < long_array_size; ++j) {
-                const DexFile* cp_dex_file = reinterpret_cast<const DexFile*>(
-                    static_cast<uintptr_t>(long_array->GetWithoutChecks(j)));
-                oss << path_separator << cp_dex_file->GetLocation();
-                path_separator = ":";
-              }
-            }
-          }
-          oss << ")";
-        }
-      }
-    }
-  }
-
-  // Do a paranoid check that the `klass` itself is in the class table.
-  ClassTable* table =
-      Runtime::Current()->GetClassLinker()->ClassTableForClassLoader(klass->GetClassLoader());
-  ObjPtr<mirror::Class> k = (table != nullptr) ? table->LookupByDescriptor(klass) : nullptr;
-  if (k != klass) {
-    oss << "{FAIL:" << k.Ptr() << "!=" << klass.Ptr() << "}";
-  }
-  return oss.str();
-}
-
 bool ClassLinker::InitializeClass(Thread* self, Handle<mirror::Class> klass,
                                   bool can_init_statics, bool can_init_parents) {
   // see JLS 3rd edition, 12.4.2 "Detailed Initialization Procedure" for the locking protocol
@@ -4796,15 +4710,7 @@
       MutableHandle<mirror::Class> handle_scope_iface(hs_iface.NewHandle<mirror::Class>(nullptr));
       for (size_t i = 0; i < num_direct_interfaces; i++) {
         handle_scope_iface.Assign(mirror::Class::GetDirectInterface(self, klass.Get(), i));
-        if (UNLIKELY(handle_scope_iface == nullptr)) {
-          const char* iface_descriptor =
-              klass->GetDexFile().StringByTypeIdx(klass->GetDirectInterfaceTypeIdx(i));
-          LOG(FATAL) << "Check failed: handle_scope_iface != nullptr "
-              << "Debug data for bug 34839984: "
-              << klass->PrettyDescriptor() << " iface #" << i << " " << iface_descriptor
-              << " space: " << DescribeSpace(klass.Get())
-              << " loaders: " << DescribeLoaders(klass.Get(), iface_descriptor);
-        }
+        CHECK(handle_scope_iface != nullptr) << klass->PrettyDescriptor() << " iface #" << i;
         CHECK(handle_scope_iface->IsInterface());
         if (handle_scope_iface->HasBeenRecursivelyInitialized()) {
           // We have already done this for this interface. Skip it.
@@ -4940,7 +4846,7 @@
     // First we initialize all of iface's super-interfaces recursively.
     for (size_t i = 0; i < num_direct_ifaces; i++) {
       ObjPtr<mirror::Class> super_iface = mirror::Class::GetDirectInterface(self, iface.Get(), i);
-      DCHECK(super_iface != nullptr);
+      CHECK(super_iface != nullptr) << iface->PrettyDescriptor() << " iface #" << i;
       if (!super_iface->HasBeenRecursivelyInitialized()) {
         // Recursive step
         handle_super_iface.Assign(super_iface);
@@ -6828,18 +6734,20 @@
     ArtMethod* m = check_vtable->GetElementPtrSize<ArtMethod*>(i, pointer_size);
     CHECK(m != nullptr);
 
-    CHECK_EQ(m->GetMethodIndexDuringLinking(), i)
-        << m->PrettyMethod()
-        << " has an unexpected method index for its spot in the vtable for class"
-        << klass->PrettyClass();
+    if (m->GetMethodIndexDuringLinking() != i) {
+      LOG(WARNING) << m->PrettyMethod()
+                   << " has an unexpected method index for its spot in the vtable for class "
+                   << klass->PrettyClass();
+    }
     ArraySlice<ArtMethod> virtuals = klass->GetVirtualMethodsSliceUnchecked(pointer_size);
     auto is_same_method = [m] (const ArtMethod& meth) {
       return &meth == m;
     };
-    CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
-          std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())
-        << m->PrettyMethod() << " does not seem to be owned by current class "
-        << klass->PrettyClass() << " or any of its superclasses!";
+    if (!((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
+          std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())) {
+      LOG(WARNING) << m->PrettyMethod() << " does not seem to be owned by current class "
+                   << klass->PrettyClass() << " or any of its superclasses!";
+    }
   }
 }
 
@@ -6867,14 +6775,15 @@
                                   other_entry->GetAccessFlags())) {
         continue;
       }
-      CHECK(vtable_entry != other_entry &&
-            !name_comparator.HasSameNameAndSignature(
-                other_entry->GetInterfaceMethodIfProxy(pointer_size)))
-          << "vtable entries " << i << " and " << j << " are identical for "
-          << klass->PrettyClass() << " in method " << vtable_entry->PrettyMethod() << " (0x"
-          << std::hex << reinterpret_cast<uintptr_t>(vtable_entry) << ") and "
-          << other_entry->PrettyMethod() << "  (0x" << std::hex
-          << reinterpret_cast<uintptr_t>(other_entry) << ")";
+      if (vtable_entry == other_entry ||
+          name_comparator.HasSameNameAndSignature(
+               other_entry->GetInterfaceMethodIfProxy(pointer_size))) {
+        LOG(WARNING) << "vtable entries " << i << " and " << j << " are identical for "
+                     << klass->PrettyClass() << " in method " << vtable_entry->PrettyMethod()
+                     << " (0x" << std::hex << reinterpret_cast<uintptr_t>(vtable_entry) << ") and "
+                     << other_entry->PrettyMethod() << "  (0x" << std::hex
+                     << reinterpret_cast<uintptr_t>(other_entry) << ")";
+      }
     }
   }
 }
@@ -7923,7 +7832,7 @@
 
 mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
                                           dex::StringIndex string_idx,
-                                          Handle<mirror::DexCache> dex_cache) {
+                                          ObjPtr<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache != nullptr);
   ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != nullptr) {
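
With the parameter relaxed from Handle<> to ObjPtr<>, callers no longer need a
StackHandleScope just to perform the lookup. A minimal call-site sketch
(variable names assumed, not taken from this change); judging by the signature
change, the lookup has no suspend points, which is why no handle is required:

  // Sketch only: a bare ObjPtr<mirror::DexCache> suffices for the lookup.
  ObjPtr<mirror::DexCache> dex_cache = method->GetDexCache();
  mirror::String* s = class_linker->LookupString(dex_file, string_idx, dex_cache);
  if (s == nullptr) {
    // Not in the dex cache yet; ResolveString is the resolving path.
  }
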
@@ -8261,6 +8170,43 @@
   return resolved;
 }
 
+ArtField* ClassLinker::LookupResolvedField(uint32_t field_idx,
+                                           ObjPtr<mirror::DexCache> dex_cache,
+                                           ObjPtr<mirror::ClassLoader> class_loader,
+                                           bool is_static) {
+  const DexFile& dex_file = *dex_cache->GetDexFile();
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
+  ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(field_id.class_idx_);
+  if (klass == nullptr) {
+    klass = LookupResolvedType(dex_file, field_id.class_idx_, dex_cache, class_loader);
+  }
+  if (klass == nullptr) {
+    // The class has not been resolved yet, so the field is also unresolved.
+    return nullptr;
+  }
+  DCHECK(klass->IsResolved());
+  Thread* self = is_static ? Thread::Current() : nullptr;
+
+  // First try to find a field declared directly by `klass` using the field index.
+  ArtField* resolved_field = is_static
+      ? mirror::Class::FindStaticField(self, klass, dex_cache, field_idx)
+      : klass->FindInstanceField(dex_cache, field_idx);
+
+  if (resolved_field == nullptr) {
+    // If not found in `klass` by field index, search the class hierarchy using the name and type.
+    const char* name = dex_file.GetFieldName(field_id);
+    const char* type = dex_file.GetFieldTypeDescriptor(field_id);
+    resolved_field = is_static
+        ? mirror::Class::FindStaticField(self, klass, name, type)
+        : klass->FindInstanceField(name, type);
+  }
+
+  if (resolved_field != nullptr) {
+    dex_cache->SetResolvedField(field_idx, resolved_field, image_pointer_size_);
+  }
+  return resolved_field;
+}
+
 ArtField* ClassLinker::ResolveField(const DexFile& dex_file,
                                     uint32_t field_idx,
                                     Handle<mirror::DexCache> dex_cache,
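
The referrer-based overload declared in class_linker.h (further below) pairs
with ResolveField; a hypothetical non-throwing lookup with a resolving
fallback, assuming class_linker, field_idx and referrer are in scope:

  // Sketch only: LookupResolvedField returns null instead of resolving, so it
  // is usable on paths that must not throw.
  ArtField* field = class_linker->LookupResolvedField(field_idx, referrer, /* is_static */ false);
  if (field == nullptr) {
    // Full resolution may allocate and may throw (e.g. NoSuchFieldError).
    field = class_linker->ResolveField(field_idx, referrer, /* is_static */ false);
  }
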
@@ -8321,9 +8267,8 @@
     return nullptr;
   }
 
-  StringPiece name(dex_file.StringDataByIdx(field_id.name_idx_));
-  StringPiece type(dex_file.StringDataByIdx(
-      dex_file.GetTypeId(field_id.type_idx_).descriptor_idx_));
+  StringPiece name(dex_file.GetFieldName(field_id));
+  StringPiece type(dex_file.GetFieldTypeDescriptor(field_id));
   resolved = mirror::Class::FindField(self, klass, name, type);
   if (resolved != nullptr) {
     dex_cache->SetResolvedField(field_idx, resolved, image_pointer_size_);
@@ -8573,6 +8518,15 @@
   }
 }
 
+void ClassLinker::SetEntryPointsForObsoleteMethod(ArtMethod* method) const {
+  DCHECK(method->IsObsolete());
+  // We cannot mess with the entrypoints of native methods because they are used to determine how
+  // large the method's quick stack frame is. Without this information we cannot walk the stacks.
+  if (!method->IsNative()) {
+    method->SetEntryPointFromQuickCompiledCode(GetInvokeObsoleteMethodStub());
+  }
+}
+
 void ClassLinker::DumpForSigQuit(std::ostream& os) {
   ScopedObjectAccess soa(Thread::Current());
   ReaderMutexLock mu(soa.Self(), *Locks::classlinker_classes_lock_);
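
A hypothetical use during class redefinition (loop shape assumed, not taken
from this change), where every obsolete method gets the obsolete-method stub:

  // Sketch only: native methods are skipped inside the helper because their
  // entrypoints encode the quick frame size needed for stack walking.
  for (ArtMethod& m : klass->GetMethods(class_linker->GetImagePointerSize())) {
    if (m.IsObsolete()) {
      class_linker->SetEntryPointsForObsoleteMethod(&m);
    }
  }
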
@@ -8947,7 +8901,7 @@
   return ret;
 }
 
-std::unordered_set<std::string> ClassLinker::GetClassDescriptorsForProfileKeys(
+std::unordered_set<std::string> ClassLinker::GetClassDescriptorsForResolvedClasses(
     const std::set<DexCacheResolvedClasses>& classes) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   std::unordered_set<std::string> ret;
@@ -8962,14 +8916,13 @@
       if (dex_cache != nullptr) {
         const DexFile* dex_file = dex_cache->GetDexFile();
         // There could be duplicates if two dex files with the same location are mapped.
-        location_to_dex_file.emplace(
-            ProfileCompilationInfo::GetProfileDexFileKey(dex_file->GetLocation()), dex_file);
+        location_to_dex_file.emplace(dex_file->GetLocation(), dex_file);
       }
     }
   }
   for (const DexCacheResolvedClasses& info : classes) {
-    const std::string& profile_key = info.GetDexLocation();
-    auto found = location_to_dex_file.find(profile_key);
+    const std::string& location = info.GetDexLocation();
+    auto found = location_to_dex_file.find(location);
     if (found != location_to_dex_file.end()) {
       const DexFile* dex_file = found->second;
       VLOG(profiler) << "Found opened dex file for " << dex_file->GetLocation() << " with "
@@ -8981,7 +8934,7 @@
         ret.insert(descriptor);
       }
     } else {
-      VLOG(class_linker) << "Failed to find opened dex file for profile key " << profile_key;
+      VLOG(class_linker) << "Failed to find opened dex file for location " << location;
     }
   }
   return ret;
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 33eed3c..a26e63b 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -247,7 +247,7 @@
   // result in the DexCache if found. Return null if not found.
   mirror::String* LookupString(const DexFile& dex_file,
                                dex::StringIndex string_idx,
-                               Handle<mirror::DexCache> dex_cache)
+                               ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Resolve a Type with the given index from the DexFile, storing the
@@ -333,7 +333,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::dex_lock_, !Roles::uninterruptible_);
 
-  ArtField* GetResolvedField(uint32_t field_idx, ObjPtr<mirror::DexCache> dex_cache)
+  ArtField* LookupResolvedField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_);
   ArtField* ResolveField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -544,6 +544,10 @@
   void SetEntryPointsToInterpreter(ArtMethod* method) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Set the entrypoints up for an obsolete method.
+  void SetEntryPointsForObsoleteMethod(ArtMethod* method) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Attempts to insert a class into a class table.  Returns null if
   // the class was inserted, otherwise returns an existing class with
   // the same descriptor and ClassLoader.
@@ -617,7 +621,8 @@
   std::set<DexCacheResolvedClasses> GetResolvedClasses(bool ignore_boot_classes)
       REQUIRES(!Locks::dex_lock_);
 
-  std::unordered_set<std::string> GetClassDescriptorsForProfileKeys(
+  // Returns the descriptors of the resolved classes whose dex files are currently loaded.
+  std::unordered_set<std::string> GetClassDescriptorsForResolvedClasses(
       const std::set<DexCacheResolvedClasses>& classes)
       REQUIRES(!Locks::dex_lock_);
 
@@ -841,6 +846,13 @@
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Find a field by its field index without triggering resolution of its declaring class.
+  ArtField* LookupResolvedField(uint32_t field_idx,
+                                ObjPtr<mirror::DexCache> dex_cache,
+                                ObjPtr<mirror::ClassLoader> class_loader,
+                                bool is_static)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void RegisterDexFileLocked(const DexFile& dex_file,
                              ObjPtr<mirror::DexCache> dex_cache,
                              ObjPtr<mirror::ClassLoader> class_loader)
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 21cdede..b421810 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -139,7 +139,7 @@
     EXPECT_FALSE(JavaLangObject->IsFinal());
     EXPECT_FALSE(JavaLangObject->IsPrimitive());
     EXPECT_FALSE(JavaLangObject->IsSynthetic());
-    EXPECT_EQ(2U, JavaLangObject->NumDirectMethods());
+    EXPECT_EQ(4U, JavaLangObject->NumDirectMethods());
     EXPECT_EQ(11U, JavaLangObject->NumVirtualMethods());
     if (!kUseBrooksReadBarrier) {
       EXPECT_EQ(2U, JavaLangObject->NumInstanceFields());
@@ -618,7 +618,7 @@
   ClassExtOffsets() : CheckOffsets<mirror::ClassExt>(false, "Ldalvik/system/ClassExt;") {
     addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_dex_caches_), "obsoleteDexCaches");
     addOffset(OFFSETOF_MEMBER(mirror::ClassExt, obsolete_methods_), "obsoleteMethods");
-    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, original_dex_file_bytes_), "originalDexFile");
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, original_dex_file_), "originalDexFile");
     addOffset(OFFSETOF_MEMBER(mirror::ClassExt, verify_error_), "verifyError");
   }
 };
@@ -668,7 +668,6 @@
 
 struct DexCacheOffsets : public CheckOffsets<mirror::DexCache> {
   DexCacheOffsets() : CheckOffsets<mirror::DexCache>(false, "Ljava/lang/DexCache;") {
-    addOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_), "dex");
     addOffset(OFFSETOF_MEMBER(mirror::DexCache, dex_file_), "dexFile");
     addOffset(OFFSETOF_MEMBER(mirror::DexCache, location_), "location");
     addOffset(OFFSETOF_MEMBER(mirror::DexCache, num_resolved_call_sites_), "numResolvedCallSites");
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index af4f998..374b711 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -55,6 +55,12 @@
   return nullptr;
 }
 
+// Work around http://b/35845221.
+#pragma clang diagnostic push
+#if __clang_major__ < 4
+#pragma clang diagnostic ignored "-Wunreachable-code"
+#endif
+
 mirror::Class* ClassTable::UpdateClass(const char* descriptor, mirror::Class* klass, size_t hash) {
   WriterMutexLock mu(Thread::Current(), lock_);
   // Should only be updating latest table.
@@ -80,6 +86,8 @@
   return existing;
 }
 
+#pragma clang diagnostic pop
+
 size_t ClassTable::CountDefiningLoaderClasses(ObjPtr<mirror::ClassLoader> defining_loader,
                                               const ClassSet& set) const {
   size_t count = 0;
@@ -105,6 +113,20 @@
   return CountDefiningLoaderClasses(defining_loader, classes_.back());
 }
 
+size_t ClassTable::NumReferencedZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  size_t sum = 0;
+  for (size_t i = 0; i < classes_.size() - 1; ++i) {
+    sum += classes_[i].Size();
+  }
+  return sum;
+}
+
+size_t ClassTable::NumReferencedNonZygoteClasses() const {
+  ReaderMutexLock mu(Thread::Current(), lock_);
+  return classes_.back().Size();
+}
+
 mirror::Class* ClassTable::Lookup(const char* descriptor, size_t hash) {
   DescriptorHashPair pair(descriptor, hash);
   ReaderMutexLock mu(Thread::Current(), lock_);
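
Unlike NumZygoteClasses()/NumNonZygoteClasses(), the new accessors count every
class the table references, whatever its defining loader. A hypothetical
consistency check over one table (names assumed):

  // Sketch only: the per-defining-loader counts can never exceed the
  // referenced counts for the same table.
  size_t defined = table->NumZygoteClasses(loader) + table->NumNonZygoteClasses(loader);
  size_t referenced = table->NumReferencedZygoteClasses() + table->NumReferencedNonZygoteClasses();
  CHECK_LE(defined, referenced);
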
diff --git a/runtime/class_table.h b/runtime/class_table.h
index 711eae4..430edbb 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -144,16 +144,26 @@
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Returns the number of classes in previous snapshots.
+  // Returns the number of classes in previous snapshots defined by `defining_loader`.
   size_t NumZygoteClasses(ObjPtr<mirror::ClassLoader> defining_loader) const
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Returns all off the classes in the lastest snapshot.
+  // Returns the number of classes in the latest snapshot defined by `defining_loader`.
   size_t NumNonZygoteClasses(ObjPtr<mirror::ClassLoader> defining_loader) const
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Returns the number of classes in previous snapshots, regardless of the defining loader.
+  size_t NumReferencedZygoteClasses() const
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the number of classes in the latest snapshot, regardless of the defining loader.
+  size_t NumReferencedNonZygoteClasses() const
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Update a class in the table with the new class. Returns the existing class which was replaced.
   mirror::Class* UpdateClass(const char* descriptor, mirror::Class* new_klass, size_t hash)
       REQUIRES(!lock_)
@@ -246,6 +256,7 @@
   }
 
  private:
+  // Only copies classes.
   void CopyWithoutLocks(const ClassTable& source_table) NO_THREAD_SAFETY_ANALYSIS;
   void InsertWithoutLocks(ObjPtr<mirror::Class> klass) NO_THREAD_SAFETY_ANALYSIS;
 
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 78ba6e7..15724a1 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -68,6 +68,74 @@
 
 using android::base::StringPrintf;
 
+static const uint8_t kBase64Map[256] = {
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63,
+  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255,
+  255, 254, 255, 255, 255,   0,   1,   2,   3,   4,   5,   6,
+    7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  // NOLINT
+   19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,  // NOLINT
+  255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,
+   37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  // NOLINT
+   49,  50,  51, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // NOLINT
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+  255, 255, 255, 255
+};
+
+uint8_t* DecodeBase64(const char* src, size_t* dst_size) {
+  CHECK(dst_size != nullptr);
+  std::vector<uint8_t> tmp;
+  uint32_t t = 0, y = 0;
+  int g = 3;
+  for (size_t i = 0; src[i] != '\0'; ++i) {
+    uint8_t c = kBase64Map[src[i] & 0xFF];
+    if (c == 255) continue;
+    // The final '=' symbols are read and used to trim the remaining bytes.
+    if (c == 254) {
+      c = 0;
+      // Prevent g < 0, which would potentially allow an overflow later.
+      if (--g < 0) {
+        *dst_size = 0;
+        return nullptr;
+      }
+    } else if (g != 3) {
+      // We only allow '=' at the end.
+      *dst_size = 0;
+      return nullptr;
+    }
+    t = (t << 6) | c;
+    if (++y == 4) {
+      tmp.push_back((t >> 16) & 255);
+      if (g > 1) {
+        tmp.push_back((t >> 8) & 255);
+      }
+      if (g > 2) {
+        tmp.push_back(t & 255);
+      }
+      y = t = 0;
+    }
+  }
+  if (y != 0) {
+    *dst_size = 0;
+    return nullptr;
+  }
+  std::unique_ptr<uint8_t[]> dst(new uint8_t[tmp.size()]);
+  *dst_size = tmp.size();
+  std::copy(tmp.begin(), tmp.end(), dst.get());
+  return dst.release();
+}
+
 ScratchFile::ScratchFile() {
   // ANDROID_DATA needs to be set
   CHECK_NE(static_cast<char*>(nullptr), getenv("ANDROID_DATA")) <<
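
A minimal usage sketch for the new helper (the base64 literal below is
hypothetical; it decodes to the seven bytes "dex\n035"):

  // Sketch only: DecodeBase64 returns a heap buffer the caller owns, so wrap
  // it in a std::unique_ptr immediately.
  size_t size = 0;
  std::unique_ptr<uint8_t[]> bytes(DecodeBase64("ZGV4CjAzNQ==", &size));
  CHECK(bytes != nullptr);
  CHECK_EQ(size, 7u);
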
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index d7abe2a..bfa273d 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -44,6 +44,8 @@
 class Runtime;
 typedef std::vector<std::pair<std::string, const void*>> RuntimeOptions;
 
+uint8_t* DecodeBase64(const char* src, size_t* dst_size);
+
 class ScratchFile {
  public:
   ScratchFile();
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 4f4bed0..6758d75 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -313,6 +313,14 @@
                               ArtMethod::PrettyMethod(method).c_str()).c_str());
 }
 
+// InternalError
+
+void ThrowInternalError(const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  ThrowException("Ljava/lang/InternalError;", nullptr, fmt, &args);
+  va_end(args);
+}
 
 // IOException
 
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 55a8938..4afef79 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -151,6 +151,12 @@
 void ThrowIncompatibleClassChangeErrorForMethodConflict(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
+// InternalError
+
+void ThrowInternalError(const char* fmt, ...)
+    __attribute__((__format__(__printf__, 1, 2)))
+    REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
+
 // IOException
 
 void ThrowIOException(const char* fmt, ...) __attribute__((__format__(__printf__, 1, 2)))
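
The new helper follows the same printf-style contract as the neighboring
throwers; a hypothetical call site:

  // Sketch only: the __format__ attribute lets the compiler type-check the
  // format arguments.
  ThrowInternalError("Unexpected null dex cache for %s", klass->PrettyClass().c_str());
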
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index b2fba67..868d8df 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -1396,7 +1396,8 @@
     mirror::Class* c = m->GetDeclaringClass();
     location->type_tag = GetTypeTag(c);
     location->class_id = gRegistry->AddRefType(c);
-    location->method_id = ToMethodId(m);
+    // The RI seems to return 0 for all obsolete methods. For compatibility we do the same.
+    location->method_id = m->IsObsolete() ? 0 : ToMethodId(m);
     location->dex_pc = (m->IsNative() || m->IsProxyMethod()) ? static_cast<uint64_t>(-1) : dex_pc;
   }
 }
@@ -1409,6 +1410,15 @@
   return m->GetInterfaceMethodIfProxy(kRuntimePointerSize)->GetName();
 }
 
+bool Dbg::IsMethodObsolete(JDWP::MethodId method_id) {
+  ArtMethod* m = FromMethodId(method_id);
+  if (m == nullptr) {
+    // NB: Since we return 0 as the MID for obsolete methods, we default to true here.
+    return true;
+  }
+  return m->IsObsolete();
+}
+
 std::string Dbg::GetFieldName(JDWP::FieldId field_id) {
   ArtField* f = FromFieldId(field_id);
   if (f == nullptr) {
@@ -3717,10 +3727,9 @@
       if (!m->IsRuntimeMethod()) {
         ++stack_depth;
         if (method == nullptr) {
-          mirror::DexCache* dex_cache = m->GetDeclaringClass()->GetDexCache();
+          const DexFile* dex_file = m->GetDexFile();
           method = m;
-          if (dex_cache != nullptr) {
-            const DexFile* dex_file = dex_cache->GetDexFile();
+          if (dex_file != nullptr) {
             line_number = annotations::GetLineNumFromPC(dex_file, m, GetDexPc());
           }
         }
diff --git a/runtime/debugger.h b/runtime/debugger.h
index a7fd160..27124e1 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -370,6 +370,8 @@
   //
   static std::string GetMethodName(JDWP::MethodId method_id)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  static bool IsMethodObsolete(JDWP::MethodId method_id)
+      REQUIRES_SHARED(Locks::mutator_lock_);
   static JDWP::JdwpError OutputDeclaredFields(JDWP::RefTypeId ref_type_id, bool with_generic,
                                               JDWP::ExpandBuf* pReply)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index b6a2e09..85100ae 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -23,6 +23,7 @@
 #include <string.h>
 #include <sys/file.h>
 #include <sys/stat.h>
+#include <zlib.h>
 
 #include <memory>
 #include <sstream>
@@ -67,6 +68,12 @@
   {'0', '3', '8', '\0'}
 };
 
+uint32_t DexFile::CalculateChecksum() const {
+  const uint32_t non_sum = OFFSETOF_MEMBER(DexFile::Header, signature_);
+  const uint8_t* non_sum_ptr = Begin() + non_sum;
+  return adler32(adler32(0L, Z_NULL, 0), non_sum_ptr, Size() - non_sum);
+}
+
 struct DexFile::AnnotationValue {
   JValue value_;
   uint8_t type_;
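
CalculateChecksum() starts summing at the signature_ field, matching the dex
format rule that the stored Adler-32 covers everything after the checksum
field itself. A hypothetical verification helper built on it:

  // Sketch only: recompute the checksum and compare it with the header value.
  bool ChecksumMatchesHeader(const DexFile& dex_file) {
    return dex_file.CalculateChecksum() == dex_file.GetHeader().checksum_;
  }
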
@@ -179,6 +186,14 @@
                                              std::string* error_msg) {
   ScopedTrace trace(std::string("Open dex file from mapped-memory ") + location);
   CHECK(map.get() != nullptr);
+
+  if (map->Size() < sizeof(DexFile::Header)) {
+    *error_msg = StringPrintf(
+        "DexFile: failed to open dex file '%s' that is too short to have a header",
+        location.c_str());
+    return nullptr;
+  }
+
   std::unique_ptr<DexFile> dex_file = OpenCommon(map->Begin(),
                                                  map->Size(),
                                                  location,
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 58b8e79..1b18d21 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -1088,6 +1088,9 @@
   static int64_t ReadSignedLong(const uint8_t* ptr, int zwidth);
   static uint64_t ReadUnsignedLong(const uint8_t* ptr, int zwidth, bool fill_on_right);
 
+  // Recalculates the checksum of the dex file. Does not use the current value in the header.
+  uint32_t CalculateChecksum() const;
+
   // Returns a human-readable form of the method at an index.
   std::string PrettyMethod(uint32_t method_idx, bool with_signature = true) const;
   // Returns a human-readable form of the field at an index.
@@ -1320,6 +1323,9 @@
   uint32_t NumVirtualMethods() const {
     return header_.virtual_methods_size_;
   }
+  bool IsAtMethod() const {
+    return pos_ >= EndOfInstanceFieldsPos();
+  }
   bool HasNextStaticField() const {
     return pos_ < EndOfStaticFieldsPos();
   }
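
Class data items list static fields, instance fields, direct methods, then
virtual methods, so IsAtMethod() flips to true once the cursor moves past the
last instance field. A hypothetical walk (iterator usage assumed from the
existing API; class_data assumed non-null):

  ClassDataItemIterator it(dex_file, dex_file.GetClassData(class_def));
  for (; it.HasNext(); it.Next()) {
    if (it.IsAtMethod()) {
      // From here on the iterator yields method items, not field items.
    }
  }
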
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index d39ea35..6b9654d 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -41,7 +41,80 @@
 };
 
 namespace {
-mirror::Object* CreateAnnotationMember(Handle<mirror::Class> klass,
+
+// A helper class that contains all the data needed to do annotation lookup.
+class ClassData {
+ public:
+  explicit ClassData(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_)
+    : ClassData(ScopedNullHandle<mirror::Class>(),  // klass
+                method,
+                *method->GetDexFile(),
+                &method->GetClassDef()) {}
+
+  // Requires Scope to be able to create at least 1 handle.
+  template <typename Scope>
+  ClassData(Scope& hs, ArtField* field) REQUIRES_SHARED(Locks::mutator_lock_)
+    : ClassData(hs.NewHandle(field->GetDeclaringClass())) { }
+
+  explicit ClassData(Handle<mirror::Class> klass) REQUIRES_SHARED(art::Locks::mutator_lock_)
+    : ClassData(klass,  // klass
+                nullptr,  // method
+                klass->GetDexFile(),
+                klass->GetClassDef()) {}
+
+  const DexFile& GetDexFile() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return dex_file_;
+  }
+
+  const DexFile::ClassDef* GetClassDef() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return class_def_;
+  }
+
+  ObjPtr<mirror::DexCache> GetDexCache() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (method_ != nullptr) {
+      return method_->GetDexCache();
+    } else {
+      return real_klass_->GetDexCache();
+    }
+  }
+
+  ObjPtr<mirror::ClassLoader> GetClassLoader() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (method_ != nullptr) {
+      return method_->GetDeclaringClass()->GetClassLoader();
+    } else {
+      return real_klass_->GetClassLoader();
+    }
+  }
+
+  ObjPtr<mirror::Class> GetRealClass() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (method_ != nullptr) {
+      return method_->GetDeclaringClass();
+    } else {
+      return real_klass_.Get();
+    }
+  }
+
+ private:
+  ClassData(Handle<mirror::Class> klass,
+            ArtMethod* method,
+            const DexFile& dex_file,
+            const DexFile::ClassDef* class_def) REQUIRES_SHARED(Locks::mutator_lock_)
+      : real_klass_(klass),
+        method_(method),
+        dex_file_(dex_file),
+        class_def_(class_def) {
+    DCHECK((method_ == nullptr) || real_klass_.IsNull());
+  }
+
+  Handle<mirror::Class> real_klass_;
+  ArtMethod* method_;
+  const DexFile& dex_file_;
+  const DexFile::ClassDef* class_def_;
+
+  DISALLOW_COPY_AND_ASSIGN(ClassData);
+};
+
+mirror::Object* CreateAnnotationMember(const ClassData& klass,
                                        Handle<mirror::Class> annotation_class,
                                        const uint8_t** annotation)
     REQUIRES_SHARED(Locks::mutator_lock_);
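
The three public constructors mirror the three annotation lookup entry points
(method, field, class). A hypothetical construction at each kind of call site,
with method and field assumed in scope:

  // Sketch only.
  const ClassData from_method(method);    // ArtMethod*: dex file/class def come from the method.
  StackHandleScope<2> hs(Thread::Current());
  const ClassData from_field(hs, field);  // ArtField*: consumes one handle for the declaring class.
  const ClassData from_class(hs.NewHandle(field->GetDeclaringClass()));  // Handle<mirror::Class>.
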
@@ -185,9 +258,8 @@
 const DexFile::AnnotationSetItem* FindAnnotationSetForMethod(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const DexFile* dex_file = method->GetDexFile();
-  mirror::Class* klass = method->GetDeclaringClass();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file->GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file->GetAnnotationsDirectory(method->GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
@@ -209,9 +281,8 @@
 const DexFile::ParameterAnnotationsItem* FindAnnotationsItemForMethod(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const DexFile* dex_file = method->GetDexFile();
-  mirror::Class* klass = method->GetDeclaringClass();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file->GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file->GetAnnotationsDirectory(method->GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
@@ -230,30 +301,34 @@
   return nullptr;
 }
 
-const DexFile::AnnotationSetItem* FindAnnotationSetForClass(Handle<mirror::Class> klass)
+const DexFile::AnnotationSetItem* FindAnnotationSetForClass(const ClassData& klass)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file.GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file.GetAnnotationsDirectory(*klass.GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
   return dex_file.GetClassAnnotationSet(annotations_dir);
 }
 
-mirror::Object* ProcessEncodedAnnotation(Handle<mirror::Class> klass, const uint8_t** annotation)
+mirror::Object* ProcessEncodedAnnotation(const ClassData& klass, const uint8_t** annotation)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   uint32_t type_index = DecodeUnsignedLeb128(annotation);
   uint32_t size = DecodeUnsignedLeb128(annotation);
 
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
-  StackHandleScope<2> hs(self);
+  StackHandleScope<4> hs(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   Handle<mirror::Class> annotation_class(hs.NewHandle(
-      class_linker->ResolveType(klass->GetDexFile(), dex::TypeIndex(type_index), klass.Get())));
+      class_linker->ResolveType(klass.GetDexFile(),
+                                dex::TypeIndex(type_index),
+                                hs.NewHandle(klass.GetDexCache()),
+                                hs.NewHandle(klass.GetClassLoader()))));
   if (annotation_class == nullptr) {
-    LOG(INFO) << "Unable to resolve " << klass->PrettyClass() << " annotation class " << type_index;
+    LOG(INFO) << "Unable to resolve " << klass.GetRealClass()->PrettyClass()
+              << " annotation class " << type_index;
     DCHECK(Thread::Current()->IsExceptionPending());
     Thread::Current()->ClearException();
     return nullptr;
@@ -300,13 +375,13 @@
 }
 
 template <bool kTransactionActive>
-bool ProcessAnnotationValue(Handle<mirror::Class> klass,
+bool ProcessAnnotationValue(const ClassData& klass,
                             const uint8_t** annotation_ptr,
                             DexFile::AnnotationValue* annotation_value,
                             Handle<mirror::Class> array_class,
                             DexFile::AnnotationResultStyle result_style)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ObjPtr<mirror::Object> element_object = nullptr;
   bool set_object = false;
@@ -361,9 +436,8 @@
         annotation_value->value_.SetI(index);
       } else {
         StackHandleScope<1> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
         element_object = Runtime::Current()->GetClassLinker()->ResolveString(
-            klass->GetDexFile(), dex::StringIndex(index), dex_cache);
+            klass.GetDexFile(), dex::StringIndex(index), hs.NewHandle(klass.GetDexCache()));
         set_object = true;
         if (element_object == nullptr) {
           return false;
@@ -377,8 +451,12 @@
         annotation_value->value_.SetI(index);
       } else {
         dex::TypeIndex type_index(index);
+        StackHandleScope<2> hs(self);
         element_object = Runtime::Current()->GetClassLinker()->ResolveType(
-            klass->GetDexFile(), type_index, klass.Get());
+            klass.GetDexFile(),
+            type_index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()));
         set_object = true;
         if (element_object == nullptr) {
           CHECK(self->IsExceptionPending());
@@ -399,12 +477,13 @@
       if (result_style == DexFile::kAllRaw) {
         annotation_value->value_.SetI(index);
       } else {
-        StackHandleScope<2> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+        StackHandleScope<2> hs(self);
         ArtMethod* method = class_linker->ResolveMethodWithoutInvokeType(
-            klass->GetDexFile(), index, dex_cache, class_loader);
+            klass.GetDexFile(),
+            index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()));
         if (method == nullptr) {
           return false;
         }
@@ -439,10 +518,11 @@
         annotation_value->value_.SetI(index);
       } else {
         StackHandleScope<2> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* field = Runtime::Current()->GetClassLinker()->ResolveFieldJLS(
-            klass->GetDexFile(), index, dex_cache, class_loader);
+            klass.GetDexFile(),
+            index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()));
         if (field == nullptr) {
           return false;
         }
@@ -467,10 +547,12 @@
         annotation_value->value_.SetI(index);
       } else {
         StackHandleScope<3> hs(self);
-        Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-        Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
         ArtField* enum_field = Runtime::Current()->GetClassLinker()->ResolveField(
-            klass->GetDexFile(), index, dex_cache, class_loader, true);
+            klass.GetDexFile(),
+            index,
+            hs.NewHandle(klass.GetDexCache()),
+            hs.NewHandle(klass.GetClassLoader()),
+            true);
         if (enum_field == nullptr) {
           return false;
         } else {
@@ -595,10 +677,10 @@
   return true;
 }
 
-mirror::Object* CreateAnnotationMember(Handle<mirror::Class> klass,
+mirror::Object* CreateAnnotationMember(const ClassData& klass,
                                        Handle<mirror::Class> annotation_class,
                                        const uint8_t** annotation) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<5> hs(self);
@@ -666,12 +748,12 @@
 }
 
 const DexFile::AnnotationItem* GetAnnotationItemFromAnnotationSet(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set,
     uint32_t visibility,
     Handle<mirror::Class> annotation_class)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   for (uint32_t i = 0; i < annotation_set->size_; ++i) {
     const DexFile::AnnotationItem* annotation_item = dex_file.GetAnnotationItem(annotation_set, i);
     if (!IsVisibilityCompatible(annotation_item->visibility_, visibility)) {
@@ -679,12 +761,16 @@
     }
     const uint8_t* annotation = annotation_item->annotation_;
     uint32_t type_index = DecodeUnsignedLeb128(&annotation);
+    StackHandleScope<2> hs(Thread::Current());
     mirror::Class* resolved_class = Runtime::Current()->GetClassLinker()->ResolveType(
-        klass->GetDexFile(), dex::TypeIndex(type_index), klass.Get());
+        klass.GetDexFile(),
+        dex::TypeIndex(type_index),
+        hs.NewHandle(klass.GetDexCache()),
+        hs.NewHandle(klass.GetClassLoader()));
     if (resolved_class == nullptr) {
       std::string temp;
       LOG(WARNING) << StringPrintf("Unable to resolve %s annotation class %d",
-                                   klass->GetDescriptor(&temp), type_index);
+                                   klass.GetRealClass()->GetDescriptor(&temp), type_index);
       CHECK(Thread::Current()->IsExceptionPending());
       Thread::Current()->ClearException();
       continue;
@@ -698,7 +784,7 @@
 }
 
 mirror::Object* GetAnnotationObjectFromAnnotationSet(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set,
     uint32_t visibility,
     Handle<mirror::Class> annotation_class)
@@ -712,13 +798,13 @@
   return ProcessEncodedAnnotation(klass, &annotation);
 }
 
-mirror::Object* GetAnnotationValue(Handle<mirror::Class> klass,
+mirror::Object* GetAnnotationValue(const ClassData& klass,
                                    const DexFile::AnnotationItem* annotation_item,
                                    const char* annotation_name,
                                    Handle<mirror::Class> array_class,
                                    uint32_t expected_type)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   const uint8_t* annotation =
       SearchEncodedAnnotation(dex_file, annotation_item->annotation_, annotation_name);
   if (annotation == nullptr) {
@@ -745,10 +831,10 @@
   return annotation_value.value_.GetL();
 }
 
-mirror::ObjectArray<mirror::String>* GetSignatureValue(Handle<mirror::Class> klass,
+mirror::ObjectArray<mirror::String>* GetSignatureValue(const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   StackHandleScope<1> hs(Thread::Current());
   const DexFile::AnnotationItem* annotation_item =
       SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/Signature;",
@@ -771,10 +857,10 @@
   return obj->AsObjectArray<mirror::String>();
 }
 
-mirror::ObjectArray<mirror::Class>* GetThrowsValue(Handle<mirror::Class> klass,
+mirror::ObjectArray<mirror::Class>* GetThrowsValue(const ClassData& klass,
                                                    const DexFile::AnnotationSetItem* annotation_set)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   StackHandleScope<1> hs(Thread::Current());
   const DexFile::AnnotationItem* annotation_item =
       SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/Throws;",
@@ -798,11 +884,11 @@
 }
 
 mirror::ObjectArray<mirror::Object>* ProcessAnnotationSet(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetItem* annotation_set,
     uint32_t visibility)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<2> hs(self);
@@ -856,11 +942,11 @@
 }
 
 mirror::ObjectArray<mirror::Object>* ProcessAnnotationSetRefList(
-    Handle<mirror::Class> klass,
+    const ClassData& klass,
     const DexFile::AnnotationSetRefList* set_ref_list,
     uint32_t size)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile& dex_file = klass->GetDexFile();
+  const DexFile& dex_file = klass.GetDexFile();
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<1> hs(self);
@@ -899,15 +985,17 @@
     return nullptr;
   }
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
-  return GetAnnotationObjectFromAnnotationSet(field_class, annotation_set,
-                                              DexFile::kDexVisibilityRuntime, annotation_class);
+  const ClassData field_class(hs, field);
+  return GetAnnotationObjectFromAnnotationSet(field_class,
+                                              annotation_set,
+                                              DexFile::kDexVisibilityRuntime,
+                                              annotation_class);
 }
 
 mirror::ObjectArray<mirror::Object>* GetAnnotationsForField(ArtField* field) {
   const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForField(field);
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
+  const ClassData field_class(hs, field);
   return ProcessAnnotationSet(field_class, annotation_set, DexFile::kDexVisibilityRuntime);
 }
 
@@ -917,7 +1005,7 @@
     return nullptr;
   }
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
+  const ClassData field_class(hs, field);
   return GetSignatureValue(field_class, annotation_set);
 }
 
@@ -927,17 +1015,17 @@
     return false;
   }
   StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> field_class(hs.NewHandle(field->GetDeclaringClass()));
+  const ClassData field_class(hs, field);
   const DexFile::AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
       field_class, annotation_set, DexFile::kDexVisibilityRuntime, annotation_class);
   return annotation_item != nullptr;
 }
 
 mirror::Object* GetAnnotationDefaultValue(ArtMethod* method) {
-  const DexFile* dex_file = method->GetDexFile();
-  mirror::Class* klass = method->GetDeclaringClass();
+  const ClassData klass(method);
+  const DexFile* dex_file = &klass.GetDexFile();
   const DexFile::AnnotationsDirectoryItem* annotations_dir =
-      dex_file->GetAnnotationsDirectory(*klass->GetClassDef());
+      dex_file->GetAnnotationsDirectory(*klass.GetClassDef());
   if (annotations_dir == nullptr) {
     return nullptr;
   }
@@ -965,10 +1053,9 @@
     return nullptr;
   }
   DexFile::AnnotationValue annotation_value;
-  StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::Class> h_klass(hs.NewHandle(klass));
+  StackHandleScope<1> hs(Thread::Current());
   Handle<mirror::Class> return_type(hs.NewHandle(method->GetReturnType(true /* resolve */)));
-  if (!ProcessAnnotationValue<false>(h_klass,
+  if (!ProcessAnnotationValue<false>(klass,
                                      &annotation,
                                      &annotation_value,
                                      return_type,
@@ -983,17 +1070,15 @@
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetAnnotationObjectFromAnnotationSet(method_class, annotation_set,
+  return GetAnnotationObjectFromAnnotationSet(ClassData(method), annotation_set,
                                               DexFile::kDexVisibilityRuntime, annotation_class);
 }
 
 mirror::ObjectArray<mirror::Object>* GetAnnotationsForMethod(ArtMethod* method) {
   const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForMethod(method);
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return ProcessAnnotationSet(method_class, annotation_set, DexFile::kDexVisibilityRuntime);
+  return ProcessAnnotationSet(ClassData(method),
+                              annotation_set,
+                              DexFile::kDexVisibilityRuntime);
 }
 
 mirror::ObjectArray<mirror::Class>* GetExceptionTypesForMethod(ArtMethod* method) {
@@ -1001,9 +1086,7 @@
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetThrowsValue(method_class, annotation_set);
+  return GetThrowsValue(ClassData(method), annotation_set);
 }
 
 mirror::ObjectArray<mirror::Object>* GetParameterAnnotations(ArtMethod* method) {
@@ -1019,9 +1102,7 @@
     return nullptr;
   }
   uint32_t size = set_ref_list->size_;
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return ProcessAnnotationSetRefList(method_class, set_ref_list, size);
+  return ProcessAnnotationSetRefList(ClassData(method), set_ref_list, size);
 }
 
 mirror::Object* GetAnnotationForMethodParameter(ArtMethod* method,
@@ -1045,9 +1126,7 @@
   const DexFile::AnnotationSetItem* annotation_set =
      dex_file->GetSetRefItemItem(annotation_set_ref);
 
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetAnnotationObjectFromAnnotationSet(method_class,
+  return GetAnnotationObjectFromAnnotationSet(ClassData(method),
                                               annotation_set,
                                               DexFile::kDexVisibilityRuntime,
                                               annotation_class);
@@ -1072,7 +1151,7 @@
     return false;
   }
 
-  StackHandleScope<5> hs(Thread::Current());
+  StackHandleScope<4> hs(Thread::Current());
 
   // Extract the parameters' names String[].
   ObjPtr<mirror::Class> string_class = mirror::String::GetJavaLangString();
@@ -1082,9 +1161,9 @@
     return false;
   }
 
-  Handle<mirror::Class> klass = hs.NewHandle(method->GetDeclaringClass());
+  ClassData data(method);
   Handle<mirror::Object> names_obj =
-      hs.NewHandle(GetAnnotationValue(klass,
+      hs.NewHandle(GetAnnotationValue(data,
                                       annotation_item,
                                       "names",
                                       string_array_class,
@@ -1099,7 +1178,7 @@
     return false;
   }
   Handle<mirror::Object> access_flags_obj =
-      hs.NewHandle(GetAnnotationValue(klass,
+      hs.NewHandle(GetAnnotationValue(data,
                                       annotation_item,
                                       "accessFlags",
                                       int_array_class,
@@ -1118,9 +1197,7 @@
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
-  return GetSignatureValue(method_class, annotation_set);
+  return GetSignatureValue(ClassData(method), annotation_set);
 }
 
 bool IsMethodAnnotationPresent(ArtMethod* method, Handle<mirror::Class> annotation_class,
@@ -1129,37 +1206,39 @@
   if (annotation_set == nullptr) {
     return false;
   }
-  StackHandleScope<1> hs(Thread::Current());
-  Handle<mirror::Class> method_class(hs.NewHandle(method->GetDeclaringClass()));
   const DexFile::AnnotationItem* annotation_item =
-      GetAnnotationItemFromAnnotationSet(method_class, annotation_set, visibility,
-                                         annotation_class);
+      GetAnnotationItemFromAnnotationSet(ClassData(method),
+                                         annotation_set, visibility, annotation_class);
   return annotation_item != nullptr;
 }
 
 mirror::Object* GetAnnotationForClass(Handle<mirror::Class> klass,
                                       Handle<mirror::Class> annotation_class) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  return GetAnnotationObjectFromAnnotationSet(klass, annotation_set, DexFile::kDexVisibilityRuntime,
+  return GetAnnotationObjectFromAnnotationSet(data,
+                                              annotation_set,
+                                              DexFile::kDexVisibilityRuntime,
                                               annotation_class);
 }
 
 mirror::ObjectArray<mirror::Object>* GetAnnotationsForClass(Handle<mirror::Class> klass) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
-  return ProcessAnnotationSet(klass, annotation_set, DexFile::kDexVisibilityRuntime);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
+  return ProcessAnnotationSet(data, annotation_set, DexFile::kDexVisibilityRuntime);
 }
 
 mirror::ObjectArray<mirror::Class>* GetDeclaredClasses(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/MemberClasses;",
+      SearchAnnotationSet(data.GetDexFile(), annotation_set, "Ldalvik/annotation/MemberClasses;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
@@ -1172,7 +1251,7 @@
     return nullptr;
   }
   mirror::Object* obj =
-      GetAnnotationValue(klass, annotation_item, "value", class_array_class,
+      GetAnnotationValue(data, annotation_item, "value", class_array_class,
                          DexFile::kDexAnnotationArray);
   if (obj == nullptr) {
     return nullptr;
@@ -1181,18 +1260,18 @@
 }
 
 mirror::Class* GetDeclaringClass(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/EnclosingClass;",
+      SearchAnnotationSet(data.GetDexFile(), annotation_set, "Ldalvik/annotation/EnclosingClass;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Object* obj = GetAnnotationValue(klass, annotation_item, "value",
+  mirror::Object* obj = GetAnnotationValue(data, annotation_item, "value",
                                            ScopedNullHandle<mirror::Class>(),
                                            DexFile::kDexAnnotationType);
   if (obj == nullptr) {
@@ -1202,28 +1281,30 @@
 }
 
 mirror::Class* GetEnclosingClass(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
   mirror::Class* declaring_class = GetDeclaringClass(klass);
   if (declaring_class != nullptr) {
     return declaring_class;
   }
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/EnclosingMethod;",
+      SearchAnnotationSet(data.GetDexFile(),
+                          annotation_set,
+                          "Ldalvik/annotation/EnclosingMethod;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
   }
   const uint8_t* annotation =
-      SearchEncodedAnnotation(dex_file, annotation_item->annotation_, "value");
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "value");
   if (annotation == nullptr) {
     return nullptr;
   }
   DexFile::AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue<false>(klass,
+  if (!ProcessAnnotationValue<false>(data,
                                      &annotation,
                                      &annotation_value,
                                      ScopedNullHandle<mirror::Class>(),
@@ -1234,10 +1315,11 @@
     return nullptr;
   }
   StackHandleScope<2> hs(Thread::Current());
-  Handle<mirror::DexCache> dex_cache(hs.NewHandle(klass->GetDexCache()));
-  Handle<mirror::ClassLoader> class_loader(hs.NewHandle(klass->GetClassLoader()));
   ArtMethod* method = Runtime::Current()->GetClassLinker()->ResolveMethodWithoutInvokeType(
-      klass->GetDexFile(), annotation_value.value_.GetI(), dex_cache, class_loader);
+      data.GetDexFile(),
+      annotation_value.value_.GetI(),
+      hs.NewHandle(data.GetDexCache()),
+      hs.NewHandle(data.GetClassLoader()));
   if (method == nullptr) {
     return nullptr;
   }
@@ -1245,39 +1327,44 @@
 }
 
 mirror::Object* GetEnclosingMethod(Handle<mirror::Class> klass) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/EnclosingMethod;",
+      SearchAnnotationSet(data.GetDexFile(),
+                          annotation_set,
+                          "Ldalvik/annotation/EnclosingMethod;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  return GetAnnotationValue(klass, annotation_item, "value", ScopedNullHandle<mirror::Class>(),
+  return GetAnnotationValue(data, annotation_item, "value", ScopedNullHandle<mirror::Class>(),
       DexFile::kDexAnnotationMethod);
 }
 
 bool GetInnerClass(Handle<mirror::Class> klass, mirror::String** name) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return false;
   }
   const DexFile::AnnotationItem* annotation_item = SearchAnnotationSet(
-      dex_file, annotation_set, "Ldalvik/annotation/InnerClass;", DexFile::kDexVisibilitySystem);
+      data.GetDexFile(),
+      annotation_set,
+      "Ldalvik/annotation/InnerClass;",
+      DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return false;
   }
   const uint8_t* annotation =
-      SearchEncodedAnnotation(dex_file, annotation_item->annotation_, "name");
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "name");
   if (annotation == nullptr) {
     return false;
   }
   DexFile::AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue<false>(klass,
+  if (!ProcessAnnotationValue<false>(data,
                                      &annotation,
                                      &annotation_value,
                                      ScopedNullHandle<mirror::Class>(),
@@ -1293,24 +1380,24 @@
 }
 
 bool GetInnerClassFlags(Handle<mirror::Class> klass, uint32_t* flags) {
-  const DexFile& dex_file = klass->GetDexFile();
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return false;
   }
   const DexFile::AnnotationItem* annotation_item =
-      SearchAnnotationSet(dex_file, annotation_set, "Ldalvik/annotation/InnerClass;",
+      SearchAnnotationSet(data.GetDexFile(), annotation_set, "Ldalvik/annotation/InnerClass;",
                           DexFile::kDexVisibilitySystem);
   if (annotation_item == nullptr) {
     return false;
   }
   const uint8_t* annotation =
-      SearchEncodedAnnotation(dex_file, annotation_item->annotation_, "accessFlags");
+      SearchEncodedAnnotation(data.GetDexFile(), annotation_item->annotation_, "accessFlags");
   if (annotation == nullptr) {
     return false;
   }
   DexFile::AnnotationValue annotation_value;
-  if (!ProcessAnnotationValue<false>(klass,
+  if (!ProcessAnnotationValue<false>(data,
                                      &annotation,
                                      &annotation_value,
                                      ScopedNullHandle<mirror::Class>(),
@@ -1325,20 +1412,22 @@
 }
 
 mirror::ObjectArray<mirror::String>* GetSignatureAnnotationForClass(Handle<mirror::Class> klass) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return nullptr;
   }
-  return GetSignatureValue(klass, annotation_set);
+  return GetSignatureValue(data, annotation_set);
 }
 
 bool IsClassAnnotationPresent(Handle<mirror::Class> klass, Handle<mirror::Class> annotation_class) {
-  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(klass);
+  ClassData data(klass);
+  const DexFile::AnnotationSetItem* annotation_set = FindAnnotationSetForClass(data);
   if (annotation_set == nullptr) {
     return false;
   }
   const DexFile::AnnotationItem* annotation_item = GetAnnotationItemFromAnnotationSet(
-      klass, annotation_set, DexFile::kDexVisibilityRuntime, annotation_class);
+      data, annotation_set, DexFile::kDexVisibilityRuntime, annotation_class);
   return annotation_item != nullptr;
 }
 
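Editor's note: the hunks above thread a ClassData through the annotation helpers in place of a raw Handle<mirror::Class>. A minimal sketch of the idea, assuming hypothetical member names (the real ClassData is defined earlier in dex_file_annotations.cc and is not shown in this hunk): it caches the dex file, dex cache, and class loader once, so each helper stops re-deriving them from the class.

    // Sketch only, not the actual ART definition.
    class ClassDataSketch {
     public:
      explicit ClassDataSketch(Handle<mirror::Class> klass)
          REQUIRES_SHARED(Locks::mutator_lock_)
          : dex_file_(klass->GetDexFile()),
            dex_cache_(klass->GetDexCache()),
            class_loader_(klass->GetClassLoader()) {}
      const DexFile& GetDexFile() const { return dex_file_; }
      mirror::DexCache* GetDexCache() const { return dex_cache_; }
      mirror::ClassLoader* GetClassLoader() const { return class_loader_; }
     private:
      const DexFile& dex_file_;
      mirror::DexCache* const dex_cache_;
      mirror::ClassLoader* const class_loader_;
    };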
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 0b3f16a..11b3cd0 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -17,7 +17,6 @@
 #include "dex_file_verifier.h"
 
 #include <inttypes.h>
-#include <zlib.h>
 
 #include <limits>
 #include <memory>
@@ -368,11 +367,8 @@
     return false;
   }
 
+  uint32_t adler_checksum = dex_file_->CalculateChecksum();
   // Compute and verify the checksum in the header.
-  uint32_t adler_checksum = adler32(0L, Z_NULL, 0);
-  const uint32_t non_sum = sizeof(header_->magic_) + sizeof(header_->checksum_);
-  const uint8_t* non_sum_ptr = reinterpret_cast<const uint8_t*>(header_) + non_sum;
-  adler_checksum = adler32(adler_checksum, non_sum_ptr, expected_size - non_sum);
   if (adler_checksum != header_->checksum_) {
     if (verify_checksum_) {
       ErrorStringPrintf("Bad checksum (%08x, expected %08x)", adler_checksum, header_->checksum_);
diff --git a/runtime/dexopt_test.cc b/runtime/dexopt_test.cc
index 5167869..db65e40 100644
--- a/runtime/dexopt_test.cc
+++ b/runtime/dexopt_test.cc
@@ -111,7 +111,7 @@
                                                  &error_msg));
   ASSERT_TRUE(image_header != nullptr) << error_msg;
   const OatHeader& oat_header = odex_file->GetOatHeader();
-  uint32_t combined_checksum = OatFileAssistant::CalculateCombinedImageChecksum();
+  uint32_t combined_checksum = image_header->GetOatChecksum();
 
   if (CompilerFilter::DependsOnImageChecksum(filter)) {
     if (with_alternate_image) {
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index 3bc49b8..ba8cec3 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -43,6 +43,7 @@
 namespace art {
 
 inline ArtMethod* GetResolvedMethod(ArtMethod* outer_method,
+                                    const MethodInfo& method_info,
                                     const InlineInfo& inline_info,
                                     const InlineInfoEncoding& encoding,
                                     uint8_t inlining_depth)
@@ -56,7 +57,7 @@
     return inline_info.GetArtMethodAtDepth(encoding, inlining_depth);
   }
 
-  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, inlining_depth);
+  uint32_t method_index = inline_info.GetMethodIndexAtDepth(encoding, method_info, inlining_depth);
   if (inline_info.GetDexPcAtDepth(encoding, inlining_depth) == static_cast<uint32_t>(-1)) {
     // "charAt" special case. It is the only non-leaf method we inline across dex files.
     ArtMethod* inlined_method = jni::DecodeArtMethod(WellKnownClasses::java_lang_String_charAt);
@@ -68,6 +69,7 @@
   ArtMethod* caller = outer_method;
   if (inlining_depth != 0) {
     caller = GetResolvedMethod(outer_method,
+                               method_info,
                                inline_info,
                                encoding,
                                inlining_depth - 1);
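Editor's note: the new MethodInfo parameter reflects dex method indices moving out of the encoded inline info into a separate side table, which GetMethodIndexAtDepth now consults. A hedged sketch of such a table (names hypothetical; the real MethodInfo is defined elsewhere in this change), presumably keeping the encoded stack maps themselves smaller:

    // Sketch: inline infos store a row into this table rather than the
    // method index itself.
    class MethodInfoSketch {
     public:
      explicit MethodInfoSketch(const uint32_t* table) : table_(table) {}
      uint32_t GetMethodIndex(size_t row) const { return table_[row]; }
     private:
      const uint32_t* const table_;
    };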
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index 6301362..b5130d7 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -138,7 +138,7 @@
             reinterpret_cast<uintptr_t>(&virtual_methods.At(0))) / method_size;
         CHECK_LT(throws_index, static_cast<int>(num_virtuals));
         mirror::ObjectArray<mirror::Class>* declared_exceptions =
-            proxy_class->GetThrows()->Get(throws_index);
+            proxy_class->GetProxyThrows()->Get(throws_index);
         mirror::Class* exception_class = exception->GetClass();
         for (int32_t i = 0; i < declared_exceptions->GetLength() && !declares_exception; i++) {
           mirror::Class* declared_exception = declared_exceptions->Get(i);
@@ -201,12 +201,14 @@
       DCHECK(current_code->IsOptimized());
       uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
       CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+      MethodInfo method_info = current_code->GetOptimizedMethodInfo();
       CodeInfoEncoding encoding = code_info.ExtractEncoding();
       StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
       DCHECK(stack_map.IsValid());
       if (stack_map.HasInlineInfo(encoding.stack_map.encoding)) {
         InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
         caller = GetResolvedMethod(outer_method,
+                                   method_info,
                                    inline_info,
                                    encoding.inline_info.encoding,
                                    inline_info.GetDepth(encoding.inline_info.encoding) - 1);
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index c8ee99a..565b4ed 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -29,6 +29,15 @@
   self->QuickDeliverException();
 }
 
+extern "C" NO_RETURN uint64_t artInvokeObsoleteMethod(ArtMethod* method, Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(method->IsObsolete());
+  ScopedQuickEntrypointChecks sqec(self);
+  ThrowInternalError("Attempting to invoke obsolete version of '%s'.",
+                     method->PrettyMethod().c_str());
+  self->QuickDeliverException();
+}
+
 // Called by generated code to throw an exception.
 extern "C" NO_RETURN void artDeliverExceptionFromCode(mirror::Throwable* exception, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -62,9 +71,7 @@
 extern "C" NO_RETURN void artThrowNullPointerExceptionFromSignal(uintptr_t addr, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  self->NoteSignalBeingHandled();
   ThrowNullPointerExceptionFromDexPC(/* check_address */ true, addr);
-  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
@@ -95,9 +102,7 @@
 extern "C" NO_RETURN void artThrowStackOverflowFromCode(Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  self->NoteSignalBeingHandled();
   ThrowStackOverflowError(self);
-  self->NoteSignalHandlerDone();
   self->QuickDeliverException();
 }
 
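Editor's note: artInvokeObsoleteMethod pairs with art_invoke_obsolete_method_stub, declared below in runtime_asm_entrypoints.h. A plausible installation site, sketched under the assumption that a method's quick entrypoint is simply rewritten once it becomes obsolete (GetInvokeObsoleteMethodStub comes from this change; the setter is ArtMethod's usual one):

    // Sketch only: any future invocation now lands in the throwing stub.
    void RouteToObsoleteStub(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
      DCHECK(method->IsObsolete());
      method->SetEntryPointFromQuickCompiledCode(GetInvokeObsoleteMethodStub());
    }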
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 4c3990a..25073a8 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -372,10 +372,11 @@
     uintptr_t outer_pc_offset = current_code->NativeQuickPcOffset(outer_pc);
     CodeInfo code_info = current_code->GetOptimizedCodeInfo();
     CodeInfoEncoding encoding = code_info.ExtractEncoding();
+    MethodInfo method_info = current_code->GetOptimizedMethodInfo();
     InvokeInfo invoke(code_info.GetInvokeInfoForNativePcOffset(outer_pc_offset, encoding));
     if (invoke.IsValid()) {
       *invoke_type = static_cast<InvokeType>(invoke.GetInvokeType(encoding.invoke_info.encoding));
-      *dex_method_index = invoke.GetMethodIndex(encoding.invoke_info.encoding);
+      *dex_method_index = invoke.GetMethodIndex(encoding.invoke_info.encoding, method_info);
       return true;
     }
     return false;
@@ -2323,48 +2324,26 @@
   return artInvokeCommon<kVirtual, true>(method_idx, this_object, self, sp);
 }
 
-// Determine target of interface dispatch. This object is known non-null. First argument
-// is there for consistency but should not be used, as some architectures overwrite it
-// in the assembly trampoline.
-extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUTE_UNUSED,
+// Determine target of interface dispatch. The interface method and this object are known non-null.
+// The interface method is the method returned by the dex cache in the conflict trampoline.
+extern "C" TwoWordReturn artInvokeInterfaceTrampoline(ArtMethod* interface_method,
                                                       mirror::Object* raw_this_object,
                                                       Thread* self,
                                                       ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  CHECK(interface_method != nullptr);
   ObjPtr<mirror::Object> this_object(raw_this_object);
   ScopedQuickEntrypointChecks sqec(self);
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> cls(hs.NewHandle(this_object->GetClass()));
 
   ArtMethod* caller_method = QuickArgumentVisitor::GetCallingMethod(sp);
-
-  // Fetch the dex_method_idx of the target interface method from the caller.
-  uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
-
-  const DexFile::CodeItem* code_item = caller_method->GetCodeItem();
-  CHECK_LT(dex_pc, code_item->insns_size_in_code_units_);
-  const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
-  Instruction::Code instr_code = instr->Opcode();
-  CHECK(instr_code == Instruction::INVOKE_INTERFACE ||
-        instr_code == Instruction::INVOKE_INTERFACE_RANGE)
-      << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
-  uint32_t dex_method_idx;
-  if (instr_code == Instruction::INVOKE_INTERFACE) {
-    dex_method_idx = instr->VRegB_35c();
-  } else {
-    CHECK_EQ(instr_code, Instruction::INVOKE_INTERFACE_RANGE);
-    dex_method_idx = instr->VRegB_3rc();
-  }
-
-  ArtMethod* interface_method = caller_method->GetDexCacheResolvedMethod(
-      dex_method_idx, kRuntimePointerSize);
-  DCHECK(interface_method != nullptr) << dex_method_idx << " " << caller_method->PrettyMethod();
   ArtMethod* method = nullptr;
   ImTable* imt = cls->GetImt(kRuntimePointerSize);
 
   if (LIKELY(interface_method->GetDexMethodIndex() != DexFile::kDexNoIndex)) {
-    // If the dex cache already resolved the interface method, look whether we have
-    // a match in the ImtConflictTable.
+    // If the interface method is already resolved, look whether we have a match in the
+    // ImtConflictTable.
     ArtMethod* conflict_method = imt->Get(ImTable::GetImtIndex(interface_method),
                                           kRuntimePointerSize);
     if (LIKELY(conflict_method->IsRuntimeMethod())) {
@@ -2389,9 +2368,26 @@
       return GetTwoWordFailureValue();  // Failure.
     }
   } else {
-    // The dex cache did not resolve the method, look it up in the dex file
-    // of the caller,
+    // The interface method is unresolved, so look it up in the dex file of the caller.
     DCHECK_EQ(interface_method, Runtime::Current()->GetResolutionMethod());
+
+    // Fetch the dex_method_idx of the target interface method from the caller.
+    uint32_t dex_method_idx;
+    uint32_t dex_pc = QuickArgumentVisitor::GetCallingDexPc(sp);
+    const DexFile::CodeItem* code_item = caller_method->GetCodeItem();
+    DCHECK_LT(dex_pc, code_item->insns_size_in_code_units_);
+    const Instruction* instr = Instruction::At(&code_item->insns_[dex_pc]);
+    Instruction::Code instr_code = instr->Opcode();
+    DCHECK(instr_code == Instruction::INVOKE_INTERFACE ||
+           instr_code == Instruction::INVOKE_INTERFACE_RANGE)
+        << "Unexpected call into interface trampoline: " << instr->DumpString(nullptr);
+    if (instr_code == Instruction::INVOKE_INTERFACE) {
+      dex_method_idx = instr->VRegB_35c();
+    } else {
+      DCHECK_EQ(instr_code, Instruction::INVOKE_INTERFACE_RANGE);
+      dex_method_idx = instr->VRegB_3rc();
+    }
+
     const DexFile* dex_file = caller_method->GetDeclaringClass()->GetDexCache()
         ->GetDexFile();
     uint32_t shorty_len;
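Editor's note: the fallback path now decodes the caller's invoke-interface instruction itself. As a worked example of the two encodings used above:

    // invoke-interface {v1, v2}, meth@0x004d        (format 35c)
    //   instr->VRegB_35c() == 0x4d   // vB holds the method index
    // invoke-interface/range {v0..v3}, meth@0x004d  (format 3rc)
    //   instr->VRegB_3rc() == 0x4d   // same field in the range form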
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 2842c5a..4ca52de 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -40,6 +40,12 @@
   return reinterpret_cast<const void*>(art_quick_to_interpreter_bridge);
 }
 
+// Return the address of stub code for attempting to invoke an obsolete method.
+extern "C" void art_invoke_obsolete_method_stub(ArtMethod*);
+static inline const void* GetInvokeObsoleteMethodStub() {
+  return reinterpret_cast<const void*>(art_invoke_obsolete_method_stub);
+}
+
 // Return the address of quick stub code for handling JNI calls.
 extern "C" void art_quick_generic_jni_trampoline(ArtMethod*);
 static inline const void* GetQuickGenericJniStub() {
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index d0687ce..55a4625 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -133,9 +133,8 @@
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_top, thread_local_alloc_stack_end,
                         sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_alloc_stack_end, held_mutexes, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, nested_signal_state,
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, held_mutexes, flip_function,
                         sizeof(void*) * kLockLevelCount);
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, nested_signal_state, flip_function, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, flip_function, method_verifier, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, method_verifier, thread_local_mark_stack, sizeof(void*));
     EXPECT_OFFSET_DIFF(Thread, tlsPtr_.thread_local_mark_stack, Thread, wait_mutex_, sizeof(void*),
diff --git a/runtime/fault_handler.cc b/runtime/fault_handler.cc
index f9345b6..4220250 100644
--- a/runtime/fault_handler.cc
+++ b/runtime/fault_handler.cc
@@ -28,47 +28,6 @@
 #include "thread-inl.h"
 #include "verify_object-inl.h"
 
-// Note on nested signal support
-// -----------------------------
-//
-// Typically a signal handler should not need to deal with signals that occur within it.
-// However, when a SIGSEGV occurs that is in generated code and is not one of the
-// handled signals (implicit checks), we call a function to try to dump the stack
-// to the log.  This enhances the debugging experience but may have the side effect
-// that it may not work.  If the cause of the original SIGSEGV is a corrupted stack or other
-// memory region, the stack backtrace code may run into trouble and may either crash
-// or fail with an abort (SIGABRT).  In either case we don't want that (new) signal to
-// mask the original signal and thus prevent useful debug output from being presented.
-//
-// In order to handle this situation, before we call the stack tracer we do the following:
-//
-// 1. shutdown the fault manager so that we are talking to the real signal management
-//    functions rather than those in sigchain.
-// 2. use pthread_sigmask to allow SIGSEGV and SIGABRT signals to be delivered to the
-//    thread running the signal handler.
-// 3. set the handler for SIGSEGV and SIGABRT to a secondary signal handler.
-// 4. save the thread's state to the TLS of the current thread using 'setjmp'
-//
-// We then call the stack tracer and one of two things may happen:
-// a. it completes successfully
-// b. it crashes and a signal is raised.
-//
-// In the former case, we fall through and everything is fine.  In the latter case
-// our secondary signal handler gets called in a signal context.  This results in
-// a call to FaultManager::HandledNestedSignal(), an archirecture specific function
-// whose purpose is to call 'longjmp' on the jmp_buf saved in the TLS of the current
-// thread.  This results in a return with a non-zero value from 'setjmp'.  We detect this
-// and write something to the log to tell the user that it happened.
-//
-// Regardless of how we got there, we reach the code after the stack tracer and we
-// restore the signal states to their original values, reinstate the fault manager (thus
-// reestablishing the signal chain) and continue.
-
-// This is difficult to test with a runtime test.  To invoke the nested signal code
-// on any signal, uncomment the following line and run something that throws a
-// NullPointerException.
-// #define TEST_NESTED_SIGNAL
-
 namespace art {
 // Static fault manager object accessed by signal handler.
 FaultManager fault_manager;
@@ -79,13 +38,8 @@
 }
 
 // Signal handler called on SIGSEGV.
-static void art_fault_handler(int sig, siginfo_t* info, void* context) {
-  fault_manager.HandleFault(sig, info, context);
-}
-
-// Signal handler for dealing with a nested signal.
-static void art_nested_signal_handler(int sig, siginfo_t* info, void* context) {
-  fault_manager.HandleNestedSignal(sig, info, context);
+static bool art_fault_handler(int sig, siginfo_t* info, void* context) {
+  return fault_manager.HandleFault(sig, info, context);
 }
 
 FaultManager::FaultManager() : initialized_(false) {
@@ -95,43 +49,15 @@
 FaultManager::~FaultManager() {
 }
 
-static void SetUpArtAction(struct sigaction* action) {
-  action->sa_sigaction = art_fault_handler;
-  sigemptyset(&action->sa_mask);
-  action->sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__APPLE__) && !defined(__mips__)
-  action->sa_restorer = nullptr;
-#endif
-}
-
-void FaultManager::EnsureArtActionInFrontOfSignalChain() {
-  if (initialized_) {
-    struct sigaction action;
-    SetUpArtAction(&action);
-    EnsureFrontOfChain(SIGSEGV, &action);
-  } else {
-    LOG(WARNING) << "Can't call " << __FUNCTION__ << " due to unitialized fault manager";
-  }
-}
-
 void FaultManager::Init() {
   CHECK(!initialized_);
-  struct sigaction action;
-  SetUpArtAction(&action);
-
-  // Set our signal handler now.
-  int e = sigaction(SIGSEGV, &action, &oldaction_);
-  if (e != 0) {
-    VLOG(signals) << "Failed to claim SEGV: " << strerror(errno);
-  }
-  // Make sure our signal handler is called before any user handlers.
-  ClaimSignalChain(SIGSEGV, &oldaction_);
+  AddSpecialSignalHandlerFn(SIGSEGV, art_fault_handler);
   initialized_ = true;
 }
 
 void FaultManager::Release() {
   if (initialized_) {
-    UnclaimSignalChain(SIGSEGV);
+    RemoveSpecialSignalHandlerFn(SIGSEGV, art_fault_handler);
     initialized_ = false;
   }
 }
@@ -156,130 +82,44 @@
   DCHECK(self != nullptr);
   DCHECK(Runtime::Current() != nullptr);
   DCHECK(Runtime::Current()->IsStarted());
-
-  // Now set up the nested signal handler.
-
-  // TODO: add SIGSEGV back to the nested signals when we can handle running out stack gracefully.
-  static const int handled_nested_signals[] = {SIGABRT};
-  constexpr size_t num_handled_nested_signals = arraysize(handled_nested_signals);
-
-  // Release the fault manager so that it will remove the signal chain for
-  // SIGSEGV and we call the real sigaction.
-  fault_manager.Release();
-
-  // The action for SIGSEGV should be the default handler now.
-
-  // Unblock the signals we allow so that they can be delivered in the signal handler.
-  sigset_t sigset;
-  sigemptyset(&sigset);
-  for (int signal : handled_nested_signals) {
-    sigaddset(&sigset, signal);
-  }
-  pthread_sigmask(SIG_UNBLOCK, &sigset, nullptr);
-
-  // If we get a signal in this code we want to invoke our nested signal
-  // handler.
-  struct sigaction action;
-  struct sigaction oldactions[num_handled_nested_signals];
-  action.sa_sigaction = art_nested_signal_handler;
-
-  // Explicitly mask out SIGSEGV and SIGABRT from the nested signal handler.  This
-  // should be the default but we definitely don't want these happening in our
-  // nested signal handler.
-  sigemptyset(&action.sa_mask);
-  for (int signal : handled_nested_signals) {
-    sigaddset(&action.sa_mask, signal);
-  }
-
-  action.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__APPLE__) && !defined(__mips__)
-  action.sa_restorer = nullptr;
-#endif
-
-  // Catch handled signals to invoke our nested handler.
-  bool success = true;
-  for (size_t i = 0; i < num_handled_nested_signals; ++i) {
-    success = sigaction(handled_nested_signals[i], &action, &oldactions[i]) == 0;
-    if (!success) {
-      PLOG(ERROR) << "Unable to set up nested signal handler";
-      break;
+  for (const auto& handler : other_handlers_) {
+    if (handler->Action(sig, info, context)) {
+      return true;
     }
   }
-
-  if (success) {
-    // Save the current state and call the handlers.  If anything causes a signal
-    // our nested signal handler will be invoked and this will longjmp to the saved
-    // state.
-    if (setjmp(*self->GetNestedSignalState()) == 0) {
-      for (const auto& handler : other_handlers_) {
-        if (handler->Action(sig, info, context)) {
-          // Restore the signal handlers, reinit the fault manager and return.  Signal was
-          // handled.
-          for (size_t i = 0; i < num_handled_nested_signals; ++i) {
-            success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
-            if (!success) {
-              PLOG(ERROR) << "Unable to restore signal handler";
-            }
-          }
-          fault_manager.Init();
-          return true;
-        }
-      }
-    } else {
-      LOG(ERROR) << "Nested signal detected - original signal being reported";
-    }
-
-    // Restore the signal handlers.
-    for (size_t i = 0; i < num_handled_nested_signals; ++i) {
-      success = sigaction(handled_nested_signals[i], &oldactions[i], nullptr) == 0;
-      if (!success) {
-        PLOG(ERROR) << "Unable to restore signal handler";
-      }
-    }
-  }
-
-  // Now put the fault manager back in place.
-  fault_manager.Init();
   return false;
 }
 
-void FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
-  // BE CAREFUL ALLOCATING HERE INCLUDING USING LOG(...)
-  //
-  // If malloc calls abort, it will be holding its lock.
-  // If the handler tries to call malloc, it will deadlock.
+bool FaultManager::HandleFault(int sig, siginfo_t* info, void* context) {
   VLOG(signals) << "Handling fault";
+
+#ifdef TEST_NESTED_SIGNAL
+  // Simulate a crash in a handler.
+  raise(SIGSEGV);
+#endif
+
   if (IsInGeneratedCode(info, context, true)) {
     VLOG(signals) << "in generated code, looking for handler";
     for (const auto& handler : generated_code_handlers_) {
       VLOG(signals) << "invoking Action on handler " << handler;
       if (handler->Action(sig, info, context)) {
-#ifdef TEST_NESTED_SIGNAL
-        // In test mode we want to fall through to stack trace handler
-        // on every signal (in reality this will cause a crash on the first
-        // signal).
-        break;
-#else
         // We have handled a signal so it's time to return from the
         // signal handler to the appropriate place.
-        return;
-#endif
+        return true;
       }
     }
 
     // We hit a signal we didn't handle.  This might be something for which
-    // we can give more information about so call all registered handlers to see
-    // if it is.
+    // we can give more information, so call all registered handlers to
+    // see if it is.
     if (HandleFaultByOtherHandlers(sig, info, context)) {
-        return;
+      return true;
     }
   }
 
   // Set a breakpoint in this function to catch unhandled signals.
   art_sigsegv_fault();
-
-  // Pass this on to the next handler in the chain, or the default if none.
-  InvokeUserSignalHandler(sig, info, context);
+  return false;
 }
 
 void FaultManager::AddHandler(FaultHandler* handler, bool generated_code) {
@@ -417,11 +257,7 @@
 
 bool JavaStackTraceHandler::Action(int sig ATTRIBUTE_UNUSED, siginfo_t* siginfo, void* context) {
   // Make sure that we are in the generated code, but we may not have a dex pc.
-#ifdef TEST_NESTED_SIGNAL
-  bool in_generated_code = true;
-#else
   bool in_generated_code = manager_->IsInGeneratedCode(siginfo, context, false);
-#endif
   if (in_generated_code) {
     LOG(ERROR) << "Dumping java stack trace for crash in generated code";
     ArtMethod* method = nullptr;
@@ -432,12 +268,6 @@
     manager_->GetMethodAndReturnPcAndSp(siginfo, context, &method, &return_pc, &sp);
     // Inside of generated code, sp[0] is the method, so sp is the frame.
     self->SetTopOfStack(reinterpret_cast<ArtMethod**>(sp));
-#ifdef TEST_NESTED_SIGNAL
-    // To test the nested signal handler we raise a signal here.  This will cause the
-    // nested signal handler to be called and perform a longjmp back to the setjmp
-    // above.
-    abort();
-#endif
     self->DumpJavaStack(LOG_STREAM(ERROR));
   }
 
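Editor's note: fault handling now goes through sigchain's special-handler API (AddSpecialSignalHandlerFn / RemoveSpecialSignalHandlerFn) with bool-returning handlers, instead of installing a sigaction plus a nested handler. A minimal sketch of that contract, with a hypothetical dispatcher standing in for sigchain:

    #include <signal.h>
    #include <vector>

    using SpecialHandler = bool (*)(int, siginfo_t*, void*);

    static std::vector<SpecialHandler> g_special_handlers;  // hypothetical registry

    // Sketch: handlers are tried in order; true means the signal was consumed
    // and execution resumes, false falls through toward the user's or the
    // default handler at the end of the chain.
    static void Dispatch(int sig, siginfo_t* info, void* context) {
      for (SpecialHandler fn : g_special_handlers) {
        if (fn(sig, info, context)) {
          return;
        }
      }
      // Not handled here: defer to the rest of the signal chain.
    }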
diff --git a/runtime/fault_handler.h b/runtime/fault_handler.h
index 56e0fb7..d56cf17 100644
--- a/runtime/fault_handler.h
+++ b/runtime/fault_handler.h
@@ -42,10 +42,9 @@
 
   // Unclaim signals and delete registered handlers.
   void Shutdown();
-  void EnsureArtActionInFrontOfSignalChain();
 
-  void HandleFault(int sig, siginfo_t* info, void* context);
-  void HandleNestedSignal(int sig, siginfo_t* info, void* context);
+  // Try to handle a fault, returns true if successful.
+  bool HandleFault(int sig, siginfo_t* info, void* context);
 
   // Added handlers are owned by the fault handler and will be freed on Shutdown().
   void AddHandler(FaultHandler* handler, bool generated_code);
diff --git a/runtime/gc/accounting/read_barrier_table.h b/runtime/gc/accounting/read_barrier_table.h
index 86266e2..e77a5b8 100644
--- a/runtime/gc/accounting/read_barrier_table.h
+++ b/runtime/gc/accounting/read_barrier_table.h
@@ -80,7 +80,7 @@
   }
 
   // This should match RegionSpace::kRegionSize. static_assert'ed in concurrent_copying.h.
-  static constexpr size_t kRegionSize = 1 * MB;
+  static constexpr size_t kRegionSize = 256 * KB;
 
  private:
   static constexpr uint64_t kHeapCapacity = 4ULL * GB;  // low 4gb.
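Editor's note: quartering kRegionSize quadruples the region count the table must cover. Reusing the KB/MB/GB constants from the surrounding code and assuming one table entry per region over the 4 GB capacity:

    // 4 GB / 256 KB = 16384 regions (was 4 GB / 1 MB = 4096).
    static_assert((4ULL * GB) / (256 * KB) == 16384, "entries at 256 KB regions");
    static_assert((4ULL * GB) / (1 * MB) == 4096, "entries at 1 MB regions");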
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 1fa2d1a..562fc75 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -141,7 +141,7 @@
   template<bool kUseTail = true>
   class SlotFreeList {
    public:
-    SlotFreeList() : head_(0U), tail_(0), size_(0) {}
+    SlotFreeList() : head_(0U), tail_(0), size_(0), padding_(0) {}
     Slot* Head() const {
       return reinterpret_cast<Slot*>(head_);
     }
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 8f9c187..24ba52f 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -72,12 +72,19 @@
       rb_mark_bit_stack_full_(false),
       mark_stack_lock_("concurrent copying mark stack lock", kMarkSweepMarkStackLock),
       thread_running_gc_(nullptr),
-      is_marking_(false), is_active_(false), is_asserting_to_space_invariant_(false),
+      is_marking_(false),
+      is_active_(false),
+      is_asserting_to_space_invariant_(false),
       region_space_bitmap_(nullptr),
-      heap_mark_bitmap_(nullptr), live_stack_freeze_size_(0), mark_stack_mode_(kMarkStackModeOff),
+      heap_mark_bitmap_(nullptr),
+      live_stack_freeze_size_(0),
+      from_space_num_objects_at_first_pause_(0),
+      from_space_num_bytes_at_first_pause_(0),
+      mark_stack_mode_(kMarkStackModeOff),
       weak_ref_access_enabled_(true),
       skipped_blocks_lock_("concurrent copying bytes blocks lock", kMarkSweepMarkStackLock),
       measure_read_barrier_slow_path_(measure_read_barrier_slow_path),
+      mark_from_read_barrier_measurements_(false),
       rb_slow_path_ns_(0),
       rb_slow_path_count_(0),
       rb_slow_path_count_gc_(0),
@@ -87,6 +94,7 @@
       rb_slow_path_count_gc_total_(0),
       rb_table_(heap_->GetReadBarrierTable()),
       force_evacuate_all_(false),
+      gc_grays_immune_objects_(false),
       immune_gray_stack_lock_("concurrent copying immune gray stack lock",
                               kMarkSweepMarkStackLock) {
   static_assert(space::RegionSpace::kRegionSize == accounting::ReadBarrierTable::kRegionSize,
@@ -1644,10 +1652,10 @@
     // Record freed objects.
     TimingLogger::ScopedTiming split2("RecordFree", GetTimings());
     // Don't include thread-locals that are in the to-space.
-    uint64_t from_bytes = region_space_->GetBytesAllocatedInFromSpace();
-    uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
-    uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
-    uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
+    const uint64_t from_bytes = region_space_->GetBytesAllocatedInFromSpace();
+    const uint64_t from_objects = region_space_->GetObjectsAllocatedInFromSpace();
+    const uint64_t unevac_from_bytes = region_space_->GetBytesAllocatedInUnevacFromSpace();
+    const uint64_t unevac_from_objects = region_space_->GetObjectsAllocatedInUnevacFromSpace();
     uint64_t to_bytes = bytes_moved_.LoadSequentiallyConsistent();
     cumulative_bytes_moved_.FetchAndAddRelaxed(to_bytes);
     uint64_t to_objects = objects_moved_.LoadSequentiallyConsistent();
@@ -1658,8 +1666,18 @@
     }
     CHECK_LE(to_objects, from_objects);
     CHECK_LE(to_bytes, from_bytes);
-    int64_t freed_bytes = from_bytes - to_bytes;
-    int64_t freed_objects = from_objects - to_objects;
+    // cleared_bytes and cleared_objects may be greater than the from-space equivalents since
+    // ClearFromSpace may clear empty unevac regions.
+    uint64_t cleared_bytes;
+    uint64_t cleared_objects;
+    {
+      TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
+      region_space_->ClearFromSpace(&cleared_bytes, &cleared_objects);
+      CHECK_GE(cleared_bytes, from_bytes);
+      CHECK_GE(cleared_objects, from_objects);
+    }
+    int64_t freed_bytes = cleared_bytes - to_bytes;
+    int64_t freed_objects = cleared_objects - to_objects;
     if (kVerboseMode) {
       LOG(INFO) << "RecordFree:"
                 << " from_bytes=" << from_bytes << " from_objects=" << from_objects
@@ -1678,11 +1696,6 @@
   }
 
   {
-    TimingLogger::ScopedTiming split4("ClearFromSpace", GetTimings());
-    region_space_->ClearFromSpace();
-  }
-
-  {
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
     Sweep(false);
     SwapBitmaps();
@@ -2166,7 +2179,12 @@
       fall_back_to_non_moving = true;
       to_ref = heap_->non_moving_space_->Alloc(Thread::Current(), obj_size,
                                                &non_moving_space_bytes_allocated, nullptr, &dummy);
-      CHECK(to_ref != nullptr) << "Fall-back non-moving space allocation failed";
+      if (UNLIKELY(to_ref == nullptr)) {
+        LOG(FATAL_WITHOUT_ABORT) << "Fall-back non-moving space allocation failed for a "
+                                 << obj_size << " byte object in region type "
+                                 << region_space_->GetRegionType(from_ref);
+        LOG(FATAL) << "Object address=" << from_ref << " type=" << from_ref->PrettyTypeOf();
+      }
       bytes_allocated = non_moving_space_bytes_allocated;
       // Mark it in the mark bitmap.
       accounting::ContinuousSpaceBitmap* mark_bitmap =
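Editor's note: with ClearFromSpace reporting what it actually reclaimed, the freed totals can legitimately exceed the old from-space delta. A worked example of the new accounting:

    // Say from_bytes = 10 MB were allocated in the from-space, to_bytes = 4 MB
    // survived into the to-space, and ClearFromSpace additionally dropped 2 MB
    // of fully-dead unevac regions, so cleared_bytes = 12 MB. Then:
    //   old: freed_bytes = from_bytes    - to_bytes = 6 MB
    //   new: freed_bytes = cleared_bytes - to_bytes = 8 MB
    // hence CHECK_GE(cleared_bytes, from_bytes) rather than equality.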
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 0039388..c61f69d 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -52,8 +52,12 @@
 
 MarkCompact::MarkCompact(Heap* heap, const std::string& name_prefix)
     : GarbageCollector(heap, name_prefix + (name_prefix.empty() ? "" : " ") + "mark compact"),
+      mark_stack_(nullptr),
       space_(nullptr),
+      mark_bitmap_(nullptr),
       collector_name_(name_),
+      bump_pointer_(nullptr),
+      live_objects_in_space_(0),
       updating_references_(false) {}
 
 void MarkCompact::RunPhases() {
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 4c0f317..67e7383 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -724,7 +724,9 @@
 void SemiSpace::ScanObject(Object* obj) {
   DCHECK(!from_space_->HasAddress(obj)) << "Scanning object " << obj << " in from space";
   MarkObjectVisitor visitor(this);
-  obj->VisitReferences(visitor, visitor);
+  // Turn off read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build).
+  obj->VisitReferences</*kVisitNativeRoots*/true, kDefaultVerifyFlags, kWithoutReadBarrier>(
+      visitor, visitor);
 }
 
 // Scan anything that's on the mark stack.
diff --git a/runtime/gc/collector_type.h b/runtime/gc/collector_type.h
index eef4fba..f0e1029 100644
--- a/runtime/gc/collector_type.h
+++ b/runtime/gc/collector_type.h
@@ -59,6 +59,8 @@
   kCollectorTypeHprof,
   // Fake collector for installing/removing a system-weak holder.
   kCollectorTypeAddRemoveSystemWeakHolder,
+  // Fake collector type for GetObjectsAllocated
+  kCollectorTypeGetObjectsAllocated,
 };
 std::ostream& operator<<(std::ostream& os, const CollectorType& collector_type);
 
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 9e34346..c1c1cad 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -40,6 +40,7 @@
     case kGcCauseJitCodeCache: return "JitCodeCache";
     case kGcCauseAddRemoveSystemWeakHolder: return "SystemWeakHolder";
     case kGcCauseHprof: return "Hprof";
+    case kGcCauseGetObjectsAllocated: return "ObjectsAllocated";
   }
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
diff --git a/runtime/gc/gc_cause.h b/runtime/gc/gc_cause.h
index 9b285b1..eb27547 100644
--- a/runtime/gc/gc_cause.h
+++ b/runtime/gc/gc_cause.h
@@ -53,8 +53,10 @@
   kGcCauseJitCodeCache,
   // Not a real GC cause, used to add or remove system-weak holders.
   kGcCauseAddRemoveSystemWeakHolder,
-  // Not a real GC cause, used to hprof running in the middle of GC.
+  // Not a real GC cause, used to prevent hprof running in the middle of GC.
   kGcCauseHprof,
+  // Not a real GC cause, used to prevent GetObjectsAllocated running in the middle of GC.
+  kGcCauseGetObjectsAllocated,
 };
 
 const char* PrettyCause(GcCause cause);
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 37963e4..f04bc89 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -18,13 +18,13 @@
 
 #include <limits>
 #include <memory>
-#include <unwind.h>  // For GC verification.
 #include <vector>
 
 #include "android-base/stringprintf.h"
 
 #include "allocation_listener.h"
 #include "art_field-inl.h"
+#include "backtrace_helper.h"
 #include "base/allocator.h"
 #include "base/arena_allocator.h"
 #include "base/dumpable.h"
@@ -133,6 +133,17 @@
 // config.
 static constexpr double kExtraHeapGrowthMultiplier = kUseReadBarrier ? 1.0 : 0.0;
 
+static const char* kRegionSpaceName = "main space (region space)";
+
+#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
+// 300 MB (0x12c00000) - (default non-moving space capacity).
+static uint8_t* const kPreferredAllocSpaceBegin =
+    reinterpret_cast<uint8_t*>(300 * MB - Heap::kDefaultNonMovingSpaceCapacity);
+#else
+// For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
+static uint8_t* const kPreferredAllocSpaceBegin = reinterpret_cast<uint8_t*>(0x20000000);
+#endif
+
 static inline bool CareAboutPauseTimes() {
   return Runtime::Current()->InJankPerceptibleProcessState();
 }
@@ -286,15 +297,9 @@
   // Requested begin for the alloc space, to follow the mapped image and oat files
   uint8_t* requested_alloc_space_begin = nullptr;
   if (foreground_collector_type_ == kCollectorTypeCC) {
-    // Need to use a low address so that we can allocate a contiguous
-    // 2 * Xmx space when there's no image (dex2oat for target).
-#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
-    CHECK_GE(300 * MB, non_moving_space_capacity);
-    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(300 * MB) - non_moving_space_capacity;
-#else
-    // For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
-    requested_alloc_space_begin = reinterpret_cast<uint8_t*>(0x20000000);
-#endif
+    // Need to use a low address so that we can allocate a contiguous 2 * Xmx space when there's no
+    // image (dex2oat for target).
+    requested_alloc_space_begin = kPreferredAllocSpaceBegin;
   }
 
   // Load image space(s).
@@ -369,12 +374,7 @@
                              &error_str));
     CHECK(non_moving_space_mem_map != nullptr) << error_str;
     // Try to reserve virtual memory at a lower address if we have a separate non moving space.
-#if defined(__LP64__) || !defined(ADDRESS_SANITIZER)
-    request_begin = reinterpret_cast<uint8_t*>(300 * MB);
-#else
-    // For 32-bit, use 0x20000000 because asan reserves 0x04000000 - 0x20000000.
-    request_begin = reinterpret_cast<uint8_t*>(0x20000000) + non_moving_space_capacity;
-#endif
+    request_begin = kPreferredAllocSpaceBegin + non_moving_space_capacity;
   }
   // Attempt to create 2 mem maps at or after the requested begin.
   if (foreground_collector_type_ != kCollectorTypeCC) {
@@ -419,7 +419,12 @@
   }
   // Create other spaces based on whether or not we have a moving GC.
   if (foreground_collector_type_ == kCollectorTypeCC) {
-    region_space_ = space::RegionSpace::Create("main space (region space)", capacity_ * 2, request_begin);
+    CHECK(separate_non_moving_space);
+    MemMap* region_space_mem_map = space::RegionSpace::CreateMemMap(kRegionSpaceName,
+                                                                    capacity_ * 2,
+                                                                    request_begin);
+    CHECK(region_space_mem_map != nullptr) << "No region space mem map";
+    region_space_ = space::RegionSpace::Create(kRegionSpaceName, region_space_mem_map);
     AddSpace(region_space_);
   } else if (IsMovingGc(foreground_collector_type_) &&
       foreground_collector_type_ != kCollectorTypeGSS) {
@@ -1830,6 +1835,11 @@
 size_t Heap::GetObjectsAllocated() const {
   Thread* const self = Thread::Current();
   ScopedThreadStateChange tsc(self, kWaitingForGetObjectsAllocated);
+  // Prevent GC running during GetObjectsAllocated since we may get a checkpoint request that tells
+  // us to suspend while we are doing SuspendAll. b/35232978
+  gc::ScopedGCCriticalSection gcs(Thread::Current(),
+                                  gc::kGcCauseGetObjectsAllocated,
+                                  gc::kCollectorTypeGetObjectsAllocated);
   // Need SuspendAll here to prevent lock violation if RosAlloc does it during InspectAll.
   ScopedSuspendAll ssa(__FUNCTION__);
   ReaderMutexLock mu(self, *Locks::heap_bitmap_lock_);
@@ -2327,7 +2337,9 @@
     size_t bin_size = object_addr - context->prev_;
     // Add the bin consisting of the end of the previous object to the start of the current object.
     collector->AddBin(bin_size, context->prev_);
-    context->prev_ = object_addr + RoundUp(obj->SizeOf(), kObjectAlignment);
+    // Turn off read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build).
+    context->prev_ = object_addr + RoundUp(obj->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>(),
+                                           kObjectAlignment);
   }
 
   void AddBin(size_t size, uintptr_t position) {
@@ -2347,7 +2359,8 @@
 
   virtual mirror::Object* MarkNonForwardedObject(mirror::Object* obj)
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_) {
-    size_t obj_size = obj->SizeOf();
+    // Turn off read barrier. ZygoteCompactingCollector doesn't use it (even in the CC build).
+    size_t obj_size = obj->SizeOf<kDefaultVerifyFlags, kWithoutReadBarrier>();
     size_t alloc_size = RoundUp(obj_size, kObjectAlignment);
     mirror::Object* forward_address;
     // Find the smallest bin which we can move obj in.
@@ -3551,11 +3564,8 @@
   collector::GcType gc_type = collector_ran->GetGcType();
   const double multiplier = HeapGrowthMultiplier();  // Use the multiplier to grow more for
   // foreground.
-  // Ensure at least 2.5 MB to temporarily fix excessive GC caused by TLAB ergonomics.
-  const uint64_t adjusted_min_free = std::max(static_cast<uint64_t>(min_free_ * multiplier),
-                                              static_cast<uint64_t>(5 * MB / 2));
-  const uint64_t adjusted_max_free = std::max(static_cast<uint64_t>(max_free_ * multiplier),
-                                              static_cast<uint64_t>(5 * MB / 2));
+  const uint64_t adjusted_min_free = static_cast<uint64_t>(min_free_ * multiplier);
+  const uint64_t adjusted_max_free = static_cast<uint64_t>(max_free_ * multiplier);
   if (gc_type != collector::kGcTypeSticky) {
     // Grow the heap for non sticky GC.
     ssize_t delta = bytes_allocated / GetTargetHeapUtilization() - bytes_allocated;
@@ -4055,42 +4065,6 @@
   }
 }
 
-// Based on debug malloc logic from libc/bionic/debug_stacktrace.cpp.
-class StackCrawlState {
- public:
-  StackCrawlState(uintptr_t* frames, size_t max_depth, size_t skip_count)
-      : frames_(frames), frame_count_(0), max_depth_(max_depth), skip_count_(skip_count) {
-  }
-  size_t GetFrameCount() const {
-    return frame_count_;
-  }
-  static _Unwind_Reason_Code Callback(_Unwind_Context* context, void* arg) {
-    auto* const state = reinterpret_cast<StackCrawlState*>(arg);
-    const uintptr_t ip = _Unwind_GetIP(context);
-    // The first stack frame is get_backtrace itself. Skip it.
-    if (ip != 0 && state->skip_count_ > 0) {
-      --state->skip_count_;
-      return _URC_NO_REASON;
-    }
-    // ip may be off for ARM but it shouldn't matter since we only use it for hashing.
-    state->frames_[state->frame_count_] = ip;
-    state->frame_count_++;
-    return state->frame_count_ >= state->max_depth_ ? _URC_END_OF_STACK : _URC_NO_REASON;
-  }
-
- private:
-  uintptr_t* const frames_;
-  size_t frame_count_;
-  const size_t max_depth_;
-  size_t skip_count_;
-};
-
-static size_t get_backtrace(uintptr_t* frames, size_t max_depth) {
-  StackCrawlState state(frames, max_depth, 0u);
-  _Unwind_Backtrace(&StackCrawlState::Callback, &state);
-  return state.GetFrameCount();
-}
-
 void Heap::CheckGcStressMode(Thread* self, ObjPtr<mirror::Object>* obj) {
   auto* const runtime = Runtime::Current();
   if (gc_stress_mode_ && runtime->GetClassLinker()->IsInitialized() &&
@@ -4099,13 +4073,9 @@
     bool new_backtrace = false;
     {
       static constexpr size_t kMaxFrames = 16u;
-      uintptr_t backtrace[kMaxFrames];
-      const size_t frames = get_backtrace(backtrace, kMaxFrames);
-      uint64_t hash = 0;
-      for (size_t i = 0; i < frames; ++i) {
-        hash = hash * 2654435761 + backtrace[i];
-        hash += (hash >> 13) ^ (hash << 6);
-      }
+      FixedSizeBacktrace<kMaxFrames> backtrace;
+      backtrace.Collect(/* skip_frames */ 2);
+      uint64_t hash = backtrace.Hash();
       MutexLock mu(self, *backtrace_lock_);
       new_backtrace = seen_backtraces_.find(hash) == seen_backtraces_.end();
       if (new_backtrace) {
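Editor's note: the deleted loop is presumably the hash that FixedSizeBacktrace::Hash() preserves; backtrace_helper.h is not shown here. A self-contained sketch using the constants from the removed code:

    #include <stdint.h>
    #include <stddef.h>

    // Sketch: Knuth-style multiplicative hash over the collected frame PCs.
    uint64_t HashBacktrace(const uintptr_t* frames, size_t count) {
      uint64_t hash = 0;
      for (size_t i = 0; i < count; ++i) {
        hash = hash * 2654435761u + frames[i];
        hash += (hash >> 13) ^ (hash << 6);
      }
      return hash;
    }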
diff --git a/runtime/gc/scoped_gc_critical_section.cc b/runtime/gc/scoped_gc_critical_section.cc
index b5eb979..f937d2c 100644
--- a/runtime/gc/scoped_gc_critical_section.cc
+++ b/runtime/gc/scoped_gc_critical_section.cc
@@ -29,10 +29,14 @@
                                                  CollectorType collector_type)
     : self_(self) {
   Runtime::Current()->GetHeap()->StartGC(self, cause, collector_type);
-  old_cause_ = self->StartAssertNoThreadSuspension("ScopedGCCriticalSection");
+  if (self != nullptr) {
+    old_cause_ = self->StartAssertNoThreadSuspension("ScopedGCCriticalSection");
+  }
 }
 ScopedGCCriticalSection::~ScopedGCCriticalSection() {
-  self_->EndAssertNoThreadSuspension(old_cause_);
+  if (self_ != nullptr) {
+    self_->EndAssertNoThreadSuspension(old_cause_);
+  }
   Runtime::Current()->GetHeap()->FinishGC(self_, collector::kGcTypeNone);
 }
 
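Editor's note: the null checks let the critical section be entered with a null Thread* (e.g. from a thread not attached to the runtime), in which case the no-suspension assertion is skipped; StartGC and FinishGC evidently already tolerate null. Usage from an attached thread is unchanged, as in the GetObjectsAllocated hunk above:

    gc::ScopedGCCriticalSection gcs(Thread::Current(),
                                    gc::kGcCauseGetObjectsAllocated,
                                    gc::kCollectorTypeGetObjectsAllocated);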
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index 010ef11..662efe2 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -886,7 +886,7 @@
     explicit FixupObjectAdapter(Args... args) : FixupVisitor(args...) {}
 
     template <typename T>
-    T* operator()(T* obj) const {
+    T* operator()(T* obj, void** dest_addr ATTRIBUTE_UNUSED = nullptr) const {
       return ForwardObject(obj);
     }
   };
@@ -976,7 +976,8 @@
           ForwardObject(obj));
     }
 
-    void operator()(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+    void operator()(mirror::Object* obj) const
+        NO_THREAD_SAFETY_ANALYSIS {
       if (visited_->Test(obj)) {
         // Already visited.
         return;
@@ -1259,17 +1260,18 @@
             }
           }
         }
-        ArtField** fields = dex_cache->GetResolvedFields();
+        mirror::FieldDexCacheType* fields = dex_cache->GetResolvedFields();
         if (fields != nullptr) {
-          ArtField** new_fields = fixup_adapter.ForwardObject(fields);
+          mirror::FieldDexCacheType* new_fields = fixup_adapter.ForwardObject(fields);
           if (fields != new_fields) {
             dex_cache->SetResolvedFields(new_fields);
           }
           for (size_t j = 0, num = dex_cache->NumResolvedFields(); j != num; ++j) {
-            ArtField* orig = mirror::DexCache::GetElementPtrSize(new_fields, j, pointer_size);
-            ArtField* copy = fixup_adapter.ForwardObject(orig);
-            if (orig != copy) {
-              mirror::DexCache::SetElementPtrSize(new_fields, j, copy, pointer_size);
+            mirror::FieldDexCachePair orig =
+                mirror::DexCache::GetNativePairPtrSize(new_fields, j, pointer_size);
+            mirror::FieldDexCachePair copy(fixup_adapter.ForwardObject(orig.object), orig.index);
+            if (orig.object != copy.object) {
+              mirror::DexCache::SetNativePairPtrSize(new_fields, j, copy, pointer_size);
             }
           }
         }
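Editor's note: resolved fields move from a plain ArtField* array to an array of (pointer, index) pairs, so a slot can tell which field it actually caches. A hedged sketch of such a pair (the real FieldDexCachePair / GetNativePairPtrSize definitions live outside this hunk):

    // Sketch only: a slot rejects lookups for a different field index.
    struct FieldDexCachePairSketch {
      ArtField* object;  // resolved field, or nullptr
      uint32_t index;    // dex field index this slot was resolved for

      ArtField* GetFor(uint32_t field_idx) const {
        return index == field_idx ? object : nullptr;
      }
    };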
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 4c6b5bf..3988073 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -16,13 +16,12 @@
 
 #include "large_object_space.h"
 
-#include <valgrind.h>
 #include <memory>
-#include <memcheck/memcheck.h>
 
 #include "gc/accounting/heap_bitmap-inl.h"
 #include "gc/accounting/space_bitmap-inl.h"
 #include "base/logging.h"
+#include "base/memory_tool.h"
 #include "base/mutex-inl.h"
 #include "base/stl_util.h"
 #include "image.h"
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 3e79223..5809027 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -78,7 +78,7 @@
       for (size_t i = 0; i < num_regions_; ++i) {
         Region* r = &regions_[i];
         if (r->IsFree()) {
-          r->Unfree(time_);
+          r->Unfree(this, time_);
           r->SetNewlyAllocated();
           ++num_non_free_regions_;
           obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
@@ -91,7 +91,7 @@
       for (size_t i = 0; i < num_regions_; ++i) {
         Region* r = &regions_[i];
         if (r->IsFree()) {
-          r->Unfree(time_);
+          r->Unfree(this, time_);
           ++num_non_free_regions_;
           obj = r->Alloc(num_bytes, bytes_allocated, usable_size, bytes_tl_bulk_allocated);
           CHECK(obj != nullptr);
@@ -233,10 +233,12 @@
       continue;
     }
     if (r->IsLarge()) {
+      // Avoid visiting dead large objects since they may contain dangling pointers to the
+      // from-space.
+      DCHECK_GT(r->LiveBytes(), 0u) << "Visiting dead large object";
       mirror::Object* obj = reinterpret_cast<mirror::Object*>(r->Begin());
-      if (obj->GetClass() != nullptr) {
-        callback(obj, arg);
-      }
+      DCHECK(obj->GetClass() != nullptr);
+      callback(obj, arg);
     } else if (r->IsLargeTail()) {
       // Do nothing.
     } else {
@@ -310,13 +312,13 @@
       DCHECK_EQ(left + num_regs, right);
       Region* first_reg = &regions_[left];
       DCHECK(first_reg->IsFree());
-      first_reg->UnfreeLarge(time_);
+      first_reg->UnfreeLarge(this, time_);
       ++num_non_free_regions_;
       first_reg->SetTop(first_reg->Begin() + num_bytes);
       for (size_t p = left + 1; p < right; ++p) {
         DCHECK_LT(p, num_regions_);
         DCHECK(regions_[p].IsFree());
-        regions_[p].UnfreeLargeTail(time_);
+        regions_[p].UnfreeLargeTail(this, time_);
         ++num_non_free_regions_;
       }
       *bytes_allocated = num_bytes;
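Editor's note: the extra `this` argument to Unfree / UnfreeLarge / UnfreeLargeTail presumably lets a region bump the space's new non_free_region_index_limit_ (see the region_space.cc hunks below), so from-space scans can stop at the highest in-use region instead of walking every region. A sketch of the bound update, using the names from the diff:

    // Sketch: keep one past the highest non-free region index.
    void RegionSpace::AdjustNonFreeRegionLimit(size_t new_non_free_index) {
      non_free_region_index_limit_ =
          std::max(non_free_region_index_limit_, new_non_free_index + 1);
    }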
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 8077319..1ad4843 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -28,20 +28,52 @@
 // value of the region size, evaculate the region.
 static constexpr uint kEvaculateLivePercentThreshold = 75U;
 
-RegionSpace* RegionSpace::Create(const std::string& name, size_t capacity,
-                                 uint8_t* requested_begin) {
-  capacity = RoundUp(capacity, kRegionSize);
+MemMap* RegionSpace::CreateMemMap(const std::string& name, size_t capacity,
+                                  uint8_t* requested_begin) {
+  CHECK_ALIGNED(capacity, kRegionSize);
   std::string error_msg;
-  std::unique_ptr<MemMap> mem_map(MemMap::MapAnonymous(name.c_str(), requested_begin, capacity,
-                                                       PROT_READ | PROT_WRITE, true, false,
-                                                       &error_msg));
+  // Ask for an additional kRegionSize of capacity so that we can align the map to kRegionSize
+  // even if we get an unaligned base address. This is necessary for the ReadBarrierTable to work.
+  std::unique_ptr<MemMap> mem_map;
+  while (true) {
+    mem_map.reset(MemMap::MapAnonymous(name.c_str(),
+                                       requested_begin,
+                                       capacity + kRegionSize,
+                                       PROT_READ | PROT_WRITE,
+                                       true,
+                                       false,
+                                       &error_msg));
+    if (mem_map.get() != nullptr || requested_begin == nullptr) {
+      break;
+    }
+    // Retry with no specified request begin.
+    requested_begin = nullptr;
+  }
   if (mem_map.get() == nullptr) {
     LOG(ERROR) << "Failed to allocate pages for alloc space (" << name << ") of size "
         << PrettySize(capacity) << " with message " << error_msg;
     MemMap::DumpMaps(LOG_STREAM(ERROR));
     return nullptr;
   }
-  return new RegionSpace(name, mem_map.release());
+  CHECK_EQ(mem_map->Size(), capacity + kRegionSize);
+  CHECK_EQ(mem_map->Begin(), mem_map->BaseBegin());
+  CHECK_EQ(mem_map->Size(), mem_map->BaseSize());
+  if (IsAlignedParam(mem_map->Begin(), kRegionSize)) {
+    // Got an aligned map. Since we requested a map that's kRegionSize larger, shrink by
+    // kRegionSize at the end.
+    mem_map->SetSize(capacity);
+  } else {
+    // Got an unaligned map. Align both ends.
+    mem_map->AlignBy(kRegionSize);
+  }
+  CHECK_ALIGNED(mem_map->Begin(), kRegionSize);
+  CHECK_ALIGNED(mem_map->End(), kRegionSize);
+  CHECK_EQ(mem_map->Size(), capacity);
+  return mem_map.release();
+}
+
+RegionSpace* RegionSpace::Create(const std::string& name, MemMap* mem_map) {
+  return new RegionSpace(name, mem_map);
 }
 
 RegionSpace::RegionSpace(const std::string& name, MemMap* mem_map)
@@ -54,6 +86,7 @@
   num_regions_ = mem_map_size / kRegionSize;
   num_non_free_regions_ = 0U;
   DCHECK_GT(num_regions_, 0U);
+  non_free_region_index_limit_ = 0U;
   regions_.reset(new Region[num_regions_]);
   uint8_t* region_addr = mem_map->Begin();
   for (size_t i = 0; i < num_regions_; ++i, region_addr += kRegionSize) {
@@ -160,7 +193,11 @@
   MutexLock mu(Thread::Current(), region_lock_);
   size_t num_expected_large_tails = 0;
   bool prev_large_evacuated = false;
-  for (size_t i = 0; i < num_regions_; ++i) {
+  VerifyNonFreeRegionLimit();
+  const size_t iter_limit = kUseTableLookupReadBarrier
+      ? num_regions_
+      : std::min(num_regions_, non_free_region_index_limit_);
+  for (size_t i = 0; i < iter_limit; ++i) {
     Region* r = &regions_[i];
     RegionState state = r->State();
     RegionType type = r->Type();
@@ -204,18 +241,50 @@
       }
     }
   }
+  DCHECK_EQ(num_expected_large_tails, 0U);
   current_region_ = &full_region_;
   evac_region_ = &full_region_;
 }
 
-void RegionSpace::ClearFromSpace() {
+void RegionSpace::ClearFromSpace(uint64_t* cleared_bytes, uint64_t* cleared_objects) {
+  DCHECK(cleared_bytes != nullptr);
+  DCHECK(cleared_objects != nullptr);
+  *cleared_bytes = 0;
+  *cleared_objects = 0;
   MutexLock mu(Thread::Current(), region_lock_);
-  for (size_t i = 0; i < num_regions_; ++i) {
+  VerifyNonFreeRegionLimit();
+  size_t new_non_free_region_index_limit = 0;
+  for (size_t i = 0; i < std::min(num_regions_, non_free_region_index_limit_); ++i) {
     Region* r = &regions_[i];
     if (r->IsInFromSpace()) {
-      r->Clear();
+      *cleared_bytes += r->BytesAllocated();
+      *cleared_objects += r->ObjectsAllocated();
       --num_non_free_regions_;
+      r->Clear();
     } else if (r->IsInUnevacFromSpace()) {
+      if (r->LiveBytes() == 0) {
+        // Special case for 0 live bytes: this means all of the objects in the region are dead and
+        // we can clear it. This is important for large objects since we must not visit dead ones in
+        // RegionSpace::Walk because they may contain dangling references to invalid objects.
+        // It is also better to clear these regions now instead of at the end of the next GC to
+        // save RAM. If we don't clear the regions here, they will be cleared next GC by the normal
+        // live percent evacuation logic.
+        size_t free_regions = 1;
+        // Also release RAM for large tails.
+        while (i + free_regions < num_regions_ && regions_[i + free_regions].IsLargeTail()) {
+          DCHECK(r->IsLarge());
+          regions_[i + free_regions].Clear();
+          ++free_regions;
+        }
+        *cleared_bytes += r->BytesAllocated();
+        *cleared_objects += r->ObjectsAllocated();
+        num_non_free_regions_ -= free_regions;
+        r->Clear();
+        GetLiveBitmap()->ClearRange(
+            reinterpret_cast<mirror::Object*>(r->Begin()),
+            reinterpret_cast<mirror::Object*>(r->Begin() + free_regions * kRegionSize));
+        continue;
+      }
       size_t full_count = 0;
       while (r->IsInUnevacFromSpace()) {
         Region* const cur = &regions_[i + full_count];
@@ -223,6 +292,7 @@
             cur->LiveBytes() != static_cast<size_t>(cur->Top() - cur->Begin())) {
           break;
         }
+        DCHECK(cur->IsInUnevacFromSpace());
         if (full_count != 0) {
           cur->SetUnevacFromSpaceAsToSpace();
         }
@@ -239,7 +309,15 @@
         i += full_count - 1;
       }
     }
+    // Note r != last_checked_region if r->IsInUnevacFromSpace() was true above.
+    Region* last_checked_region = &regions_[i];
+    if (!last_checked_region->IsFree()) {
+      new_non_free_region_index_limit = std::max(new_non_free_region_index_limit,
+                                                 last_checked_region->Idx() + 1);
+    }
   }
+  // Update non_free_region_index_limit_.
+  SetNonFreeRegionLimit(new_non_free_region_index_limit);
   evac_region_ = nullptr;
 }
 
@@ -292,6 +370,7 @@
     }
     r->Clear();
   }
+  SetNonFreeRegionLimit(0);
   current_region_ = &full_region_;
   evac_region_ = &full_region_;
 }
@@ -358,7 +437,7 @@
   for (size_t i = 0; i < num_regions_; ++i) {
     Region* r = &regions_[i];
     if (r->IsFree()) {
-      r->Unfree(time_);
+      r->Unfree(this, time_);
       ++num_non_free_regions_;
       r->SetNewlyAllocated();
       r->SetTop(r->End());
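
ClearFromSpace() now reports what it reclaimed through out-parameters instead of leaving the accounting to a later region scan. A hedged sketch of a collector-side caller, assuming a RecordFree-style hook (the collector change itself is not part of this file's diff):

  uint64_t cleared_bytes = 0;
  uint64_t cleared_objects = 0;
  region_space_->ClearFromSpace(&cleared_bytes, &cleared_objects);
  // Credit the reclaimed memory in one place rather than re-deriving it.
  RecordFree(ObjectBytePair(cleared_objects, cleared_bytes));
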
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index feab9b0..2537929 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -35,10 +35,11 @@
     return kSpaceTypeRegionSpace;
   }
 
-  // Create a region space with the requested sizes. The requested base address is not
+  // Create a region space mem map with the requested sizes. The requested base address is not
   // guaranteed to be granted; if it is required, the caller should call Begin on the returned
   // space to confirm the request was granted.
-  static RegionSpace* Create(const std::string& name, size_t capacity, uint8_t* requested_begin);
+  static MemMap* CreateMemMap(const std::string& name, size_t capacity, uint8_t* requested_begin);
+  static RegionSpace* Create(const std::string& name, MemMap* mem_map);
 
   // Allocate num_bytes, returns null if the space is full.
   mirror::Object* Alloc(Thread* self, size_t num_bytes, size_t* bytes_allocated,
@@ -166,7 +167,7 @@
   // Object alignment within the space.
   static constexpr size_t kAlignment = kObjectAlignment;
   // The region size.
-  static constexpr size_t kRegionSize = 1 * MB;
+  static constexpr size_t kRegionSize = 256 * KB;
 
   bool IsInFromSpace(mirror::Object* ref) {
     if (HasAddress(ref)) {
@@ -214,7 +215,7 @@
   size_t FromSpaceSize() REQUIRES(!region_lock_);
   size_t UnevacFromSpaceSize() REQUIRES(!region_lock_);
   size_t ToSpaceSize() REQUIRES(!region_lock_);
-  void ClearFromSpace() REQUIRES(!region_lock_);
+  void ClearFromSpace(uint64_t* cleared_bytes, uint64_t* cleared_objects) REQUIRES(!region_lock_);
 
   void AddLiveBytes(mirror::Object* ref, size_t alloc_size) {
     Region* reg = RefToRegionUnlocked(ref);
@@ -307,25 +308,31 @@
     }
 
     // Given a free region, declare it non-free (allocated).
-    void Unfree(uint32_t alloc_time) {
+    void Unfree(RegionSpace* region_space, uint32_t alloc_time)
+        REQUIRES(region_space->region_lock_) {
       DCHECK(IsFree());
       state_ = RegionState::kRegionStateAllocated;
       type_ = RegionType::kRegionTypeToSpace;
       alloc_time_ = alloc_time;
+      region_space->AdjustNonFreeRegionLimit(idx_);
     }
 
-    void UnfreeLarge(uint32_t alloc_time) {
+    void UnfreeLarge(RegionSpace* region_space, uint32_t alloc_time)
+        REQUIRES(region_space->region_lock_) {
       DCHECK(IsFree());
       state_ = RegionState::kRegionStateLarge;
       type_ = RegionType::kRegionTypeToSpace;
       alloc_time_ = alloc_time;
+      region_space->AdjustNonFreeRegionLimit(idx_);
     }
 
-    void UnfreeLargeTail(uint32_t alloc_time) {
+    void UnfreeLargeTail(RegionSpace* region_space, uint32_t alloc_time)
+        REQUIRES(region_space->region_lock_) {
       DCHECK(IsFree());
       state_ = RegionState::kRegionStateLargeTail;
       type_ = RegionType::kRegionTypeToSpace;
       alloc_time_ = alloc_time;
+      region_space->AdjustNonFreeRegionLimit(idx_);
     }
 
     void SetNewlyAllocated() {
@@ -341,7 +348,7 @@
     bool IsLarge() const {
       bool is_large = state_ == RegionState::kRegionStateLarge;
       if (is_large) {
-        DCHECK_LT(begin_ + 1 * MB, Top());
+        DCHECK_LT(begin_ + kRegionSize, Top());
       }
       return is_large;
     }
@@ -428,7 +435,7 @@
 
     size_t ObjectsAllocated() const {
       if (IsLarge()) {
-        DCHECK_LT(begin_ + 1 * MB, Top());
+        DCHECK_LT(begin_ + kRegionSize, Top());
         DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
         return 1;
       } else if (IsLargeTail()) {
@@ -519,6 +526,27 @@
   mirror::Object* GetNextObject(mirror::Object* obj)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void AdjustNonFreeRegionLimit(size_t new_non_free_region_index) REQUIRES(region_lock_) {
+    DCHECK_LT(new_non_free_region_index, num_regions_);
+    non_free_region_index_limit_ = std::max(non_free_region_index_limit_,
+                                            new_non_free_region_index + 1);
+    VerifyNonFreeRegionLimit();
+  }
+
+  void SetNonFreeRegionLimit(size_t new_non_free_region_index_limit) REQUIRES(region_lock_) {
+    DCHECK_LE(new_non_free_region_index_limit, num_regions_);
+    non_free_region_index_limit_ = new_non_free_region_index_limit;
+    VerifyNonFreeRegionLimit();
+  }
+
+  void VerifyNonFreeRegionLimit() REQUIRES(region_lock_) {
+    if (kIsDebugBuild && non_free_region_index_limit_ < num_regions_) {
+      for (size_t i = non_free_region_index_limit_; i < num_regions_; ++i) {
+        CHECK(regions_[i].IsFree());
+      }
+    }
+  }
+
   Mutex region_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
 
   uint32_t time_;                  // The time as the number of collections since the startup.
@@ -526,6 +554,10 @@
   size_t num_non_free_regions_;    // The number of non-free regions in this space.
   std::unique_ptr<Region[]> regions_ GUARDED_BY(region_lock_);
                                    // The pointer to the region array.
+  // The upper-bound index of the non-free regions. Used to avoid scanning all regions in
+  // SetFromSpace().  Invariant: for all i >= non_free_region_index_limit_, regions_[i].IsFree() is
+  // true.
+  size_t non_free_region_index_limit_ GUARDED_BY(region_lock_);
   Region* current_region_;         // The region that's being allocated currently.
   Region* evac_region_;            // The region that's being evacuated to currently.
   Region full_region_;             // The dummy/sentinel region that looks full.
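
non_free_region_index_limit_ is the single piece of new state behind most of these hunks: every region at or beyond the limit is guaranteed free, so scans may stop early. The pattern, as SetFromSpace() uses it in this change (sketch; the kUseTableLookupReadBarrier configuration still scans all regions):

  MutexLock mu(Thread::Current(), region_lock_);
  VerifyNonFreeRegionLimit();  // debug-only check of the invariant
  const size_t iter_limit = std::min(num_regions_, non_free_region_index_limit_);
  for (size_t i = 0; i < iter_limit; ++i) {
    Region* r = &regions_[i];
    // ... classify r; every regions_[j] with j >= iter_limit is free ...
  }
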
diff --git a/runtime/image.cc b/runtime/image.cc
index 243051e..b153ea0 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '9', '\0' };  // Enable string compression.
+const uint8_t ImageHeader::kImageVersion[] = { '0', '4', '3', '\0' };  // hash-based DexCache fields
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/imt_conflict_table.h b/runtime/imt_conflict_table.h
index fdd10fe..3586864 100644
--- a/runtime/imt_conflict_table.h
+++ b/runtime/imt_conflict_table.h
@@ -81,6 +81,14 @@
     return GetMethod(index * kMethodCount + kMethodImplementation, pointer_size);
   }
 
+  void** AddressOfInterfaceMethod(size_t index, PointerSize pointer_size) {
+    return AddressOfMethod(index * kMethodCount + kMethodInterface, pointer_size);
+  }
+
+  void** AddressOfImplementationMethod(size_t index, PointerSize pointer_size) {
+    return AddressOfMethod(index * kMethodCount + kMethodImplementation, pointer_size);
+  }
+
   // Return true if two conflict tables are the same.
   bool Equals(ImtConflictTable* other, PointerSize pointer_size) const {
     size_t num = NumEntries(pointer_size);
@@ -169,6 +177,14 @@
   }
 
  private:
+  void** AddressOfMethod(size_t index, PointerSize pointer_size) {
+    if (pointer_size == PointerSize::k64) {
+      return reinterpret_cast<void**>(&data64_[index]);
+    } else {
+      return reinterpret_cast<void**>(&data32_[index]);
+    }
+  }
+
   ArtMethod* GetMethod(size_t index, PointerSize pointer_size) const {
     if (pointer_size == PointerSize::k64) {
       return reinterpret_cast<ArtMethod*>(static_cast<uintptr_t>(data64_[index]));
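
The new AddressOf* accessors mirror the existing getters but hand back the slot itself, so a fixup pass can patch entries in place instead of rebuilding the table. A hedged illustration (not from this change): the write must match the width the table was built with, since the slot aliases data32_ or data64_.

  void** slot = table->AddressOfImplementationMethod(index, pointer_size);
  if (pointer_size == PointerSize::k64) {
    *reinterpret_cast<uint64_t*>(slot) = reinterpret_cast<uintptr_t>(new_method);
  } else {
    *reinterpret_cast<uint32_t*>(slot) =
        dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(new_method));
  }
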
diff --git a/runtime/imtable.h b/runtime/imtable.h
index b7066bd..aa0a504 100644
--- a/runtime/imtable.h
+++ b/runtime/imtable.h
@@ -37,9 +37,13 @@
   // (non-marker) interfaces.
   static constexpr size_t kSize = IMT_SIZE;
 
+  uint8_t* AddressOfElement(size_t index, PointerSize pointer_size) {
+    return reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+  }
+
   ArtMethod* Get(size_t index, PointerSize pointer_size) {
     DCHECK_LT(index, kSize);
-    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    uint8_t* ptr = AddressOfElement(index, pointer_size);
     if (pointer_size == PointerSize::k32) {
       uint32_t value = *reinterpret_cast<uint32_t*>(ptr);
       return reinterpret_cast<ArtMethod*>(value);
@@ -51,7 +55,7 @@
 
   void Set(size_t index, ArtMethod* method, PointerSize pointer_size) {
     DCHECK_LT(index, kSize);
-    uint8_t* ptr = reinterpret_cast<uint8_t*>(this) + OffsetOfElement(index, pointer_size);
+    uint8_t* ptr = AddressOfElement(index, pointer_size);
     if (pointer_size == PointerSize::k32) {
       uintptr_t value = reinterpret_cast<uintptr_t>(method);
       DCHECK_EQ(static_cast<uint32_t>(value), value);  // Check that we dont lose any non 0 bits.
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index 1b3d339..bf49e84 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -232,12 +232,6 @@
   kSwitchImplKind,        // Switch-based interpreter implementation.
   kMterpImplKind          // Assembly interpreter
 };
-static std::ostream& operator<<(std::ostream& os, const InterpreterImplKind& rhs) {
-  os << ((rhs == kSwitchImplKind)
-              ? "Switch-based interpreter"
-              : "Asm interpreter");
-  return os;
-}
 
 static constexpr InterpreterImplKind kInterpreterImplKind = kMterpImplKind;
 
@@ -366,6 +360,14 @@
     return;
   }
 
+  // This can happen if we are in forced interpreter mode and an obsolete method is called using
+  // reflection.
+  if (UNLIKELY(method->IsObsolete())) {
+    ThrowInternalError("Attempting to invoke obsolete version of '%s'.",
+                       method->PrettyMethod().c_str());
+    return;
+  }
+
   const char* old_cause = self->StartAssertNoThreadSuspension("EnterInterpreterFromInvoke");
   const DexFile::CodeItem* code_item = method->GetCodeItem();
   uint16_t num_regs;
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 6b22af9..2589ad0 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_INTERPRETER_INTERPRETER_COMMON_H_
 
 #include "interpreter.h"
+#include "interpreter_intrinsics.h"
 
 #include <math.h>
 
@@ -104,13 +105,58 @@
 void RecordArrayElementsInTransaction(ObjPtr<mirror::Array> array, int32_t count)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-// Invokes the given method. This is part of the invocation support and is used by DoInvoke and
-// DoInvokeVirtualQuick functions.
+// Invokes the given method. This is part of the invocation support and is used by DoInvoke,
+// DoFastInvoke and DoInvokeVirtualQuick functions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<bool is_range, bool do_assignability_check>
 bool DoCall(ArtMethod* called_method, Thread* self, ShadowFrame& shadow_frame,
             const Instruction* inst, uint16_t inst_data, JValue* result);
 
+// Handles streamlined non-range invoke static, direct and virtual instructions originating in
+// mterp. Access checks and instrumentation other than jit profiling are not supported, but
+// interpreter intrinsics are supported where applicable.
+// Returns true on success, otherwise throws an exception and returns false.
+template<InvokeType type>
+static inline bool DoFastInvoke(Thread* self,
+                                ShadowFrame& shadow_frame,
+                                const Instruction* inst,
+                                uint16_t inst_data,
+                                JValue* result) {
+  const uint32_t method_idx = inst->VRegB_35c();
+  const uint32_t vregC = inst->VRegC_35c();
+  ObjPtr<mirror::Object> receiver = (type == kStatic)
+      ? nullptr
+      : shadow_frame.GetVRegReference(vregC);
+  ArtMethod* sf_method = shadow_frame.GetMethod();
+  ArtMethod* const called_method = FindMethodFromCode<type, false>(
+      method_idx, &receiver, sf_method, self);
+  // The shadow frame should already be pushed, so we don't need to update it.
+  if (UNLIKELY(called_method == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    result->SetJ(0);
+    return false;
+  } else if (UNLIKELY(!called_method->IsInvokable())) {
+    called_method->ThrowInvocationTimeError();
+    result->SetJ(0);
+    return false;
+  } else {
+    if (called_method->IsIntrinsic()) {
+      if (MterpHandleIntrinsic(&shadow_frame, called_method, inst, inst_data,
+                               shadow_frame.GetResultRegister())) {
+        return !self->IsExceptionPending();
+      }
+    }
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    if (jit != nullptr) {
+      if (type == kVirtual) {
+        jit->InvokeVirtualOrInterface(receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+      }
+      jit->AddSamples(self, sf_method, 1, /*with_backedges*/false);
+    }
+    return DoCall<false, false>(called_method, self, shadow_frame, inst, inst_data, result);
+  }
+}
+
 // Handles all invoke-XXX/range instructions except for invoke-polymorphic[/range].
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
@@ -495,8 +541,9 @@
 
 // Explicitly instantiate all DoInvoke functions.
 #define EXPLICIT_DO_INVOKE_TEMPLATE_DECL(_type, _is_range, _do_check)                      \
-  template REQUIRES_SHARED(Locks::mutator_lock_)                                     \
-  bool DoInvoke<_type, _is_range, _do_check>(Thread* self, ShadowFrame& shadow_frame,      \
+  template REQUIRES_SHARED(Locks::mutator_lock_)                                           \
+  bool DoInvoke<_type, _is_range, _do_check>(Thread* self,                                 \
+                                             ShadowFrame& shadow_frame,                    \
                                              const Instruction* inst, uint16_t inst_data,  \
                                              JValue* result)
 
@@ -514,6 +561,19 @@
 #undef EXPLICIT_DO_INVOKE_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_INVOKE_TEMPLATE_DECL
 
+// Explicitly instantiate all DoFastInvoke functions.
+#define EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(_type)                     \
+  template REQUIRES_SHARED(Locks::mutator_lock_)                         \
+  bool DoFastInvoke<_type>(Thread* self,                                 \
+                           ShadowFrame& shadow_frame,                    \
+                           const Instruction* inst, uint16_t inst_data,  \
+                           JValue* result)
+
+EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kStatic);     // invoke-static
+EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kDirect);     // invoke-direct
+EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL(kVirtual);    // invoke-virtual
+#undef EXPLICIT_DO_FAST_INVOKE_TEMPLATE_DECL
+
 // Explicitly instantiate all DoInvokeVirtualQuick functions.
 #define EXPLICIT_DO_INVOKE_VIRTUAL_QUICK_TEMPLATE_DECL(_is_range)                    \
   template REQUIRES_SHARED(Locks::mutator_lock_)                               \
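
DoFastInvoke is the C++ half of the streamlined mterp invoke path: resolve the method, try the interpreter intrinsic table, notify the JIT, then fall through to DoCall. A sketch of how an mterp handler would drive it (the handler name here is illustrative; the real entry points live in mterp.cc and are not shown in this diff):

  extern "C" size_t MterpInvokeStaticSketch(Thread* self,
                                            ShadowFrame* shadow_frame,
                                            uint16_t* dex_pc_ptr,
                                            uint16_t inst_data)
      REQUIRES_SHARED(Locks::mutator_lock_) {
    JValue* result_register = shadow_frame->GetResultRegister();
    const Instruction* inst = Instruction::At(dex_pc_ptr);
    return DoFastInvoke<kStatic>(self, *shadow_frame, inst, inst_data, result_register);
  }
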
diff --git a/runtime/interpreter/interpreter_intrinsics.cc b/runtime/interpreter/interpreter_intrinsics.cc
new file mode 100644
index 0000000..869d430
--- /dev/null
+++ b/runtime/interpreter/interpreter_intrinsics.cc
@@ -0,0 +1,481 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "interpreter/interpreter_intrinsics.h"
+
+#include "compiler/intrinsics_enum.h"
+#include "dex_instruction.h"
+#include "interpreter/interpreter_common.h"
+
+namespace art {
+namespace interpreter {
+
+
+#define BINARY_INTRINSIC(name, op, get1, get2, set)                 \
+static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,           \
+                               const Instruction* inst,             \
+                               uint16_t inst_data,                  \
+                               JValue* result_register)             \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                         \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};                   \
+  inst->GetVarArgs(arg, inst_data);                                 \
+  result_register->set(op(shadow_frame->get1, shadow_frame->get2)); \
+  return true;                                                      \
+}
+
+#define BINARY_II_INTRINSIC(name, op, set) \
+    BINARY_INTRINSIC(name, op, GetVReg(arg[0]), GetVReg(arg[1]), set)
+
+#define BINARY_JJ_INTRINSIC(name, op, set) \
+    BINARY_INTRINSIC(name, op, GetVRegLong(arg[0]), GetVRegLong(arg[2]), set)
+
+#define BINARY_JI_INTRINSIC(name, op, set) \
+    BINARY_INTRINSIC(name, op, GetVRegLong(arg[0]), GetVReg(arg[2]), set)
+
+#define UNARY_INTRINSIC(name, op, get, set)                  \
+static ALWAYS_INLINE bool name(ShadowFrame* shadow_frame,    \
+                               const Instruction* inst,      \
+                               uint16_t inst_data,           \
+                               JValue* result_register)      \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                  \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};            \
+  inst->GetVarArgs(arg, inst_data);                          \
+  result_register->set(op(shadow_frame->get(arg[0])));       \
+  return true;                                               \
+}
+
+
+// java.lang.Integer.reverse(I)I
+UNARY_INTRINSIC(MterpIntegerReverse, ReverseBits32, GetVReg, SetI);
+
+// java.lang.Integer.reverseBytes(I)I
+UNARY_INTRINSIC(MterpIntegerReverseBytes, BSWAP, GetVReg, SetI);
+
+// java.lang.Integer.bitCount(I)I
+UNARY_INTRINSIC(MterpIntegerBitCount, POPCOUNT, GetVReg, SetI);
+
+// java.lang.Integer.compare(II)I
+BINARY_II_INTRINSIC(MterpIntegerCompare, Compare, SetI);
+
+// java.lang.Integer.highestOneBit(I)I
+UNARY_INTRINSIC(MterpIntegerHighestOneBit, HighestOneBitValue, GetVReg, SetI);
+
+// java.lang.Integer.lowestOneBit(I)I
+UNARY_INTRINSIC(MterpIntegerLowestOneBit, LowestOneBitValue, GetVReg, SetI);
+
+// java.lang.Integer.numberOfLeadingZeros(I)I
+UNARY_INTRINSIC(MterpIntegerNumberOfLeadingZeros, JAVASTYLE_CLZ, GetVReg, SetI);
+
+// java.lang.Integer.numberOfTrailingZeros(I)I
+UNARY_INTRINSIC(MterpIntegerNumberOfTrailingZeros, JAVASTYLE_CTZ, GetVReg, SetI);
+
+// java.lang.Integer.rotateRight(II)I
+BINARY_II_INTRINSIC(MterpIntegerRotateRight, (Rot<int32_t, false>), SetI);
+
+// java.lang.Integer.rotateLeft(II)I
+BINARY_II_INTRINSIC(MterpIntegerRotateLeft, (Rot<int32_t, true>), SetI);
+
+// java.lang.Integer.signum(I)I
+UNARY_INTRINSIC(MterpIntegerSignum, Signum, GetVReg, SetI);
+
+// java.lang.Long.reverse(J)J
+UNARY_INTRINSIC(MterpLongReverse, ReverseBits64, GetVRegLong, SetJ);
+
+// java.lang.Long.reverseBytes(J)J
+UNARY_INTRINSIC(MterpLongReverseBytes, BSWAP, GetVRegLong, SetJ);
+
+// java.lang.Long.bitCount(J)I
+UNARY_INTRINSIC(MterpLongBitCount, POPCOUNT, GetVRegLong, SetI);
+
+// java.lang.Long.compare(JJ)I
+BINARY_JJ_INTRINSIC(MterpLongCompare, Compare, SetI);
+
+// java.lang.Long.highestOneBit(J)J
+UNARY_INTRINSIC(MterpLongHighestOneBit, HighestOneBitValue, GetVRegLong, SetJ);
+
+// java.lang.Long.lowestOneBit(J)J
+UNARY_INTRINSIC(MterpLongLowestOneBit, LowestOneBitValue, GetVRegLong, SetJ);
+
+// java.lang.Long.numberOfLeadingZeros(J)I
+UNARY_INTRINSIC(MterpLongNumberOfLeadingZeros, JAVASTYLE_CLZ, GetVRegLong, SetJ);
+
+// java.lang.Long.numberOfTrailingZeros(J)I
+UNARY_INTRINSIC(MterpLongNumberOfTrailingZeros, JAVASTYLE_CTZ, GetVRegLong, SetJ);
+
+// java.lang.Long.rotateRight(JI)J
+BINARY_JJ_INTRINSIC(MterpLongRotateRight, (Rot<int64_t, false>), SetJ);
+
+// java.lang.Long.rotateLeft(JI)J
+BINARY_JJ_INTRINSIC(MterpLongRotateLeft, (Rot<int64_t, true>), SetJ);
+
+// java.lang.Long.signum(J)I
+UNARY_INTRINSIC(MterpLongSignum, Signum, GetVRegLong, SetI);
+
+// java.lang.Short.reverseBytes(S)S
+UNARY_INTRINSIC(MterpShortReverseBytes, BSWAP, GetVRegShort, SetS);
+
+// java.lang.Math.min(II)I
+BINARY_II_INTRINSIC(MterpMathMinIntInt, std::min, SetI);
+
+// java.lang.Math.min(JJ)J
+BINARY_JJ_INTRINSIC(MterpMathMinLongLong, std::min, SetJ);
+
+// java.lang.Math.max(II)I
+BINARY_II_INTRINSIC(MterpMathMaxIntInt, std::max, SetI);
+
+// java.lang.Math.max(JJ)J
+BINARY_JJ_INTRINSIC(MterpMathMaxLongLong, std::max, SetJ);
+
+// java.lang.Math.abs(I)I
+UNARY_INTRINSIC(MterpMathAbsInt, std::abs, GetVReg, SetI);
+
+// java.lang.Math.abs(J)J
+UNARY_INTRINSIC(MterpMathAbsLong, std::abs, GetVRegLong, SetJ);
+
+// java.lang.Math.abs(F)F
+UNARY_INTRINSIC(MterpMathAbsFloat, 0x7fffffff&, GetVReg, SetI);
+
+// java.lang.Math.abs(D)D
+UNARY_INTRINSIC(MterpMathAbsDouble, INT64_C(0x7fffffffffffffff)&, GetVRegLong, SetJ);
+
+// java.lang.Math.sqrt(D)D
+UNARY_INTRINSIC(MterpMathSqrt, std::sqrt, GetVRegDouble, SetD);
+
+// java.lang.Math.ceil(D)D
+UNARY_INTRINSIC(MterpMathCeil, std::ceil, GetVRegDouble, SetD);
+
+// java.lang.Math.floor(D)D
+UNARY_INTRINSIC(MterpMathFloor, std::floor, GetVRegDouble, SetD);
+
+// java.lang.Math.sin(D)D
+UNARY_INTRINSIC(MterpMathSin, std::sin, GetVRegDouble, SetD);
+
+// java.lang.Math.cos(D)D
+UNARY_INTRINSIC(MterpMathCos, std::cos, GetVRegDouble, SetD);
+
+// java.lang.Math.tan(D)D
+UNARY_INTRINSIC(MterpMathTan, std::tan, GetVRegDouble, SetD);
+
+// java.lang.Math.asin(D)D
+UNARY_INTRINSIC(MterpMathAsin, std::asin, GetVRegDouble, SetD);
+
+// java.lang.Math.acos(D)D
+UNARY_INTRINSIC(MterpMathAcos, std::acos, GetVRegDouble, SetD);
+
+// java.lang.Math.atan(D)D
+UNARY_INTRINSIC(MterpMathAtan, std::atan, GetVRegDouble, SetD);
+
+// java.lang.String.charAt(I)C
+static ALWAYS_INLINE bool MterpStringCharAt(ShadowFrame* shadow_frame,
+                                            const Instruction* inst,
+                                            uint16_t inst_data,
+                                            JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  int length = str->GetLength();
+  int index = shadow_frame->GetVReg(arg[1]);
+  uint16_t res;
+  if (UNLIKELY(index < 0) || (index >= length)) {
+    return false;  // Punt and let non-intrinsic version deal with the throw.
+  }
+  if (str->IsCompressed()) {
+    res = str->GetValueCompressed()[index];
+  } else {
+    res = str->GetValue()[index];
+  }
+  result_register->SetC(res);
+  return true;
+}
+
+// java.lang.String.compareTo(Ljava/lang/String;)I
+static ALWAYS_INLINE bool MterpStringCompareTo(ShadowFrame* shadow_frame,
+                                               const Instruction* inst,
+                                               uint16_t inst_data,
+                                               JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  mirror::Object* arg1 = shadow_frame->GetVRegReference(arg[1]);
+  if (arg1 == nullptr) {
+    return false;
+  }
+  result_register->SetI(str->CompareTo(arg1->AsString()));
+  return true;
+}
+
+#define STRING_INDEXOF_INTRINSIC(name, starting_pos)             \
+static ALWAYS_INLINE bool Mterp##name(ShadowFrame* shadow_frame, \
+                                      const Instruction* inst,   \
+                                      uint16_t inst_data,        \
+                                      JValue* result_register)   \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                      \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};                \
+  inst->GetVarArgs(arg, inst_data);                              \
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString(); \
+  int ch = shadow_frame->GetVReg(arg[1]);                        \
+  if (ch >= 0x10000) {                                           \
+    /* Punt if supplementary char. */                            \
+    return false;                                                \
+  }                                                              \
+  result_register->SetI(str->FastIndexOf(ch, starting_pos));     \
+  return true;                                                   \
+}
+
+// java.lang.String.indexOf(I)I
+STRING_INDEXOF_INTRINSIC(StringIndexOf, 0);
+
+// java.lang.String.indexOf(II)I
+STRING_INDEXOF_INTRINSIC(StringIndexOfAfter, shadow_frame->GetVReg(arg[2]));
+
+#define SIMPLE_STRING_INTRINSIC(name, operation)                 \
+static ALWAYS_INLINE bool Mterp##name(ShadowFrame* shadow_frame, \
+                                      const Instruction* inst,   \
+                                      uint16_t inst_data,        \
+                                      JValue* result_register)   \
+    REQUIRES_SHARED(Locks::mutator_lock_) {                      \
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};                \
+  inst->GetVarArgs(arg, inst_data);                              \
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString(); \
+  result_register->operation;                                    \
+  return true;                                                   \
+}
+
+// java.lang.String.isEmpty()Z
+SIMPLE_STRING_INTRINSIC(StringIsEmpty, SetZ(str->GetLength() == 0))
+
+// java.lang.String.length()I
+SIMPLE_STRING_INTRINSIC(StringLength, SetI(str->GetLength()))
+
+// java.lang.String.getCharsNoCheck(II[CI)V
+static ALWAYS_INLINE bool MterpStringGetCharsNoCheck(ShadowFrame* shadow_frame,
+                                                     const Instruction* inst,
+                                                     uint16_t inst_data,
+                                                     JValue* result_register ATTRIBUTE_UNUSED)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Start, end & index already checked by caller - won't throw.  Destination is uncompressed.
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  int32_t start = shadow_frame->GetVReg(arg[1]);
+  int32_t end = shadow_frame->GetVReg(arg[2]);
+  int32_t index = shadow_frame->GetVReg(arg[4]);
+  mirror::CharArray* array = shadow_frame->GetVRegReference(arg[3])->AsCharArray();
+  uint16_t* dst = array->GetData() + index;
+  int32_t len = (end - start);
+  if (str->IsCompressed()) {
+    const uint8_t* src_8 = str->GetValueCompressed() + start;
+    for (int i = 0; i < len; i++) {
+      dst[i] = src_8[i];
+    }
+  } else {
+    uint16_t* src_16 = str->GetValue() + start;
+    memcpy(dst, src_16, len * sizeof(uint16_t));
+  }
+  return true;
+}
+
+// java.lang.String.equals(Ljava/lang/Object;)Z
+static ALWAYS_INLINE bool MterpStringEquals(ShadowFrame* shadow_frame,
+                                            const Instruction* inst,
+                                            uint16_t inst_data,
+                                            JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  uint32_t arg[Instruction::kMaxVarArgRegs] = {};
+  inst->GetVarArgs(arg, inst_data);
+  mirror::String* str = shadow_frame->GetVRegReference(arg[0])->AsString();
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg[1]);
+  bool res = false;  // Assume not equal.
+  if ((obj != nullptr) && obj->IsString()) {
+    mirror::String* str2 = obj->AsString();
+    if (str->GetCount() == str2->GetCount()) {
+      // Length & compression status are same.  Can use block compare.
+      void* bytes1;
+      void* bytes2;
+      int len = str->GetLength();
+      if (str->IsCompressed()) {
+        bytes1 = str->GetValueCompressed();
+        bytes2 = str2->GetValueCompressed();
+      } else {
+        len *= sizeof(uint16_t);
+        bytes1 = str->GetValue();
+        bytes2 = str2->GetValue();
+      }
+      res = (memcmp(bytes1, bytes2, len) == 0);
+    }
+  }
+  result_register->SetZ(res);
+  return true;
+}
+
+// Macro to help keep track of what's left to implement.
+#define UNIMPLEMENTED_CASE(name)    \
+    case Intrinsics::k##name:       \
+      res = false;                  \
+      break;
+
+#define INTRINSIC_CASE(name)                                           \
+    case Intrinsics::k##name:                                          \
+      res = Mterp##name(shadow_frame, inst, inst_data, result_register); \
+      break;
+
+bool MterpHandleIntrinsic(ShadowFrame* shadow_frame,
+                          ArtMethod* const called_method,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result_register)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  Intrinsics intrinsic = static_cast<Intrinsics>(called_method->GetIntrinsic());
+  bool res = false;  // Assume failure.
+  switch (intrinsic) {
+    UNIMPLEMENTED_CASE(DoubleDoubleToRawLongBits /* (D)J */)
+    UNIMPLEMENTED_CASE(DoubleDoubleToLongBits /* (D)J */)
+    UNIMPLEMENTED_CASE(DoubleIsInfinite /* (D)Z */)
+    UNIMPLEMENTED_CASE(DoubleIsNaN /* (D)Z */)
+    UNIMPLEMENTED_CASE(DoubleLongBitsToDouble /* (J)D */)
+    UNIMPLEMENTED_CASE(FloatFloatToRawIntBits /* (F)I */)
+    UNIMPLEMENTED_CASE(FloatFloatToIntBits /* (F)I */)
+    UNIMPLEMENTED_CASE(FloatIsInfinite /* (F)Z */)
+    UNIMPLEMENTED_CASE(FloatIsNaN /* (F)Z */)
+    UNIMPLEMENTED_CASE(FloatIntBitsToFloat /* (I)F */)
+    INTRINSIC_CASE(IntegerReverse)
+    INTRINSIC_CASE(IntegerReverseBytes)
+    INTRINSIC_CASE(IntegerBitCount)
+    INTRINSIC_CASE(IntegerCompare)
+    INTRINSIC_CASE(IntegerHighestOneBit)
+    INTRINSIC_CASE(IntegerLowestOneBit)
+    INTRINSIC_CASE(IntegerNumberOfLeadingZeros)
+    INTRINSIC_CASE(IntegerNumberOfTrailingZeros)
+    INTRINSIC_CASE(IntegerRotateRight)
+    INTRINSIC_CASE(IntegerRotateLeft)
+    INTRINSIC_CASE(IntegerSignum)
+    INTRINSIC_CASE(LongReverse)
+    INTRINSIC_CASE(LongReverseBytes)
+    INTRINSIC_CASE(LongBitCount)
+    INTRINSIC_CASE(LongCompare)
+    INTRINSIC_CASE(LongHighestOneBit)
+    INTRINSIC_CASE(LongLowestOneBit)
+    INTRINSIC_CASE(LongNumberOfLeadingZeros)
+    INTRINSIC_CASE(LongNumberOfTrailingZeros)
+    INTRINSIC_CASE(LongRotateRight)
+    INTRINSIC_CASE(LongRotateLeft)
+    INTRINSIC_CASE(LongSignum)
+    INTRINSIC_CASE(ShortReverseBytes)
+    INTRINSIC_CASE(MathAbsDouble)
+    INTRINSIC_CASE(MathAbsFloat)
+    INTRINSIC_CASE(MathAbsLong)
+    INTRINSIC_CASE(MathAbsInt)
+    UNIMPLEMENTED_CASE(MathMinDoubleDouble /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathMinFloatFloat /* (FF)F */)
+    INTRINSIC_CASE(MathMinLongLong)
+    INTRINSIC_CASE(MathMinIntInt)
+    UNIMPLEMENTED_CASE(MathMaxDoubleDouble /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathMaxFloatFloat /* (FF)F */)
+    INTRINSIC_CASE(MathMaxLongLong)
+    INTRINSIC_CASE(MathMaxIntInt)
+    INTRINSIC_CASE(MathCos)
+    INTRINSIC_CASE(MathSin)
+    INTRINSIC_CASE(MathAcos)
+    INTRINSIC_CASE(MathAsin)
+    INTRINSIC_CASE(MathAtan)
+    UNIMPLEMENTED_CASE(MathAtan2 /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathCbrt /* (D)D */)
+    UNIMPLEMENTED_CASE(MathCosh /* (D)D */)
+    UNIMPLEMENTED_CASE(MathExp /* (D)D */)
+    UNIMPLEMENTED_CASE(MathExpm1 /* (D)D */)
+    UNIMPLEMENTED_CASE(MathHypot /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathLog /* (D)D */)
+    UNIMPLEMENTED_CASE(MathLog10 /* (D)D */)
+    UNIMPLEMENTED_CASE(MathNextAfter /* (DD)D */)
+    UNIMPLEMENTED_CASE(MathSinh /* (D)D */)
+    INTRINSIC_CASE(MathTan)
+    UNIMPLEMENTED_CASE(MathTanh /* (D)D */)
+    INTRINSIC_CASE(MathSqrt)
+    INTRINSIC_CASE(MathCeil)
+    INTRINSIC_CASE(MathFloor)
+    UNIMPLEMENTED_CASE(MathRint /* (D)D */)
+    UNIMPLEMENTED_CASE(MathRoundDouble /* (D)J */)
+    UNIMPLEMENTED_CASE(MathRoundFloat /* (F)I */)
+    UNIMPLEMENTED_CASE(SystemArrayCopyChar /* ([CI[CII)V */)
+    UNIMPLEMENTED_CASE(SystemArrayCopy /* (Ljava/lang/Object;ILjava/lang/Object;II)V */)
+    UNIMPLEMENTED_CASE(ThreadCurrentThread /* ()Ljava/lang/Thread; */)
+    UNIMPLEMENTED_CASE(MemoryPeekByte /* (J)B */)
+    UNIMPLEMENTED_CASE(MemoryPeekIntNative /* (J)I */)
+    UNIMPLEMENTED_CASE(MemoryPeekLongNative /* (J)J */)
+    UNIMPLEMENTED_CASE(MemoryPeekShortNative /* (J)S */)
+    UNIMPLEMENTED_CASE(MemoryPokeByte /* (JB)V */)
+    UNIMPLEMENTED_CASE(MemoryPokeIntNative /* (JI)V */)
+    UNIMPLEMENTED_CASE(MemoryPokeLongNative /* (JJ)V */)
+    UNIMPLEMENTED_CASE(MemoryPokeShortNative /* (JS)V */)
+    INTRINSIC_CASE(StringCharAt)
+    INTRINSIC_CASE(StringCompareTo)
+    INTRINSIC_CASE(StringEquals)
+    INTRINSIC_CASE(StringGetCharsNoCheck)
+    INTRINSIC_CASE(StringIndexOf)
+    INTRINSIC_CASE(StringIndexOfAfter)
+    UNIMPLEMENTED_CASE(StringStringIndexOf /* (Ljava/lang/String;)I */)
+    UNIMPLEMENTED_CASE(StringStringIndexOfAfter /* (Ljava/lang/String;I)I */)
+    INTRINSIC_CASE(StringIsEmpty)
+    INTRINSIC_CASE(StringLength)
+    UNIMPLEMENTED_CASE(StringNewStringFromBytes /* ([BIII)Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringNewStringFromChars /* (II[C)Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringNewStringFromString /* (Ljava/lang/String;)Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringBufferAppend /* (Ljava/lang/String;)Ljava/lang/StringBuffer; */)
+    UNIMPLEMENTED_CASE(StringBufferLength /* ()I */)
+    UNIMPLEMENTED_CASE(StringBufferToString /* ()Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(StringBuilderAppend /* (Ljava/lang/String;)Ljava/lang/StringBuilder; */)
+    UNIMPLEMENTED_CASE(StringBuilderLength /* ()I */)
+    UNIMPLEMENTED_CASE(StringBuilderToString /* ()Ljava/lang/String; */)
+    UNIMPLEMENTED_CASE(UnsafeCASInt /* (Ljava/lang/Object;JII)Z */)
+    UNIMPLEMENTED_CASE(UnsafeCASLong /* (Ljava/lang/Object;JJJ)Z */)
+    UNIMPLEMENTED_CASE(UnsafeCASObject /* (Ljava/lang/Object;JLjava/lang/Object;Ljava/lang/Object;)Z */)
+    UNIMPLEMENTED_CASE(UnsafeGet /* (Ljava/lang/Object;J)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetVolatile /* (Ljava/lang/Object;J)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetObject /* (Ljava/lang/Object;J)Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(UnsafeGetObjectVolatile /* (Ljava/lang/Object;J)Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(UnsafeGetLong /* (Ljava/lang/Object;J)J */)
+    UNIMPLEMENTED_CASE(UnsafeGetLongVolatile /* (Ljava/lang/Object;J)J */)
+    UNIMPLEMENTED_CASE(UnsafePut /* (Ljava/lang/Object;JI)V */)
+    UNIMPLEMENTED_CASE(UnsafePutOrdered /* (Ljava/lang/Object;JI)V */)
+    UNIMPLEMENTED_CASE(UnsafePutVolatile /* (Ljava/lang/Object;JI)V */)
+    UNIMPLEMENTED_CASE(UnsafePutObject /* (Ljava/lang/Object;JLjava/lang/Object;)V */)
+    UNIMPLEMENTED_CASE(UnsafePutObjectOrdered /* (Ljava/lang/Object;JLjava/lang/Object;)V */)
+    UNIMPLEMENTED_CASE(UnsafePutObjectVolatile /* (Ljava/lang/Object;JLjava/lang/Object;)V */)
+    UNIMPLEMENTED_CASE(UnsafePutLong /* (Ljava/lang/Object;JJ)V */)
+    UNIMPLEMENTED_CASE(UnsafePutLongOrdered /* (Ljava/lang/Object;JJ)V */)
+    UNIMPLEMENTED_CASE(UnsafePutLongVolatile /* (Ljava/lang/Object;JJ)V */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndAddInt /* (Ljava/lang/Object;JI)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndAddLong /* (Ljava/lang/Object;JJ)J */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndSetInt /* (Ljava/lang/Object;JI)I */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndSetLong /* (Ljava/lang/Object;JJ)J */)
+    UNIMPLEMENTED_CASE(UnsafeGetAndSetObject /* (Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(UnsafeLoadFence /* ()V */)
+    UNIMPLEMENTED_CASE(UnsafeStoreFence /* ()V */)
+    UNIMPLEMENTED_CASE(UnsafeFullFence /* ()V */)
+    UNIMPLEMENTED_CASE(ReferenceGetReferent /* ()Ljava/lang/Object; */)
+    UNIMPLEMENTED_CASE(IntegerValueOf /* (I)Ljava/lang/Integer; */)
+    case Intrinsics::kNone:
+      res = false;
+      break;
+    // Note: no default case to ensure we catch any newly added intrinsics.
+  }
+  return res;
+}
+
+}  // namespace interpreter
+}  // namespace art
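
Since every arithmetic intrinsic above is macro-generated, one concrete expansion makes the generated shape easier to audit. UNARY_INTRINSIC(MterpIntegerBitCount, POPCOUNT, GetVReg, SetI) produces, modulo whitespace:

  static ALWAYS_INLINE bool MterpIntegerBitCount(ShadowFrame* shadow_frame,
                                                 const Instruction* inst,
                                                 uint16_t inst_data,
                                                 JValue* result_register)
      REQUIRES_SHARED(Locks::mutator_lock_) {
    uint32_t arg[Instruction::kMaxVarArgRegs] = {};  // decoded 35c operands
    inst->GetVarArgs(arg, inst_data);
    result_register->SetI(POPCOUNT(shadow_frame->GetVReg(arg[0])));
    return true;
  }
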
diff --git a/runtime/interpreter/interpreter_intrinsics.h b/runtime/interpreter/interpreter_intrinsics.h
new file mode 100644
index 0000000..2a23002
--- /dev/null
+++ b/runtime/interpreter/interpreter_intrinsics.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_INTERPRETER_INTERPRETER_INTRINSICS_H_
+#define ART_RUNTIME_INTERPRETER_INTERPRETER_INTRINSICS_H_
+
+#include "jvalue.h"
+
+namespace art {
+
+class ArtMethod;
+class Instruction;
+class ShadowFrame;
+
+namespace interpreter {
+
+// Invocations of methods identified as intrinsics are routed here.  If there is
+// no interpreter implementation, false is returned and a normal invoke will proceed.
+bool MterpHandleIntrinsic(ShadowFrame* shadow_frame,
+                          ArtMethod* const called_method,
+                          const Instruction* inst,
+                          uint16_t inst_data,
+                          JValue* result_register);
+
+}  // namespace interpreter
+}  // namespace art
+
+#endif  // ART_RUNTIME_INTERPRETER_INTERPRETER_INTRINSICS_H_
diff --git a/runtime/interpreter/mterp/arm/op_sget.S b/runtime/interpreter/mterp/arm/op_sget.S
index 2b81f50..3c813ef 100644
--- a/runtime/interpreter/mterp/arm/op_sget.S
+++ b/runtime/interpreter/mterp/arm/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+%default { "is_object":"0", "helper":"MterpGet32Static" }
     /*
      * General SGET handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm/op_sget_boolean.S b/runtime/interpreter/mterp/arm/op_sget_boolean.S
index ebfb44c..eb06aa8 100644
--- a/runtime/interpreter/mterp/arm/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/arm/op_sget_boolean.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_byte.S b/runtime/interpreter/mterp/arm/op_sget_byte.S
index d76862e..9f4c904 100644
--- a/runtime/interpreter/mterp/arm/op_sget_byte.S
+++ b/runtime/interpreter/mterp/arm/op_sget_byte.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_char.S b/runtime/interpreter/mterp/arm/op_sget_char.S
index b7fcfc2..dd8c991 100644
--- a/runtime/interpreter/mterp/arm/op_sget_char.S
+++ b/runtime/interpreter/mterp/arm/op_sget_char.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_object.S b/runtime/interpreter/mterp/arm/op_sget_object.S
index 8e7d075..e1d9eae 100644
--- a/runtime/interpreter/mterp/arm/op_sget_object.S
+++ b/runtime/interpreter/mterp/arm/op_sget_object.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "arm/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_short.S b/runtime/interpreter/mterp/arm/op_sget_short.S
index 3e80f0d..c0d61c4 100644
--- a/runtime/interpreter/mterp/arm/op_sget_short.S
+++ b/runtime/interpreter/mterp/arm/op_sget_short.S
@@ -1 +1 @@
-%include "arm/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "arm/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sget_wide.S b/runtime/interpreter/mterp/arm/op_sget_wide.S
index 4f2f89d..aeee016 100644
--- a/runtime/interpreter/mterp/arm/op_sget_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sget_wide.S
@@ -4,12 +4,12 @@
      */
     /* sget-wide vAA, field@BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
     VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
diff --git a/runtime/interpreter/mterp/arm/op_sput.S b/runtime/interpreter/mterp/arm/op_sput.S
index 7e0c1a6..494df8a 100644
--- a/runtime/interpreter/mterp/arm/op_sput.S
+++ b/runtime/interpreter/mterp/arm/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
     /*
      * General SPUT handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm/op_sput_boolean.S b/runtime/interpreter/mterp/arm/op_sput_boolean.S
index e3bbf2b..47bed0a 100644
--- a/runtime/interpreter/mterp/arm/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/arm/op_sput_boolean.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_byte.S b/runtime/interpreter/mterp/arm/op_sput_byte.S
index e3bbf2b..b4d22b4 100644
--- a/runtime/interpreter/mterp/arm/op_sput_byte.S
+++ b/runtime/interpreter/mterp/arm/op_sput_byte.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_char.S b/runtime/interpreter/mterp/arm/op_sput_char.S
index d8d65cb..58a957d 100644
--- a/runtime/interpreter/mterp/arm/op_sput_char.S
+++ b/runtime/interpreter/mterp/arm/op_sput_char.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_short.S b/runtime/interpreter/mterp/arm/op_sput_short.S
index d8d65cb..88c3211 100644
--- a/runtime/interpreter/mterp/arm/op_sput_short.S
+++ b/runtime/interpreter/mterp/arm/op_sput_short.S
@@ -1 +1 @@
-%include "arm/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/arm/op_sput_wide.S b/runtime/interpreter/mterp/arm/op_sput_wide.S
index 8d8ed8c..1e8fcc9 100644
--- a/runtime/interpreter/mterp/arm/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm/op_sput_wide.S
@@ -3,15 +3,15 @@
      *
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   r0, 1                       @ r0<- field ref BBBB
-    ldr     r1, [rFP, #OFF_FP_METHOD]
-    mov     r2, rINST, lsr #8           @ r3<- AA
-    VREG_INDEX_TO_ADDR r2, r2
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    VREG_INDEX_TO_ADDR r1, r1
+    ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
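
The operand shuffle in this hunk tracks a new argument order: the Mterp helper takes the vreg address as its second parameter where the old art helper took the referring method there. Prototypes as implied by the AAPCS register assignments above (return types assumed; the declarations themselves are not part of this hunk):

  // Old: r0 = field index, r1 = referrer, r2 = &vreg, r3 = self.
  extern "C" int artSet64IndirectStaticFromMterp(uint32_t field_idx, ArtMethod* referrer,
                                                 uint64_t* new_value, Thread* self);
  // New: r0 = field index, r1 = &vreg, r2 = referrer, r3 = self.
  extern "C" int MterpSet64Static(uint32_t field_idx, uint64_t* new_value,
                                  ArtMethod* referrer, Thread* self);
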
diff --git a/runtime/interpreter/mterp/arm64/op_sget.S b/runtime/interpreter/mterp/arm64/op_sget.S
index 6352ce0..84e71ac 100644
--- a/runtime/interpreter/mterp/arm64/op_sget.S
+++ b/runtime/interpreter/mterp/arm64/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+%default { "is_object":"0", "helper":"MterpGet32Static", "extend":"" }
     /*
      * General SGET handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm64/op_sget_boolean.S b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
index c40dbdd..868f41c 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_boolean.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"uxtb w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetBooleanStatic", "extend":"uxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_byte.S b/runtime/interpreter/mterp/arm64/op_sget_byte.S
index 6cf69a3..e135aa7 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_byte.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_byte.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"sxtb w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetByteStatic", "extend":"sxtb w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_char.S b/runtime/interpreter/mterp/arm64/op_sget_char.S
index 8924a34..05d57ac 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_char.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_char.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"uxth w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetCharStatic", "extend":"uxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_object.S b/runtime/interpreter/mterp/arm64/op_sget_object.S
index 620b0ba..1faaf6e 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_object.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_object.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "arm64/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_short.S b/runtime/interpreter/mterp/arm64/op_sget_short.S
index 19dbba6..5900231 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_short.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_short.S
@@ -1 +1 @@
-%include "arm64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"sxth w0, w0"}
+%include "arm64/op_sget.S" {"helper":"MterpGetShortStatic", "extend":"sxth w0, w0"}
diff --git a/runtime/interpreter/mterp/arm64/op_sget_wide.S b/runtime/interpreter/mterp/arm64/op_sget_wide.S
index 287f66d..92f3f7d 100644
--- a/runtime/interpreter/mterp/arm64/op_sget_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_sget_wide.S
@@ -4,12 +4,12 @@
      */
     /* sget-wide vAA, field//BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w4, wINST, #8                 // w4<- AA
     cbnz  x3, MterpException            // bail out
diff --git a/runtime/interpreter/mterp/arm64/op_sput.S b/runtime/interpreter/mterp/arm64/op_sput.S
index 75f27ab..e322af0 100644
--- a/runtime/interpreter/mterp/arm64/op_sput.S
+++ b/runtime/interpreter/mterp/arm64/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
     /*
      * General SPUT handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/arm64/op_sput_boolean.S b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
index 11c55e5..9928f31 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_boolean.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_byte.S b/runtime/interpreter/mterp/arm64/op_sput_byte.S
index 11c55e5..16d6ba9 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_byte.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_byte.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_char.S b/runtime/interpreter/mterp/arm64/op_sput_char.S
index b4dd5aa..ab5e815 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_char.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_char.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_short.S b/runtime/interpreter/mterp/arm64/op_sput_short.S
index b4dd5aa..b54f88a 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_short.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_short.S
@@ -1 +1 @@
-%include "arm64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "arm64/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/arm64/op_sput_wide.S b/runtime/interpreter/mterp/arm64/op_sput_wide.S
index a79b1a6..4aeb8ff 100644
--- a/runtime/interpreter/mterp/arm64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_sput_wide.S
@@ -3,15 +3,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   w0, 1                       // w0<- field ref BBBB
-    ldr     x1, [xFP, #OFF_FP_METHOD]
-    lsr     w2, wINST, #8               // w3<- AA
-    VREG_INDEX_TO_ADDR x2, w2
+    lsr     w1, wINST, #8               // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1
+    ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cbnz    w0, MterpException          // 0 on success, -1 on failure
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
index 6d9774c..b19426b 100644
--- a/runtime/interpreter/mterp/config_arm
+++ b/runtime/interpreter/mterp/config_arm
@@ -288,8 +288,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index 9f32695..0987964 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -286,8 +286,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
index 708a22b..fe07385 100644
--- a/runtime/interpreter/mterp/config_mips
+++ b/runtime/interpreter/mterp/config_mips
@@ -288,8 +288,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
index 7643a48..d24cf4d 100644
--- a/runtime/interpreter/mterp/config_mips64
+++ b/runtime/interpreter/mterp/config_mips64
@@ -288,8 +288,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index f454786..076baf2 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -292,8 +292,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
index dbfd3d1..44b671a 100644
--- a/runtime/interpreter/mterp/config_x86_64
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -292,8 +292,8 @@
     # op op_unused_f9 FALLBACK
     op op_invoke_polymorphic FALLBACK
     op op_invoke_polymorphic_range FALLBACK
-    # op op_unused_fc FALLBACK
-    # op op_unused_fd FALLBACK
+    op op_invoke_custom FALLBACK
+    op op_invoke_custom_range FALLBACK
     # op op_unused_fe FALLBACK
     # op op_unused_ff FALLBACK
 op-end
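
All six configs make the same switch: slots 0xfc/0xfd stop being "unused"
and become the invoke-custom opcodes, still marked FALLBACK so the generator
emits a transfer stub to the reference interpreter rather than a
hand-written handler (see the generated .L_op_invoke_custom stubs further
down). A hedged, stand-alone sketch of the dispatch-table effect, with all
names illustrative rather than ART's:

    #include <cstdio>

    using Handler = void (*)();

    void MterpFallbackStub() { std::puts("punt to reference interpreter"); }
    void FastHandler()       { std::puts("hand-written fast path"); }

    int main() {
      Handler table[0x100];
      for (auto& h : table) h = FastHandler;  // most opcodes: fast path
      table[0xfc] = MterpFallbackStub;        // op_invoke_custom
      table[0xfd] = MterpFallbackStub;        // op_invoke_custom_range
      table[0xfc]();                          // falls back, as configured
    }
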
diff --git a/runtime/interpreter/mterp/mips/op_sget.S b/runtime/interpreter/mterp/mips/op_sget.S
index 64ece1e..635df8a 100644
--- a/runtime/interpreter/mterp/mips/op_sget.S
+++ b/runtime/interpreter/mterp/mips/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+%default { "is_object":"0", "helper":"MterpGet32Static" }
     /*
      * General SGET handler.
      *
diff --git a/runtime/interpreter/mterp/mips/op_sget_boolean.S b/runtime/interpreter/mterp/mips/op_sget_boolean.S
index 45a5a70..7829970 100644
--- a/runtime/interpreter/mterp/mips/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/mips/op_sget_boolean.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_byte.S b/runtime/interpreter/mterp/mips/op_sget_byte.S
index 319122c..ee08342 100644
--- a/runtime/interpreter/mterp/mips/op_sget_byte.S
+++ b/runtime/interpreter/mterp/mips/op_sget_byte.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_char.S b/runtime/interpreter/mterp/mips/op_sget_char.S
index 7103847..d8b477a 100644
--- a/runtime/interpreter/mterp/mips/op_sget_char.S
+++ b/runtime/interpreter/mterp/mips/op_sget_char.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_object.S b/runtime/interpreter/mterp/mips/op_sget_object.S
index b205f51..2dc00c3 100644
--- a/runtime/interpreter/mterp/mips/op_sget_object.S
+++ b/runtime/interpreter/mterp/mips/op_sget_object.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "mips/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_short.S b/runtime/interpreter/mterp/mips/op_sget_short.S
index 3301823..ab55d93 100644
--- a/runtime/interpreter/mterp/mips/op_sget_short.S
+++ b/runtime/interpreter/mterp/mips/op_sget_short.S
@@ -1 +1 @@
-%include "mips/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "mips/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sget_wide.S b/runtime/interpreter/mterp/mips/op_sget_wide.S
index c729250..ec4295a 100644
--- a/runtime/interpreter/mterp/mips/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sget_wide.S
@@ -2,12 +2,12 @@
      * 64-bit SGET handler.
      */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGet64StaticFromCode)
+    JAL(MterpGet64Static)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_sput.S b/runtime/interpreter/mterp/mips/op_sput.S
index 7034a0e..37f8687 100644
--- a/runtime/interpreter/mterp/mips/op_sput.S
+++ b/runtime/interpreter/mterp/mips/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
     /*
      * General SPUT handler.
      *
diff --git a/runtime/interpreter/mterp/mips/op_sput_boolean.S b/runtime/interpreter/mterp/mips/op_sput_boolean.S
index 7909ef5..6426cd4 100644
--- a/runtime/interpreter/mterp/mips/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/mips/op_sput_boolean.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_byte.S b/runtime/interpreter/mterp/mips/op_sput_byte.S
index 7909ef5..c68d18f 100644
--- a/runtime/interpreter/mterp/mips/op_sput_byte.S
+++ b/runtime/interpreter/mterp/mips/op_sput_byte.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_char.S b/runtime/interpreter/mterp/mips/op_sput_char.S
index 188195c..9b8983e 100644
--- a/runtime/interpreter/mterp/mips/op_sput_char.S
+++ b/runtime/interpreter/mterp/mips/op_sput_char.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_short.S b/runtime/interpreter/mterp/mips/op_sput_short.S
index 188195c..5a57ed9 100644
--- a/runtime/interpreter/mterp/mips/op_sput_short.S
+++ b/runtime/interpreter/mterp/mips/op_sput_short.S
@@ -1 +1 @@
-%include "mips/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/mips/op_sput_wide.S b/runtime/interpreter/mterp/mips/op_sput_wide.S
index 3b347fc..c090007 100644
--- a/runtime/interpreter/mterp/mips/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sput_wide.S
@@ -2,15 +2,15 @@
      * 64-bit SPUT handler.
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
-    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
-    GET_OPA(a2)                            # a2 <- AA
-    EAS2(a2, rFP, a2)                      # a2 <- &fp[AA]
+    GET_OPA(a1)                            # a1 <- AA
+    EAS2(a1, rFP, a1)                      # a1 <- &fp[AA]
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet64IndirectStaticFromMterp)
+    JAL(MterpSet64Static)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips64/op_sget.S b/runtime/interpreter/mterp/mips64/op_sget.S
index bd2cfe3..71046db 100644
--- a/runtime/interpreter/mterp/mips64/op_sget.S
+++ b/runtime/interpreter/mterp/mips64/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode", "extend":"" }
+%default { "is_object":"0", "helper":"MterpGet32Static", "extend":"" }
     /*
      * General SGET handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/mips64/op_sget_boolean.S b/runtime/interpreter/mterp/mips64/op_sget_boolean.S
index e7b1844..ec1ce9e 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_boolean.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetBooleanStaticFromCode", "extend":"and v0, v0, 0xff"}
+%include "mips64/op_sget.S" {"helper":"MterpGetBooleanStatic", "extend":"and v0, v0, 0xff"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_byte.S b/runtime/interpreter/mterp/mips64/op_sget_byte.S
index 52a2e4a..6a802f6 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_byte.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_byte.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetByteStaticFromCode", "extend":"seb v0, v0"}
+%include "mips64/op_sget.S" {"helper":"MterpGetByteStatic", "extend":"seb v0, v0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_char.S b/runtime/interpreter/mterp/mips64/op_sget_char.S
index 873d82a..483d085 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_char.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_char.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetCharStaticFromCode", "extend":"and v0, v0, 0xffff"}
+%include "mips64/op_sget.S" {"helper":"MterpGetCharStatic", "extend":"and v0, v0, 0xffff"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_object.S b/runtime/interpreter/mterp/mips64/op_sget_object.S
index 3108417..2250696 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_object.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_object.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "mips64/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_short.S b/runtime/interpreter/mterp/mips64/op_sget_short.S
index fed4e76..b257bbb 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_short.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_short.S
@@ -1 +1 @@
-%include "mips64/op_sget.S" {"helper":"artGetShortStaticFromCode", "extend":"seh v0, v0"}
+%include "mips64/op_sget.S" {"helper":"MterpGetShortStatic", "extend":"seh v0, v0"}
diff --git a/runtime/interpreter/mterp/mips64/op_sget_wide.S b/runtime/interpreter/mterp/mips64/op_sget_wide.S
index 77124d1..ace64f8 100644
--- a/runtime/interpreter/mterp/mips64/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_sget_wide.S
@@ -3,12 +3,12 @@
      *
      */
     /* sget-wide vAA, field//BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGet64StaticFromCode
+    jal     MterpGet64Static
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a4, rINST, 8                # a4 <- AA
     bnez    a3, MterpException          # bail out
diff --git a/runtime/interpreter/mterp/mips64/op_sput.S b/runtime/interpreter/mterp/mips64/op_sput.S
index 142f18f..466f333 100644
--- a/runtime/interpreter/mterp/mips64/op_sput.S
+++ b/runtime/interpreter/mterp/mips64/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode" }
+%default { "helper":"MterpSet32Static" }
     /*
      * General SPUT handler wrapper.
      *
diff --git a/runtime/interpreter/mterp/mips64/op_sput_boolean.S b/runtime/interpreter/mterp/mips64/op_sput_boolean.S
index f5b8dbf..eba58f7 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_boolean.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_byte.S b/runtime/interpreter/mterp/mips64/op_sput_byte.S
index f5b8dbf..80a26c0 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_byte.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_byte.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_char.S b/runtime/interpreter/mterp/mips64/op_sput_char.S
index c4d195c..c0d5bf3 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_char.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_char.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_short.S b/runtime/interpreter/mterp/mips64/op_sput_short.S
index c4d195c..b001832 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_short.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_short.S
@@ -1 +1 @@
-%include "mips64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "mips64/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/mips64/op_sput_wide.S b/runtime/interpreter/mterp/mips64/op_sput_wide.S
index 828ddc1..aa3d5b4 100644
--- a/runtime/interpreter/mterp/mips64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_sput_wide.S
@@ -3,15 +3,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
-    ld      a1, OFF_FP_METHOD(rFP)
-    srl     a2, rINST, 8                # a2 <- AA
-    dlsa    a2, a2, rFP, 2
+    srl     a1, rINST, 8                # a1 <- AA
+    dlsa    a1, a1, rFP, 2
+    ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet64IndirectStaticFromMterp
+    jal     MterpSet64Static
     bnezc   v0, MterpException          # 0 on success, -1 on failure
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index 75ab91a..a53040c 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -18,6 +18,7 @@
  * Mterp entry point and support functions.
  */
 #include "interpreter/interpreter_common.h"
+#include "interpreter/interpreter_intrinsics.h"
 #include "entrypoints/entrypoint_utils-inl.h"
 #include "mterp.h"
 #include "debugger.h"
@@ -157,7 +158,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvoke<kVirtual, false, false>(
+  return DoFastInvoke<kVirtual>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
@@ -190,7 +191,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvoke<kDirect, false, false>(
+  return DoFastInvoke<kDirect>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
@@ -201,7 +202,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  return DoInvoke<kStatic, false, false>(
+  return DoFastInvoke<kStatic>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
 
@@ -267,6 +268,18 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   JValue* result_register = shadow_frame->GetResultRegister();
   const Instruction* inst = Instruction::At(dex_pc_ptr);
+  const uint32_t vregC = inst->VRegC_35c();
+  const uint32_t vtable_idx = inst->VRegB_35c();
+  ObjPtr<mirror::Object> const receiver = shadow_frame->GetVRegReference(vregC);
+  if (receiver != nullptr) {
+    ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
+        vtable_idx, kRuntimePointerSize);
+    if ((called_method != nullptr) && called_method->IsIntrinsic()) {
+      if (MterpHandleIntrinsic(shadow_frame, called_method, inst, inst_data, result_register)) {
+        return !self->IsExceptionPending();
+      }
+    }
+  }
   return DoInvokeVirtualQuick<false>(
       self, *shadow_frame, inst, inst_data, result_register);
 }
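
The added block gives invoke-virtual-quick an intrinsic fast path: look up
the embedded vtable entry, and if it names an intrinsic, let
MterpHandleIntrinsic run it inline before falling through to
DoInvokeVirtualQuick. The caller-side contract, reduced to a stand-alone
sketch (the handler body and method IDs are made up; only the
try-then-fall-back shape comes from the code above):

    #include <cstdio>

    // Returns true if the call was handled inline; false means "use the
    // generic slow path", mirroring how MterpHandleIntrinsic is consumed.
    bool TryIntrinsic(int method_id, int* result) {
      if (method_id == 42) {  // pretend 42 is an intrinsic method
        *result = 7;
        return true;
      }
      return false;
    }

    int GenericInvoke(int method_id) { return method_id * 2; }  // slow path

    int main() {
      const int ids[] = {42, 10};
      for (int id : ids) {
        int r;
        if (!TryIntrinsic(id, &r)) r = GenericInvoke(id);
        std::printf("method %d -> %d\n", id, r);
      }
    }
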
@@ -587,27 +600,6 @@
   return MterpShouldSwitchInterpreters();
 }
 
-extern "C" ssize_t artSet64IndirectStaticFromMterp(uint32_t field_idx,
-                                                   ArtMethod* referrer,
-                                                   uint64_t* new_value,
-                                                   Thread* self)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  ScopedQuickEntrypointChecks sqec(self);
-  ArtField* field = FindFieldFast(field_idx, referrer, StaticPrimitiveWrite, sizeof(int64_t));
-  if (LIKELY(field != nullptr)) {
-    // Compiled code can't use transactional mode.
-    field->Set64<false>(field->GetDeclaringClass(), *new_value);
-    return 0;  // success
-  }
-  field = FindFieldFromCode<StaticPrimitiveWrite, true>(field_idx, referrer, self, sizeof(int64_t));
-  if (LIKELY(field != nullptr)) {
-    // Compiled code can't use transactional mode.
-    field->Set64<false>(field->GetDeclaringClass(), *new_value);
-    return 0;  // success
-  }
-  return -1;  // failure
-}
-
 extern "C" ssize_t artSet8InstanceFromMterp(uint32_t field_idx,
                                             mirror::Object* obj,
                                             uint8_t new_value,
@@ -689,7 +681,187 @@
   return -1;  // failure
 }
 
-extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr, int32_t index)
+template <typename return_type, Primitive::Type primitive_type>
+ALWAYS_INLINE return_type MterpGetStatic(uint32_t field_idx,
+                                         ArtMethod* referrer,
+                                         Thread* self,
+                                         return_type (ArtField::*func)(ObjPtr<mirror::Object>))
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return_type res = 0;  // On exception, the result will be ignored.
+  ArtField* f =
+      FindFieldFromCode<StaticPrimitiveRead, false>(field_idx,
+                                                    referrer,
+                                                    self,
+                                                    primitive_type);
+  if (LIKELY(f != nullptr)) {
+    ObjPtr<mirror::Object> obj = f->GetDeclaringClass();
+    res = (f->*func)(obj);
+  }
+  return res;
+}
+
+extern "C" int32_t MterpGetBooleanStatic(uint32_t field_idx,
+                                         ArtMethod* referrer,
+                                         Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<uint8_t, Primitive::kPrimBoolean>(field_idx,
+                                                          referrer,
+                                                          self,
+                                                          &ArtField::GetBoolean);
+}
+
+extern "C" int32_t MterpGetByteStatic(uint32_t field_idx,
+                                      ArtMethod* referrer,
+                                      Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int8_t, Primitive::kPrimByte>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::GetByte);
+}
+
+extern "C" uint32_t MterpGetCharStatic(uint32_t field_idx,
+                                       ArtMethod* referrer,
+                                       Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<uint16_t, Primitive::kPrimChar>(field_idx,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::GetChar);
+}
+
+extern "C" int32_t MterpGetShortStatic(uint32_t field_idx,
+                                       ArtMethod* referrer,
+                                       Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int16_t, Primitive::kPrimShort>(field_idx,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::GetShort);
+}
+
+extern "C" mirror::Object* MterpGetObjStatic(uint32_t field_idx,
+                                             ArtMethod* referrer,
+                                             Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<ObjPtr<mirror::Object>, Primitive::kPrimNot>(field_idx,
+                                                                     referrer,
+                                                                     self,
+                                                                     &ArtField::GetObject).Ptr();
+}
+
+extern "C" int32_t MterpGet32Static(uint32_t field_idx,
+                                    ArtMethod* referrer,
+                                    Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int32_t, Primitive::kPrimInt>(field_idx,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::GetInt);
+}
+
+extern "C" int64_t MterpGet64Static(uint32_t field_idx, ArtMethod* referrer, Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpGetStatic<int64_t, Primitive::kPrimLong>(field_idx,
+                                                       referrer,
+                                                       self,
+                                                       &ArtField::GetLong);
+}
+
+
+template <typename field_type, Primitive::Type primitive_type>
+int MterpSetStatic(uint32_t field_idx,
+                   field_type new_value,
+                   ArtMethod* referrer,
+                   Thread* self,
+                   void (ArtField::*func)(ObjPtr<mirror::Object>, field_type val))
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  int res = 0;  // Assume success (following quick_field_entrypoints conventions)
+  ArtField* f =
+      FindFieldFromCode<StaticPrimitiveWrite, false>(field_idx, referrer, self, primitive_type);
+  if (LIKELY(f != nullptr)) {
+    ObjPtr<mirror::Object> obj = f->GetDeclaringClass();
+    (f->*func)(obj, new_value);
+  } else {
+    res = -1;  // Failure
+  }
+  return res;
+}
+
+extern "C" int MterpSetBooleanStatic(uint32_t field_idx,
+                                     uint8_t new_value,
+                                     ArtMethod* referrer,
+                                     Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<uint8_t, Primitive::kPrimBoolean>(field_idx,
+                                                          new_value,
+                                                          referrer,
+                                                          self,
+                                                          &ArtField::SetBoolean<false>);
+}
+
+extern "C" int MterpSetByteStatic(uint32_t field_idx,
+                                  int8_t new_value,
+                                  ArtMethod* referrer,
+                                  Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int8_t, Primitive::kPrimByte>(field_idx,
+                                                      new_value,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::SetByte<false>);
+}
+
+extern "C" int MterpSetCharStatic(uint32_t field_idx,
+                                  uint16_t new_value,
+                                  ArtMethod* referrer,
+                                  Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<uint16_t, Primitive::kPrimChar>(field_idx,
+                                                        new_value,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::SetChar<false>);
+}
+
+extern "C" int MterpSetShortStatic(uint32_t field_idx,
+                                   int16_t new_value,
+                                   ArtMethod* referrer,
+                                   Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int16_t, Primitive::kPrimShort>(field_idx,
+                                                        new_value,
+                                                        referrer,
+                                                        self,
+                                                        &ArtField::SetShort<false>);
+}
+
+extern "C" int MterpSet32Static(uint32_t field_idx,
+                                int32_t new_value,
+                                ArtMethod* referrer,
+                                Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int32_t, Primitive::kPrimInt>(field_idx,
+                                                      new_value,
+                                                      referrer,
+                                                      self,
+                                                      &ArtField::SetInt<false>);
+}
+
+extern "C" int MterpSet64Static(uint32_t field_idx,
+                                int64_t* new_value,
+                                ArtMethod* referrer,
+                                Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return MterpSetStatic<int64_t, Primitive::kPrimLong>(field_idx,
+                                                       *new_value,
+                                                       referrer,
+                                                       self,
+                                                       &ArtField::SetLong<false>);
+}
+
+extern "C" mirror::Object* artAGetObjectFromMterp(mirror::Object* arr,
+                                                  int32_t index)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (UNLIKELY(arr == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
@@ -703,7 +875,8 @@
   }
 }
 
-extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj, uint32_t field_offset)
+extern "C" mirror::Object* artIGetObjectFromMterp(mirror::Object* obj,
+                                                  uint32_t field_offset)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerExceptionFromInterpreter();
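
The new MterpGetStatic/MterpSetStatic templates funnel all the
width-specific entry points above through one function parameterized by a
pointer-to-member accessor. A minimal stand-alone sketch of that dispatch
pattern, with Field as a toy stand-in for ArtField:

    #include <cstdint>
    #include <cstdio>

    struct Field {
      int64_t storage = 0;
      int32_t GetInt() const { return static_cast<int32_t>(storage); }
      int64_t GetLong() const { return storage; }
    };

    template <typename return_type>
    return_type GetStatic(Field* f, return_type (Field::*func)() const) {
      return_type res = 0;   // on lookup failure the result is ignored
      if (f != nullptr) {
        res = (f->*func)();  // dispatch through the member pointer
      }
      return res;
    }

    int main() {
      Field f{0x11223344556677};
      std::printf("%d\n", GetStatic<int32_t>(&f, &Field::GetInt));
      std::printf("%lld\n", static_cast<long long>(
                                GetStatic<int64_t>(&f, &Field::GetLong)));
    }
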
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index 8916241..e2b693f 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -2631,12 +2631,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGet32StaticFromCode
+    bl    MterpGet32Static
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2661,12 +2661,12 @@
      */
     /* sget-wide vAA, field@BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r9, rINST, lsr #8             @ r9<- AA
     VREG_INDEX_TO_ADDR lr, r9           @ lr<- &fp[AA]
@@ -2690,12 +2690,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetObjStaticFromCode
+    bl    MterpGetObjStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2723,12 +2723,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetBooleanStaticFromCode
+    bl    MterpGetBooleanStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2756,12 +2756,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetByteStaticFromCode
+    bl    MterpGetByteStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2789,12 +2789,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetCharStaticFromCode
+    bl    MterpGetCharStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2822,12 +2822,12 @@
      */
     /* op vAA, field@BBBB */
 
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     FETCH r0, 1                         @ r0<- field ref BBBB
     ldr   r1, [rFP, #OFF_FP_METHOD]
     mov   r2, rSELF
-    bl    artGetShortStaticFromCode
+    bl    MterpGetShortStatic
     ldr   r3, [rSELF, #THREAD_EXCEPTION_OFFSET]
     mov   r2, rINST, lsr #8             @ r2<- AA
     PREFETCH_INST 2
@@ -2860,7 +2860,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet32StaticFromCode
+    bl      MterpSet32Static
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2876,15 +2876,15 @@
      *
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   r0, 1                       @ r0<- field ref BBBB
-    ldr     r1, [rFP, #OFF_FP_METHOD]
-    mov     r2, rINST, lsr #8           @ r3<- AA
-    VREG_INDEX_TO_ADDR r2, r2
+    mov     r1, rINST, lsr #8           @ r1<- AA
+    VREG_INDEX_TO_ADDR r1, r1
+    ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2925,7 +2925,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetBooleanStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2951,7 +2951,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetByteStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -2977,7 +2977,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetCharStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -3003,7 +3003,7 @@
     ldr     r2, [rFP, #OFF_FP_METHOD]
     mov     r3, rSELF
     PREFETCH_INST 2                     @ Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetShortStatic
     cmp     r0, #0                      @ 0 on success, -1 on failure
     bne     MterpException
     ADVANCE 2                           @ Past exception point - now advance rPC
@@ -7347,24 +7347,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: arm/op_unused_fc.S */
-/* File: arm/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: arm/op_unused_fd.S */
-/* File: arm/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
@@ -11763,7 +11755,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11780,7 +11772,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index 7d442c0..ef5a4da 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -2543,12 +2543,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGet32StaticFromCode
+    bl    MterpGet32Static
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     
@@ -2573,12 +2573,12 @@
      */
     /* sget-wide vAA, field//BBBB */
 
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGet64StaticFromCode
+    bl    MterpGet64Static
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w4, wINST, #8                 // w4<- AA
     cbnz  x3, MterpException            // bail out
@@ -2599,12 +2599,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetObjStaticFromCode
+    bl    MterpGetObjStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     
@@ -2632,12 +2632,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetBooleanStaticFromCode
+    bl    MterpGetBooleanStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     uxtb w0, w0
@@ -2665,12 +2665,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetByteStaticFromCode
+    bl    MterpGetByteStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     sxtb w0, w0
@@ -2698,12 +2698,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetCharStaticFromCode
+    bl    MterpGetCharStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     uxth w0, w0
@@ -2731,12 +2731,12 @@
      */
     /* op vAA, field//BBBB */
 
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     FETCH w0, 1                         // w0<- field ref BBBB
     ldr   x1, [xFP, #OFF_FP_METHOD]
     mov   x2, xSELF
-    bl    artGetShortStaticFromCode
+    bl    MterpGetShortStatic
     ldr   x3, [xSELF, #THREAD_EXCEPTION_OFFSET]
     lsr   w2, wINST, #8                 // w2<- AA
     sxth w0, w0
@@ -2769,7 +2769,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet32StaticFromCode
+    bl      MterpSet32Static
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2784,15 +2784,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     FETCH   w0, 1                       // w0<- field ref BBBB
-    ldr     x1, [xFP, #OFF_FP_METHOD]
-    lsr     w2, wINST, #8               // w3<- AA
-    VREG_INDEX_TO_ADDR x2, w2
+    lsr     w1, wINST, #8               // w1<- AA
+    VREG_INDEX_TO_ADDR x1, w1
+    ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet64IndirectStaticFromMterp
+    bl      MterpSet64Static
     cbnz    w0, MterpException          // 0 on success, -1 on failure
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from wINST
@@ -2831,7 +2831,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetBooleanStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2856,7 +2856,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet8StaticFromCode
+    bl      MterpSetByteStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2881,7 +2881,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetCharStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -2906,7 +2906,7 @@
     ldr     x2, [xFP, #OFF_FP_METHOD]
     mov     x3, xSELF
     PREFETCH_INST 2                     // Get next inst, but don't advance rPC
-    bl      artSet16StaticFromCode
+    bl      MterpSetShortStatic
     cbnz    w0, MterpException          // 0 on success
     ADVANCE 2                           // Past exception point - now advance rPC
     GET_INST_OPCODE ip                  // extract opcode from rINST
@@ -6914,24 +6914,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: arm64/op_unused_fc.S */
-/* File: arm64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: arm64/op_unused_fd.S */
-/* File: arm64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
@@ -11580,7 +11572,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11597,7 +11589,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index e154e6c..579afc2 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -3038,12 +3038,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGet32StaticFromCode)
+    JAL(MterpGet32Static)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3064,12 +3064,12 @@
      * 64-bit SGET handler.
      */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGet64StaticFromCode)
+    JAL(MterpGet64Static)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
@@ -3088,12 +3088,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetObjStaticFromCode)
+    JAL(MterpGetObjStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3118,12 +3118,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetBooleanStaticFromCode)
+    JAL(MterpGetBooleanStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3148,12 +3148,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetByteStaticFromCode)
+    JAL(MterpGetByteStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3178,12 +3178,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetCharStaticFromCode)
+    JAL(MterpGetCharStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3208,12 +3208,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field@BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
     move  a2, rSELF                        # a2 <- self
-    JAL(artGetShortStaticFromCode)
+    JAL(MterpGetShortStatic)
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
@@ -3244,7 +3244,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet32StaticFromCode)
+    JAL(MterpSet32Static)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3258,15 +3258,15 @@
      * 64-bit SPUT handler.
      */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
-    lw    a1, OFF_FP_METHOD(rFP)           # a1 <- method
-    GET_OPA(a2)                            # a2 <- AA
-    EAS2(a2, rFP, a2)                      # a2 <- &fp[AA]
+    GET_OPA(a1)                            # a1 <- AA
+    EAS2(a1, rFP, a1)                      # a1 <- &fp[AA]
+    lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet64IndirectStaticFromMterp)
+    JAL(MterpSet64Static)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3311,7 +3311,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet8StaticFromCode)
+    JAL(MterpSetBooleanStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3336,7 +3336,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet8StaticFromCode)
+    JAL(MterpSetByteStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3361,7 +3361,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet16StaticFromCode)
+    JAL(MterpSetCharStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -3386,7 +3386,7 @@
     lw    a2, OFF_FP_METHOD(rFP)           # a2 <- method
     move  a3, rSELF                        # a3 <- self
     PREFETCH_INST(2)                       # load rINST
-    JAL(artSet16StaticFromCode)
+    JAL(MterpSetShortStatic)
     bnez  v0, MterpException               # bail out
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
@@ -7761,25 +7761,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: mips/op_unused_fc.S */
-/* File: mips/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
-
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: mips/op_unused_fd.S */
-/* File: mips/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
-
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 /* ------------------------------ */
     .balign 128
@@ -12423,7 +12413,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12441,7 +12431,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 013bb32..3656df9 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -2585,12 +2585,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGet32StaticFromCode
+    jal     MterpGet32Static
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     
@@ -2614,12 +2614,12 @@
      *
      */
     /* sget-wide vAA, field//BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGet64StaticFromCode
+    jal     MterpGet64Static
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a4, rINST, 8                # a4 <- AA
     bnez    a3, MterpException          # bail out
@@ -2639,12 +2639,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetObjStaticFromCode
+    jal     MterpGetObjStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     
@@ -2671,12 +2671,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetBooleanStaticFromCode
+    jal     MterpGetBooleanStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     and v0, v0, 0xff
@@ -2703,12 +2703,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetByteStaticFromCode
+    jal     MterpGetByteStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     seb v0, v0
@@ -2735,12 +2735,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetCharStaticFromCode
+    jal     MterpGetCharStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     and v0, v0, 0xffff
@@ -2767,12 +2767,12 @@
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
     /* op vAA, field//BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     ld      a1, OFF_FP_METHOD(rFP)
     move    a2, rSELF
-    jal     artGetShortStaticFromCode
+    jal     MterpGetShortStatic
     ld      a3, THREAD_EXCEPTION_OFFSET(rSELF)
     srl     a2, rINST, 8                # a2 <- AA
     seh v0, v0
@@ -2798,7 +2798,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet32StaticFromCode
+    .extern MterpSet32Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2806,7 +2806,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet32StaticFromCode
+    jal     MterpSet32Static
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2821,15 +2821,15 @@
      *
      */
     /* sput-wide vAA, field//BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
-    ld      a1, OFF_FP_METHOD(rFP)
-    srl     a2, rINST, 8                # a2 <- AA
-    dlsa    a2, a2, rFP, 2
+    srl     a1, rINST, 8                # a1 <- AA
+    dlsa    a1, a1, rFP, 2
+    ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet64IndirectStaticFromMterp
+    jal     MterpSet64Static
     bnezc   v0, MterpException          # 0 on success, -1 on failure
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2862,7 +2862,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetBooleanStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2870,7 +2870,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet8StaticFromCode
+    jal     MterpSetBooleanStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2888,7 +2888,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetByteStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2896,7 +2896,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet8StaticFromCode
+    jal     MterpSetByteStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2914,7 +2914,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetCharStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2922,7 +2922,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet16StaticFromCode
+    jal     MterpSetCharStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -2940,7 +2940,7 @@
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
     /* op vAA, field//BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetShortStatic
     EXPORT_PC
     lhu     a0, 2(rPC)                  # a0 <- field ref BBBB
     srl     a3, rINST, 8                # a3 <- AA
@@ -2948,7 +2948,7 @@
     ld      a2, OFF_FP_METHOD(rFP)
     move    a3, rSELF
     PREFETCH_INST 2                     # Get next inst, but don't advance rPC
-    jal     artSet16StaticFromCode
+    jal     MterpSetShortStatic
     bnezc   v0, MterpException          # 0 on success
     ADVANCE 2                           # Past exception point - now advance rPC
     GET_INST_OPCODE v0                  # extract opcode from rINST
@@ -7084,26 +7084,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: mips64/op_unused_fc.S */
-/* File: mips64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
     b       MterpFallback
 
-
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: mips64/op_unused_fd.S */
-/* File: mips64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
     b       MterpFallback
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_unused_fe: /* 0xfe */
@@ -11982,7 +11972,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12001,7 +11991,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
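
The mips64 stubs above (and the x86/x86_64 ports that follow) stop calling the
generic artGet*/artSet*StaticFromCode entry points and target Mterp-specific
wrappers instead. Judging from the argument registers the stubs set up (field
index, then the value or value address for sput-wide, then the referring
method, then the current thread), the wrappers plausibly have shapes like the
following sketch; the names come from this diff, but the parameter types are
inferred assumptions, not the actual ART prototypes:

    // Hedged sketch only; types are inferred from the stubs' argument setup.
    class ArtMethod;
    class Thread;
    extern "C" int32_t  MterpGet32Static(uint32_t field_idx, ArtMethod* referrer, Thread* self);
    extern "C" uint64_t MterpGet64Static(uint32_t field_idx, ArtMethod* referrer, Thread* self);
    extern "C" int      MterpSet32Static(uint32_t field_idx, int32_t value, ArtMethod* referrer, Thread* self);
    // Note the sput-wide reorder visible in every port: the value address now
    // comes second, ahead of the referrer.
    extern "C" int      MterpSet64Static(uint32_t field_idx, uint64_t* value_addr, ArtMethod* referrer, Thread* self);
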
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 695d1e4..21d9671 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -2535,7 +2535,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2543,7 +2543,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGet32StaticFromCode)
+    call    SYMBOL(MterpGet32Static)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2564,7 +2564,7 @@
  *
  */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2572,7 +2572,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGet64StaticFromCode)
+    call    SYMBOL(MterpGet64Static)
     movl    rSELF, %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
@@ -2592,7 +2592,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2600,7 +2600,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetObjStaticFromCode)
+    call    SYMBOL(MterpGetObjStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2624,7 +2624,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2632,7 +2632,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetBooleanStaticFromCode)
+    call    SYMBOL(MterpGetBooleanStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2656,7 +2656,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2664,7 +2664,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetByteStaticFromCode)
+    call    SYMBOL(MterpGetByteStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2688,7 +2688,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2696,7 +2696,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetCharStaticFromCode)
+    call    SYMBOL(MterpGetCharStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2720,7 +2720,7 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
  */
     /* op vAA, field@BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -2728,7 +2728,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGetShortStaticFromCode)
+    call    SYMBOL(MterpGetShortStatic)
     movl    rSELF, %ecx
     RESTORE_IBASE_FROM_SELF %ecx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%ecx)
@@ -2751,7 +2751,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet32StaticFromCode
+    .extern MterpSet32Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2761,7 +2761,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet32StaticFromCode)
+    call    SYMBOL(MterpSet32Static)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2776,17 +2776,17 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG1(%esp)            # referrer
     leal    VREG_ADDRESS(rINST), %eax
-    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    %eax, OUT_ARG1(%esp)            # &fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2821,7 +2821,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetBooleanStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2831,7 +2831,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetBooleanStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2849,7 +2849,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetByteStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2859,7 +2859,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetByteStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2877,7 +2877,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetCharStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2887,7 +2887,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetCharStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -2905,7 +2905,7 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetShortStatic
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
@@ -2915,7 +2915,7 @@
     movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetShortStatic)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
@@ -6292,23 +6292,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: x86/op_unused_fc.S */
-/* File: x86/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: x86/op_unused_fd.S */
-/* File: x86/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
@@ -12410,7 +12402,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12434,7 +12426,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index 2eab58c..b5a5ae5 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -2445,12 +2445,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGet32StaticFromCode
+    .extern MterpGet32Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGet32StaticFromCode)
+    call    SYMBOL(MterpGet32Static)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2476,12 +2476,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGet64StaticFromCode)
+    call    SYMBOL(MterpGet64Static)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2508,12 +2508,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetObjStaticFromCode
+    .extern MterpGetObjStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetObjStaticFromCode)
+    call    SYMBOL(MterpGetObjStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2540,12 +2540,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetBooleanStaticFromCode
+    .extern MterpGetBooleanStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetBooleanStaticFromCode)
+    call    SYMBOL(MterpGetBooleanStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2572,12 +2572,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetByteStaticFromCode
+    .extern MterpGetByteStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetByteStaticFromCode)
+    call    SYMBOL(MterpGetByteStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2604,12 +2604,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetCharStaticFromCode
+    .extern MterpGetCharStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetCharStaticFromCode)
+    call    SYMBOL(MterpGetCharStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2636,12 +2636,12 @@
  * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short, sget-wide
  */
     /* op vAA, field@BBBB */
-    .extern artGetShortStaticFromCode
+    .extern MterpGetShortStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref CCCC
     movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
     movq    rSELF, OUT_ARG2                 # self
-    call    SYMBOL(artGetShortStaticFromCode)
+    call    SYMBOL(MterpGetShortStatic)
     movq    rSELF, %rcx
     cmpl    $0, THREAD_EXCEPTION_OFFSET(%rcx)
     jnz     MterpException
@@ -2667,13 +2667,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet32StaticFromCode
+    .extern MterpSet32Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet32StaticFromCode)
+    call    SYMBOL(MterpSet32Static)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2687,13 +2687,13 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
-    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
-    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[AA]
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG1  # &fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2724,13 +2724,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetBooleanStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetBooleanStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2747,13 +2747,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet8StaticFromCode
+    .extern MterpSetByteStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet8StaticFromCode)
+    call    SYMBOL(MterpSetByteStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2770,13 +2770,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetCharStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetCharStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -2793,13 +2793,13 @@
  * for: sput, sput-boolean, sput-byte, sput-char, sput-short
  */
     /* op vAA, field@BBBB */
-    .extern artSet16StaticFromCode
+    .extern MterpSetShortStatic
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
     GET_VREG OUT_32_ARG1, rINSTq            # fp[AA]
     movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet16StaticFromCode)
+    call    SYMBOL(MterpSetShortStatic)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
@@ -6057,23 +6057,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fc: /* 0xfc */
-/* File: x86_64/op_unused_fc.S */
-/* File: x86_64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom: /* 0xfc */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fd: /* 0xfd */
-/* File: x86_64/op_unused_fd.S */
-/* File: x86_64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_custom_range: /* 0xfd */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
@@ -11671,7 +11663,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fc: /* 0xfc */
+.L_ALT_op_invoke_custom: /* 0xfc */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11693,7 +11685,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fd: /* 0xfd */
+.L_ALT_op_invoke_custom_range: /* 0xfd */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
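
The 0xfc/0xfd changes repeated in each port retire the unused.S "bail and
throw" stubs: those two opcode slots are now labeled invoke-custom and
invoke-custom/range, and their handlers simply transfer to MterpFallback so
the reference interpreter deals with the instruction. A tiny illustrative
enum of the two values (taken from the labels above; not ART's opcode table):

    // Sketch only: the two Dex opcode values the relabeled stubs cover.
    enum : unsigned char {
      kInvokeCustom      = 0xfc,  // invoke-custom
      kInvokeCustomRange = 0xfd,  // invoke-custom/range
    };
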
diff --git a/runtime/interpreter/mterp/x86/op_sget.S b/runtime/interpreter/mterp/x86/op_sget.S
index 0e9a3d8..6e42d32 100644
--- a/runtime/interpreter/mterp/x86/op_sget.S
+++ b/runtime/interpreter/mterp/x86/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode" }
+%default { "is_object":"0", "helper":"MterpGet32Static" }
 /*
  * General SGET handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86/op_sget_boolean.S b/runtime/interpreter/mterp/x86/op_sget_boolean.S
index f058dd8..5fa2bf0 100644
--- a/runtime/interpreter/mterp/x86/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/x86/op_sget_boolean.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_byte.S b/runtime/interpreter/mterp/x86/op_sget_byte.S
index c952f40..ef812f1 100644
--- a/runtime/interpreter/mterp/x86/op_sget_byte.S
+++ b/runtime/interpreter/mterp/x86/op_sget_byte.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_char.S b/runtime/interpreter/mterp/x86/op_sget_char.S
index d7bd410..3bc34ef 100644
--- a/runtime/interpreter/mterp/x86/op_sget_char.S
+++ b/runtime/interpreter/mterp/x86/op_sget_char.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_object.S b/runtime/interpreter/mterp/x86/op_sget_object.S
index 1c95f9a..b829e75 100644
--- a/runtime/interpreter/mterp/x86/op_sget_object.S
+++ b/runtime/interpreter/mterp/x86/op_sget_object.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "x86/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_short.S b/runtime/interpreter/mterp/x86/op_sget_short.S
index 6475306..449cf6f 100644
--- a/runtime/interpreter/mterp/x86/op_sget_short.S
+++ b/runtime/interpreter/mterp/x86/op_sget_short.S
@@ -1 +1 @@
-%include "x86/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "x86/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sget_wide.S b/runtime/interpreter/mterp/x86/op_sget_wide.S
index 2b60303..a605bcf 100644
--- a/runtime/interpreter/mterp/x86/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sget_wide.S
@@ -3,7 +3,7 @@
  *
  */
     /* sget-wide vAA, field@BBBB */
-    .extern artGet64StaticFromCode
+    .extern MterpGet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref CCCC
@@ -11,7 +11,7 @@
     movl    %eax, OUT_ARG1(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG2(%esp)            # self
-    call    SYMBOL(artGet64StaticFromCode)
+    call    SYMBOL(MterpGet64Static)
     movl    rSELF, %ecx
     cmpl    $$0, THREAD_EXCEPTION_OFFSET(%ecx)
     jnz     MterpException
diff --git a/runtime/interpreter/mterp/x86/op_sput.S b/runtime/interpreter/mterp/x86/op_sput.S
index 0b5de09..99f6088 100644
--- a/runtime/interpreter/mterp/x86/op_sput.S
+++ b/runtime/interpreter/mterp/x86/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
 /*
  * General SPUT handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86/op_sput_boolean.S b/runtime/interpreter/mterp/x86/op_sput_boolean.S
index 63601bd..a7fffda 100644
--- a/runtime/interpreter/mterp/x86/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/x86/op_sput_boolean.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_byte.S b/runtime/interpreter/mterp/x86/op_sput_byte.S
index 63601bd..3a5ff92 100644
--- a/runtime/interpreter/mterp/x86/op_sput_byte.S
+++ b/runtime/interpreter/mterp/x86/op_sput_byte.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_char.S b/runtime/interpreter/mterp/x86/op_sput_char.S
index 1749f7c..565cc2a 100644
--- a/runtime/interpreter/mterp/x86/op_sput_char.S
+++ b/runtime/interpreter/mterp/x86/op_sput_char.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_short.S b/runtime/interpreter/mterp/x86/op_sput_short.S
index 1749f7c..85c3441 100644
--- a/runtime/interpreter/mterp/x86/op_sput_short.S
+++ b/runtime/interpreter/mterp/x86/op_sput_short.S
@@ -1 +1 @@
-%include "x86/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86/op_sput_wide.S b/runtime/interpreter/mterp/x86/op_sput_wide.S
index 19cff0d..8cc7e28 100644
--- a/runtime/interpreter/mterp/x86/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86/op_sput_wide.S
@@ -3,17 +3,17 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwl  2(rPC), %eax
     movl    %eax, OUT_ARG0(%esp)            # field ref BBBB
-    movl    OFF_FP_METHOD(rFP), %eax
-    movl    %eax, OUT_ARG1(%esp)            # referrer
     leal    VREG_ADDRESS(rINST), %eax
-    movl    %eax, OUT_ARG2(%esp)            # &fp[AA]
+    movl    %eax, OUT_ARG1(%esp)            # &fp[AA]
+    movl    OFF_FP_METHOD(rFP), %eax
+    movl    %eax, OUT_ARG2(%esp)            # referrer
     movl    rSELF, %ecx
     movl    %ecx, OUT_ARG3(%esp)            # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     RESTORE_IBASE
diff --git a/runtime/interpreter/mterp/x86_64/op_sget.S b/runtime/interpreter/mterp/x86_64/op_sget.S
index d39e6c4..e996c77 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget.S
@@ -1,4 +1,4 @@
-%default { "is_object":"0", "helper":"artGet32StaticFromCode", "wide":"0" }
+%default { "is_object":"0", "helper":"MterpGet32Static", "wide":"0" }
 /*
  * General SGET handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_boolean.S b/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
index 7d358da..ee772ad 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_boolean.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetBooleanStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_byte.S b/runtime/interpreter/mterp/x86_64/op_sget_byte.S
index 79d9ff4..f65ea49 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_byte.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_byte.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetByteStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_char.S b/runtime/interpreter/mterp/x86_64/op_sget_char.S
index 4488610..3972551 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_char.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_char.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetCharStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_object.S b/runtime/interpreter/mterp/x86_64/op_sget_object.S
index 09b627e..a0bbfd8 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_object.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_object.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"is_object":"1", "helper":"artGetObjStaticFromCode"}
+%include "x86_64/op_sget.S" {"is_object":"1", "helper":"MterpGetObjStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_short.S b/runtime/interpreter/mterp/x86_64/op_sget_short.S
index 47ac238..df212dc 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_short.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_short.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGetShortStaticFromCode"}
+%include "x86_64/op_sget.S" {"helper":"MterpGetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sget_wide.S b/runtime/interpreter/mterp/x86_64/op_sget_wide.S
index aa22343..1e98e28 100644
--- a/runtime/interpreter/mterp/x86_64/op_sget_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_sget_wide.S
@@ -1 +1 @@
-%include "x86_64/op_sget.S" {"helper":"artGet64StaticFromCode", "wide":"1"}
+%include "x86_64/op_sget.S" {"helper":"MterpGet64Static", "wide":"1"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput.S b/runtime/interpreter/mterp/x86_64/op_sput.S
index e92b032..9705619 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput.S
@@ -1,4 +1,4 @@
-%default { "helper":"artSet32StaticFromCode"}
+%default { "helper":"MterpSet32Static"}
 /*
  * General SPUT handler wrapper.
  *
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_boolean.S b/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
index 8718915..8bf4a62 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_boolean.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetBooleanStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_byte.S b/runtime/interpreter/mterp/x86_64/op_sput_byte.S
index 8718915..5bb26eb 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_byte.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_byte.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet8StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetByteStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_char.S b/runtime/interpreter/mterp/x86_64/op_sput_char.S
index 2fe9d14..42b244e 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_char.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_char.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetCharStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_short.S b/runtime/interpreter/mterp/x86_64/op_sput_short.S
index 2fe9d14..9670092 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_short.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_short.S
@@ -1 +1 @@
-%include "x86_64/op_sput.S" {"helper":"artSet16StaticFromCode"}
+%include "x86_64/op_sput.S" {"helper":"MterpSetShortStatic"}
diff --git a/runtime/interpreter/mterp/x86_64/op_sput_wide.S b/runtime/interpreter/mterp/x86_64/op_sput_wide.S
index c4bc269..a21bcb5 100644
--- a/runtime/interpreter/mterp/x86_64/op_sput_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_sput_wide.S
@@ -3,13 +3,13 @@
  *
  */
     /* sput-wide vAA, field@BBBB */
-    .extern artSet64IndirectStaticFromMterp
+    .extern MterpSet64Static
     EXPORT_PC
     movzwq  2(rPC), OUT_ARG0                # field ref BBBB
-    movq    OFF_FP_METHOD(rFP), OUT_ARG1    # referrer
-    leaq    VREG_ADDRESS(rINSTq), OUT_ARG2  # &fp[AA]
+    leaq    VREG_ADDRESS(rINSTq), OUT_ARG1  # &fp[AA]
+    movq    OFF_FP_METHOD(rFP), OUT_ARG2    # referrer
     movq    rSELF, OUT_ARG3                 # self
-    call    SYMBOL(artSet64IndirectStaticFromMterp)
+    call    SYMBOL(MterpSet64Static)
     testb   %al, %al
     jnz     MterpException
     ADVANCE_PC_FETCH_AND_GOTO_NEXT 2
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index 80554c2..70be30c 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -1131,53 +1131,6 @@
   result->SetJ(bit_cast<int64_t, double>(in));
 }
 
-static ObjPtr<mirror::Object> GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  const DexFile* dex_file = dex_cache->GetDexFile();
-  if (dex_file == nullptr) {
-    return nullptr;
-  }
-
-  // Create the direct byte buffer.
-  JNIEnv* env = self->GetJniEnv();
-  DCHECK(env != nullptr);
-  void* address = const_cast<void*>(reinterpret_cast<const void*>(dex_file->Begin()));
-  ScopedLocalRef<jobject> byte_buffer(env, env->NewDirectByteBuffer(address, dex_file->Size()));
-  if (byte_buffer.get() == nullptr) {
-    DCHECK(self->IsExceptionPending());
-    return nullptr;
-  }
-
-  jvalue args[1];
-  args[0].l = byte_buffer.get();
-
-  ScopedLocalRef<jobject> dex(env, env->CallStaticObjectMethodA(
-      WellKnownClasses::com_android_dex_Dex,
-      WellKnownClasses::com_android_dex_Dex_create,
-      args));
-
-  return self->DecodeJObject(dex.get());
-}
-
-void UnstartedRuntime::UnstartedDexCacheGetDexNative(
-    Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
-  // We will create the Dex object, but the image writer will release it before creating the
-  // art file.
-  mirror::Object* src = shadow_frame->GetVRegReference(arg_offset);
-  bool have_dex = false;
-  if (src != nullptr) {
-    ObjPtr<mirror::Object> dex = GetDexFromDexCache(self, src->AsDexCache());
-    if (dex != nullptr) {
-      have_dex = true;
-      result->SetL(dex);
-    }
-  }
-  if (!have_dex) {
-    self->ClearException();
-    Runtime::Current()->AbortTransactionAndThrowAbortError(self, "Could not create Dex object");
-  }
-}
-
 static void UnstartedMemoryPeek(
     Primitive::Type type, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   int64_t address = shadow_frame->GetVRegLong(arg_offset);
@@ -1336,12 +1289,14 @@
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
   jchar old_c = shadow_frame->GetVReg(arg_offset + 1);
   jchar new_c = shadow_frame->GetVReg(arg_offset + 2);
-  ObjPtr<mirror::String> string = shadow_frame->GetVRegReference(arg_offset)->AsString();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::String> string =
+      hs.NewHandle(shadow_frame->GetVRegReference(arg_offset)->AsString());
   if (string == nullptr) {
     AbortTransactionOrFail(self, "String.replaceWithMatch with null object");
     return;
   }
-  result->SetL(string->DoReplace(self, old_c, new_c));
+  result->SetL(mirror::String::DoReplace(self, string, old_c, new_c));
 }
 
 // This allows creating the new style of String objects during compilation.
@@ -1672,6 +1627,12 @@
   }
 }
 
+void UnstartedRuntime::UnstartedSystemIdentityHashCode(
+    Thread* self ATTRIBUTE_UNUSED, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::Object* obj = shadow_frame->GetVRegReference(arg_offset);
+  result->SetI((obj != nullptr) ? obj->IdentityHashCode() : 0);
+}
 
 void UnstartedRuntime::UnstartedJNIVMRuntimeNewUnpaddedArray(
     Thread* self, ArtMethod* method ATTRIBUTE_UNUSED, mirror::Object* receiver ATTRIBUTE_UNUSED,
@@ -1836,13 +1797,6 @@
   }
 }
 
-void UnstartedRuntime::UnstartedJNISystemIdentityHashCode(
-    Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
-    mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args, JValue* result) {
-  mirror::Object* obj = reinterpret_cast<mirror::Object*>(args[0]);
-  result->SetI((obj != nullptr) ? obj->IdentityHashCode() : 0);
-}
-
 void UnstartedRuntime::UnstartedJNIByteOrderIsLittleEndian(
     Thread* self ATTRIBUTE_UNUSED, ArtMethod* method ATTRIBUTE_UNUSED,
     mirror::Object* receiver ATTRIBUTE_UNUSED, uint32_t* args ATTRIBUTE_UNUSED, JValue* result) {
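
The String.replace change above is a GC-safety fix as much as an API change:
the string is now pinned in a StackHandleScope before the static
mirror::String::DoReplace call. A call that can allocate may trigger garbage
collection, and a raw reference held across it could be invalidated by a
moving collector, while a Handle stays visible to the GC and is updated in
place. A generic, hedged illustration of the idiom (assumed API shape, not
ART's exact code):

    // Sketch: never hold a raw heap reference across a call that may allocate.
    void ReplaceExample(Thread* self, ShadowFrame* frame, size_t arg_offset, JValue* result) {
      StackHandleScope<1> hs(self);  // scope owns one handle slot
      Handle<mirror::String> s =
          hs.NewHandle(frame->GetVRegReference(arg_offset)->AsString());
      // `s` remains valid even if DoReplace allocates and the GC moves the string.
      result->SetL(mirror::String::DoReplace(self, s, 'a', 'b'));
    }
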
diff --git a/runtime/interpreter/unstarted_runtime_list.h b/runtime/interpreter/unstarted_runtime_list.h
index e9435e4..4791035 100644
--- a/runtime/interpreter/unstarted_runtime_list.h
+++ b/runtime/interpreter/unstarted_runtime_list.h
@@ -52,7 +52,6 @@
   V(MathPow, "double java.lang.Math.pow(double, double)") \
   V(ObjectHashCode, "int java.lang.Object.hashCode()") \
   V(DoubleDoubleToRawLongBits, "long java.lang.Double.doubleToRawLongBits(double)") \
-  V(DexCacheGetDexNative, "com.android.dex.Dex java.lang.DexCache.getDexNative()") \
   V(MemoryPeekByte, "byte libcore.io.Memory.peekByte(long)") \
   V(MemoryPeekShort, "short libcore.io.Memory.peekShortNative(long)") \
   V(MemoryPeekInt, "int libcore.io.Memory.peekIntNative(long)") \
@@ -76,7 +75,8 @@
   V(UnsafePutObjectVolatile, "void sun.misc.Unsafe.putObjectVolatile(java.lang.Object, long, java.lang.Object)") \
   V(UnsafePutOrderedObject, "void sun.misc.Unsafe.putOrderedObject(java.lang.Object, long, java.lang.Object)") \
   V(IntegerParseInt, "int java.lang.Integer.parseInt(java.lang.String)") \
-  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)")
+  V(LongParseLong, "long java.lang.Long.parseLong(java.lang.String)") \
+  V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)")
 
 // Methods that are native.
 #define UNSTARTED_RUNTIME_JNI_LIST(V)           \
@@ -98,7 +98,6 @@
   V(ArrayCreateMultiArray, "java.lang.Object java.lang.reflect.Array.createMultiArray(java.lang.Class, int[])") \
   V(ArrayCreateObjectArray, "java.lang.Object java.lang.reflect.Array.createObjectArray(java.lang.Class, int)") \
   V(ThrowableNativeFillInStackTrace, "java.lang.Object java.lang.Throwable.nativeFillInStackTrace()") \
-  V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)") \
   V(ByteOrderIsLittleEndian, "boolean java.nio.ByteOrder.isLittleEndian()") \
   V(UnsafeCompareAndSwapInt, "boolean sun.misc.Unsafe.compareAndSwapInt(java.lang.Object, long, int, int)") \
   V(UnsafeGetIntVolatile, "int sun.misc.Unsafe.getIntVolatile(java.lang.Object, long)") \
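
unstarted_runtime_list.h drives both handler declarations and string-keyed
dispatch through X-macro lists, so moving SystemIdentityHashCode from the JNI
list to the plain unstarted list changes which handler signature gets
generated for it, matching the new UnstartedSystemIdentityHashCode definition
earlier in this patch. A hedged sketch of how such a V(...) list typically
expands (the macro names here are invented):

    // Sketch of the X-macro pattern; EXAMPLE_LIST/DECLARE_HANDLER are invented.
    #define EXAMPLE_LIST(V) \
      V(SystemIdentityHashCode, "int java.lang.System.identityHashCode(java.lang.Object)")
    #define DECLARE_HANDLER(Name, descriptor) \
      static void Unstarted##Name(Thread* self, ShadowFrame* sf, JValue* result, size_t arg_offset);
    EXAMPLE_LIST(DECLARE_HANDLER)  // expands to the handler declaration
    #undef DECLARE_HANDLER
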
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index db222fa..56e261c 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -1367,5 +1367,26 @@
   ShadowFrame::DeleteDeoptimizedFrame(shadow_frame);
 }
 
+TEST_F(UnstartedRuntimeTest, IdentityHashCode) {
+  Thread* self = Thread::Current();
+  ScopedObjectAccess soa(self);
+  ShadowFrame* tmp = ShadowFrame::CreateDeoptimizedFrame(10, nullptr, nullptr, 0);
+
+  JValue result;
+  UnstartedSystemIdentityHashCode(self, tmp, &result, 0);
+
+  EXPECT_EQ(0, result.GetI());
+  ASSERT_FALSE(self->IsExceptionPending());
+
+  ObjPtr<mirror::String> str = mirror::String::AllocFromModifiedUtf8(self, "abd");
+  tmp->SetVRegReference(0, str.Ptr());
+  UnstartedSystemIdentityHashCode(self, tmp, &result, 0);
+  EXPECT_NE(0, result.GetI());
+  EXPECT_EQ(str->IdentityHashCode(), result.GetI());
+  ASSERT_FALSE(self->IsExceptionPending());
+
+  ShadowFrame::DeleteDeoptimizedFrame(tmp);
+}
+
 }  // namespace interpreter
 }  // namespace art
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index a341cdb..b93b8f2 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -39,6 +39,7 @@
 #include "runtime_options.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
+#include "sigchain.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
@@ -900,7 +901,8 @@
     int version = (*jni_on_load)(this, nullptr);
 
     if (runtime_->GetTargetSdkVersion() != 0 && runtime_->GetTargetSdkVersion() <= 21) {
-      fault_manager.EnsureArtActionInFrontOfSignalChain();
+      // Make sure that sigchain owns SIGSEGV.
+      EnsureFrontOfChain(SIGSEGV);
     }
 
     self->SetClassLoaderOverride(old_class_loader.get());
diff --git a/runtime/jdwp/jdwp.h b/runtime/jdwp/jdwp.h
index af29468..86af6d4 100644
--- a/runtime/jdwp/jdwp.h
+++ b/runtime/jdwp/jdwp.h
@@ -203,8 +203,7 @@
    */
   void PostLocationEvent(const EventLocation* pLoc, mirror::Object* thisPtr, int eventFlags,
                          const JValue* returnValue)
-      REQUIRES(!Locks::jdwp_event_list_lock_, !jdwp_token_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!event_list_lock_, !jdwp_token_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * A field of interest has been accessed or modified. This is used for field access and field
@@ -215,8 +214,7 @@
    */
   void PostFieldEvent(const EventLocation* pLoc, ArtField* field, mirror::Object* thisPtr,
                       const JValue* fieldValue, bool is_modification)
-      REQUIRES(!Locks::jdwp_event_list_lock_, !jdwp_token_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!event_list_lock_, !jdwp_token_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * An exception has been thrown.
@@ -225,22 +223,19 @@
    */
   void PostException(const EventLocation* pThrowLoc, mirror::Throwable* exception_object,
                      const EventLocation* pCatchLoc, mirror::Object* thisPtr)
-      REQUIRES(!Locks::jdwp_event_list_lock_, !jdwp_token_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!event_list_lock_, !jdwp_token_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * A thread has started or stopped.
    */
   void PostThreadChange(Thread* thread, bool start)
-      REQUIRES(!Locks::jdwp_event_list_lock_, !jdwp_token_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!event_list_lock_, !jdwp_token_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * Class has been prepared.
    */
   void PostClassPrepare(mirror::Class* klass)
-      REQUIRES(!Locks::jdwp_event_list_lock_, !jdwp_token_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!event_list_lock_, !jdwp_token_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * The VM is about to stop.
@@ -264,7 +259,7 @@
   void SendRequest(ExpandBuf* pReq);
 
   void ResetState()
-      REQUIRES(!Locks::jdwp_event_list_lock_)
+      REQUIRES(!event_list_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   /* atomic ops to get next serial number */
@@ -273,7 +268,7 @@
 
   void Run()
       REQUIRES(!Locks::mutator_lock_, !Locks::thread_suspend_count_lock_, !thread_start_lock_,
-               !attach_lock_, !Locks::jdwp_event_list_lock_);
+               !attach_lock_, !event_list_lock_);
 
   /*
    * Register an event by adding it to the event list.
@@ -282,25 +277,25 @@
    * may discard its pointer after calling this.
    */
   JdwpError RegisterEvent(JdwpEvent* pEvent)
-      REQUIRES(!Locks::jdwp_event_list_lock_)
+      REQUIRES(!event_list_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * Unregister an event, given the requestId.
    */
   void UnregisterEventById(uint32_t requestId)
-      REQUIRES(!Locks::jdwp_event_list_lock_)
+      REQUIRES(!event_list_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void UnregisterLocationEventsOnClass(ObjPtr<mirror::Class> klass)
-      REQUIRES(!Locks::jdwp_event_list_lock_)
+      REQUIRES(!event_list_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
    * Unregister all events.
    */
   void UnregisterAll()
-      REQUIRES(!Locks::jdwp_event_list_lock_)
+      REQUIRES(!event_list_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
@@ -315,16 +310,16 @@
                                      ObjectId threadId)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!jdwp_token_lock_);
   void CleanupMatchList(const std::vector<JdwpEvent*>& match_list)
-      REQUIRES(Locks::jdwp_event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
   void EventFinish(ExpandBuf* pReq);
   bool FindMatchingEvents(JdwpEventKind eventKind, const ModBasket& basket,
                           std::vector<JdwpEvent*>* match_list)
-      REQUIRES(!Locks::jdwp_event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(!event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
   void FindMatchingEventsLocked(JdwpEventKind eventKind, const ModBasket& basket,
                                 std::vector<JdwpEvent*>* match_list)
-      REQUIRES(Locks::jdwp_event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
   void UnregisterEvent(JdwpEvent* pEvent)
-      REQUIRES(Locks::jdwp_event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES(event_list_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
   void SendBufferedRequest(uint32_t type, const std::vector<iovec>& iov);
 
   /*
@@ -392,8 +387,9 @@
   AtomicInteger event_serial_;
 
   // Linked list of events requested by the debugger (breakpoints, class prep, etc).
-  JdwpEvent* event_list_ GUARDED_BY(Locks::jdwp_event_list_lock_);
-  size_t event_list_size_ GUARDED_BY(Locks::jdwp_event_list_lock_);  // Number of elements in event_list_.
+  Mutex event_list_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER ACQUIRED_BEFORE(Locks::breakpoint_lock_);
+  JdwpEvent* event_list_ GUARDED_BY(event_list_lock_);
+  size_t event_list_size_ GUARDED_BY(event_list_lock_);  // Number of elements in event_list_.
 
   // Used to synchronize JDWP command handler thread and event threads so only one
   // thread does JDWP stuff at a time. This prevents interleaving of command handling
@@ -414,7 +410,7 @@
   // When the runtime shuts down, it needs to stop the JDWP command handler thread by closing the
   // JDWP connection. However, if the JDWP thread is processing a command, it needs to wait
   // for the command to finish so we can send its reply before closing the connection.
-  Mutex shutdown_lock_ ACQUIRED_AFTER(Locks::jdwp_event_list_lock_);
+  Mutex shutdown_lock_ ACQUIRED_AFTER(event_list_lock_);
   ConditionVariable shutdown_cond_ GUARDED_BY(shutdown_lock_);
   bool processing_request_ GUARDED_BY(shutdown_lock_);
 };
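
The jdwp.h rewrite above moves the event-list lock from the process-global
Locks::jdwp_event_list_lock_ to a per-JdwpState member while keeping the
Clang thread-safety annotations intact: GUARDED_BY ties a field to its lock,
REQUIRES(lock) marks functions that must be entered with the lock held, and
REQUIRES(!lock) marks functions that acquire it themselves. A compact, hedged
illustration using the same macro spellings (the class is invented):

    // Invented class; the annotation usage mirrors the header above.
    class EventListHolder {
     public:
      void Register(JdwpEvent* ev) REQUIRES(!lock_) {  // acquires lock_ itself
        MutexLock mu(Thread::Current(), lock_);
        RegisterLocked(ev);
      }
     private:
      void RegisterLocked(JdwpEvent* ev) REQUIRES(lock_);  // caller holds lock_
      Mutex lock_;
      JdwpEvent* head_ GUARDED_BY(lock_) = nullptr;  // touch only with lock_ held
    };
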
diff --git a/runtime/jdwp/jdwp_adb.cc b/runtime/jdwp/jdwp_adb.cc
index b13d565..0aa04c1 100644
--- a/runtime/jdwp/jdwp_adb.cc
+++ b/runtime/jdwp/jdwp_adb.cc
@@ -227,7 +227,7 @@
     const int  sleep_max_ms = 2*1000;
     char       buff[5];
 
-    int sock = socket(PF_UNIX, SOCK_STREAM, 0);
+    int sock = socket(AF_UNIX, SOCK_SEQPACKET, 0);
     if (sock < 0) {
       PLOG(ERROR) << "Could not create ADB control socket";
       return false;
@@ -264,7 +264,7 @@
        * up after a few minutes in case somebody ships an app with
        * the debuggable flag set.
        */
-      int  ret = connect(ControlSock(), &control_addr_.controlAddrPlain, control_addr_len_);
+      int ret = connect(ControlSock(), &control_addr_.controlAddrPlain, control_addr_len_);
       if (!ret) {
         int control_sock = ControlSock();
 #ifdef ART_TARGET_ANDROID
@@ -278,7 +278,7 @@
 
         /* now try to send our pid to the ADB daemon */
         ret = TEMP_FAILURE_RETRY(send(control_sock, buff, 4, 0));
-        if (ret >= 0) {
+        if (ret == 4) {
           VLOG(jdwp) << StringPrintf("PID sent as '%.*s' to ADB", 4, buff);
           break;
         }
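
The jdwp_adb.cc hunks tighten the ADB control-socket handshake in two ways:
the socket type becomes SOCK_SEQPACKET, which preserves record boundaries,
and the PID send now counts as success only when all four bytes went out
(ret == 4 rather than ret >= 0, which would also have accepted a short
write). A hedged sketch of the checked-send idiom (the "%04x" PID encoding is
an assumption):

    // Sketch, not the ART code: with SOCK_SEQPACKET each send() is one record,
    // so accepting only n == 4 rejects partial writes outright.
    #include <cstdio>
    #include <sys/socket.h>
    #include <unistd.h>
    bool SendPidRecord(int control_sock) {
      char buf[5];
      snprintf(buf, sizeof(buf), "%04x", getpid() & 0xffff);  // encoding assumed
      ssize_t n = TEMP_FAILURE_RETRY(send(control_sock, buf, 4, 0));
      return n == 4;  // a short or failed send is treated as failure
    }
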
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 36d733e..96249f9 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -237,7 +237,7 @@
     /*
      * Add to list.
      */
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     if (event_list_ != nullptr) {
       pEvent->next = event_list_;
       event_list_->prev = pEvent;
@@ -256,7 +256,7 @@
   StackHandleScope<1> hs(Thread::Current());
   Handle<mirror::Class> h_klass(hs.NewHandle(klass));
   std::vector<JdwpEvent*> to_remove;
-  MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+  MutexLock mu(Thread::Current(), event_list_lock_);
   for (JdwpEvent* cur_event = event_list_; cur_event != nullptr; cur_event = cur_event->next) {
     // Fill in the to_remove list
     bool found_event = false;
@@ -356,7 +356,7 @@
 void JdwpState::UnregisterEventById(uint32_t requestId) {
   bool found = false;
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
 
     for (JdwpEvent* pEvent = event_list_; pEvent != nullptr; pEvent = pEvent->next) {
       if (pEvent->requestId == requestId) {
@@ -383,7 +383,7 @@
  * Remove all entries from the event list.
  */
 void JdwpState::UnregisterAll() {
-  MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+  MutexLock mu(Thread::Current(), event_list_lock_);
 
   JdwpEvent* pEvent = event_list_;
   while (pEvent != nullptr) {
@@ -593,7 +593,7 @@
  */
 bool JdwpState::FindMatchingEvents(JdwpEventKind event_kind, const ModBasket& basket,
                                    std::vector<JdwpEvent*>* match_list) {
-  MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+  MutexLock mu(Thread::Current(), event_list_lock_);
   match_list->reserve(event_list_size_);
   FindMatchingEventsLocked(event_kind, basket, match_list);
   return !match_list->empty();
@@ -908,7 +908,7 @@
   std::vector<JdwpEvent*> match_list;
   {
     // We use the locked version because we have multiple possible match events.
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     match_list.reserve(event_list_size_);
     if ((eventFlags & Dbg::kBreakpoint) != 0) {
       FindMatchingEventsLocked(EK_BREAKPOINT, basket, &match_list);
@@ -955,7 +955,7 @@
   }
 
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     CleanupMatchList(match_list);
   }
 
@@ -1041,7 +1041,7 @@
   }
 
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     CleanupMatchList(match_list);
   }
 
@@ -1103,7 +1103,7 @@
   }
 
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     CleanupMatchList(match_list);
   }
 
@@ -1213,7 +1213,7 @@
   }
 
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     CleanupMatchList(match_list);
   }
 
@@ -1295,7 +1295,7 @@
   }
 
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     CleanupMatchList(match_list);
   }
 
diff --git a/runtime/jdwp/jdwp_handler.cc b/runtime/jdwp/jdwp_handler.cc
index 964c567..971d039 100644
--- a/runtime/jdwp/jdwp_handler.cc
+++ b/runtime/jdwp/jdwp_handler.cc
@@ -761,12 +761,11 @@
   return ERR_NONE;
 }
 
-// Default implementation for IDEs relying on this command.
 static JdwpError M_IsObsolete(JdwpState*, Request* request, ExpandBuf* reply)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   request->ReadRefTypeId();  // unused reference type ID
-  request->ReadMethodId();   // unused method ID
-  expandBufAdd1(reply, false);  // a method is never obsolete.
+  MethodId id = request->ReadMethodId();
+  expandBufAdd1(reply, Dbg::IsMethodObsolete(id));
   return ERR_NONE;
 }
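
The M_IsObsolete change replaces a hard-coded "never obsolete" reply with a
real query: under JVMTI-style class redefinition, superseded method versions
become obsolete, and debuggers issue Method.IsObsolete to detect them. A
lightly commented restatement of the new handler (Dbg::IsMethodObsolete is
the real call from the diff; only the comments are added):

    static JdwpError M_IsObsolete(JdwpState*, Request* request, ExpandBuf* reply)
        REQUIRES_SHARED(Locks::mutator_lock_) {
      request->ReadRefTypeId();               // reference type ID: still unused
      MethodId id = request->ReadMethodId();  // now consulted instead of discarded
      expandBufAdd1(reply, Dbg::IsMethodObsolete(id));  // true for redefined-away methods
      return ERR_NONE;
    }
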
 
diff --git a/runtime/jdwp/jdwp_main.cc b/runtime/jdwp/jdwp_main.cc
index 64ed724..e6c6068 100644
--- a/runtime/jdwp/jdwp_main.cc
+++ b/runtime/jdwp/jdwp_main.cc
@@ -227,6 +227,7 @@
       last_activity_time_ms_(0),
       request_serial_(0x10000000),
       event_serial_(0x20000000),
+      event_list_lock_("JDWP event list lock", kJdwpEventListLock),
       event_list_(nullptr),
       event_list_size_(0),
       jdwp_token_lock_("JDWP token lock"),
@@ -238,6 +239,7 @@
       shutdown_lock_("JDWP shutdown lock", kJdwpShutdownLock),
       shutdown_cond_("JDWP shutdown condition variable", shutdown_lock_),
       processing_request_(false) {
+  Locks::AddToExpectedMutexesOnWeakRefAccess(&event_list_lock_);
 }
 
 /*
@@ -330,7 +332,7 @@
 
   UnregisterAll();
   {
-    MutexLock mu(Thread::Current(), *Locks::jdwp_event_list_lock_);
+    MutexLock mu(Thread::Current(), event_list_lock_);
     CHECK(event_list_ == nullptr);
   }
 
@@ -380,6 +382,8 @@
   CHECK(netState == nullptr);
 
   ResetState();
+
+  Locks::RemoveFromExpectedMutexesOnWeakRefAccess(&event_list_lock_);
 }
 
 /*
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index bd7251b..510f5f0 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -35,6 +35,11 @@
 
 ObjectRegistry::ObjectRegistry()
     : lock_("ObjectRegistry lock", kJdwpObjectRegistryLock), next_id_(1) {
+  Locks::AddToExpectedMutexesOnWeakRefAccess(&lock_);
+}
+
+ObjectRegistry::~ObjectRegistry() {
+  Locks::RemoveFromExpectedMutexesOnWeakRefAccess(&lock_);
 }
 
 JDWP::RefTypeId ObjectRegistry::AddRefType(ObjPtr<mirror::Class> c) {
diff --git a/runtime/jdwp/object_registry.h b/runtime/jdwp/object_registry.h
index 9cacc66..8754631 100644
--- a/runtime/jdwp/object_registry.h
+++ b/runtime/jdwp/object_registry.h
@@ -62,6 +62,7 @@
 class ObjectRegistry {
  public:
   ObjectRegistry();
+  ~ObjectRegistry();
 
   JDWP::ObjectId Add(ObjPtr<mirror::Object> o)
       REQUIRES_SHARED(Locks::mutator_lock_)
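
The constructor/destructor pairs above follow one pattern, modeled standalone below (a plain std::set of mutex pointers stands in for the runtime's expected-mutexes list, and synchronization of that list itself is elided): any lock that may be held while weak references are accessed stays registered for exactly the owner's lifetime.

    #include <mutex>
    #include <set>

    static std::set<std::mutex*> g_expected_on_weak_ref_access;  // models the Locks:: list

    class ObjectRegistryModel {  // hypothetical stand-in for JDWP::ObjectRegistry
     public:
      ObjectRegistryModel() { g_expected_on_weak_ref_access.insert(&lock_); }
      ~ObjectRegistryModel() { g_expected_on_weak_ref_access.erase(&lock_); }

     private:
      std::mutex lock_;  // can be held while reading weak refs, hence the registration
    };
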
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 1ec4749..3631a9d 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -325,16 +325,12 @@
 }
 
 void Jit::StartProfileSaver(const std::string& filename,
-                            const std::vector<std::string>& code_paths,
-                            const std::string& foreign_dex_profile_path,
-                            const std::string& app_dir) {
+                            const std::vector<std::string>& code_paths) {
   if (profile_saver_options_.IsEnabled()) {
     ProfileSaver::Start(profile_saver_options_,
                         filename,
                         code_cache_.get(),
-                        code_paths,
-                        foreign_dex_profile_path,
-                        app_dir);
+                        code_paths);
   }
 }
 
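An illustrative call site under the trimmed signature (the file paths are assumptions, and this fragment is only meaningful inside the runtime): callers now pass just the output file and the code paths, with the foreign-dex profile path and app directory gone.

    std::vector<std::string> code_paths = {"/data/app/com.example/base.apk"};  // hypothetical
    Runtime::Current()->GetJit()->StartProfileSaver(
        "/data/misc/profiles/cur/0/com.example/primary.prof",  // hypothetical output
        code_paths);
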
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index d566799..4f5bebf 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -136,14 +136,8 @@
   // Starts the profile saver if the config options allow profile recording.
   // The profile will be stored in the specified `filename` and will contain
   // information collected from the given `code_paths` (a set of dex locations).
-  // The `foreign_dex_profile_path` is the path where the saver will put the
-  // profile markers for loaded dex files which are not owned by the application.
-  // The `app_dir` is the application directory and is used to decide which
-  // dex files belong to the application.
   void StartProfileSaver(const std::string& filename,
-                         const std::vector<std::string>& code_paths,
-                         const std::string& foreign_dex_profile_path,
-                         const std::string& app_dir);
+                         const std::vector<std::string>& code_paths);
   void StopProfileSaver();
 
   void DumpForSigQuit(std::ostream& os) REQUIRES(!lock_);
@@ -285,6 +279,10 @@
         code_cache_initial_capacity_(0),
         code_cache_max_capacity_(0),
         compile_threshold_(0),
+        warmup_threshold_(0),
+        osr_threshold_(0),
+        priority_thread_weight_(0),
+        invoke_transition_weight_(0),
         dump_info_on_shutdown_(false) {}
 
   DISALLOW_COPY_AND_ASSIGN(JitOptions);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 62acedf..e9a5ae5 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -211,6 +211,7 @@
 uint8_t* JitCodeCache::CommitCode(Thread* self,
                                   ArtMethod* method,
                                   uint8_t* stack_map,
+                                  uint8_t* method_info,
                                   uint8_t* roots_data,
                                   size_t frame_size_in_bytes,
                                   size_t core_spill_mask,
@@ -225,6 +226,7 @@
   uint8_t* result = CommitCodeInternal(self,
                                        method,
                                        stack_map,
+                                       method_info,
                                        roots_data,
                                        frame_size_in_bytes,
                                        core_spill_mask,
@@ -242,6 +244,7 @@
     result = CommitCodeInternal(self,
                                 method,
                                 stack_map,
+                                method_info,
                                 roots_data,
                                 frame_size_in_bytes,
                                 core_spill_mask,
@@ -510,6 +513,7 @@
 uint8_t* JitCodeCache::CommitCodeInternal(Thread* self,
                                           ArtMethod* method,
                                           uint8_t* stack_map,
+                                          uint8_t* method_info,
                                           uint8_t* roots_data,
                                           size_t frame_size_in_bytes,
                                           size_t core_spill_mask,
@@ -547,6 +551,7 @@
       method_header = OatQuickMethodHeader::FromCodePointer(code_ptr);
       new (method_header) OatQuickMethodHeader(
           code_ptr - stack_map,
+          code_ptr - method_info,
           frame_size_in_bytes,
           core_spill_mask,
           fp_spill_mask,
@@ -739,12 +744,14 @@
 
 size_t JitCodeCache::ReserveData(Thread* self,
                                  size_t stack_map_size,
+                                 size_t method_info_size,
                                  size_t number_of_roots,
                                  ArtMethod* method,
                                  uint8_t** stack_map_data,
+                                 uint8_t** method_info_data,
                                  uint8_t** roots_data) {
   size_t table_size = ComputeRootTableSize(number_of_roots);
-  size_t size = RoundUp(stack_map_size + table_size, sizeof(void*));
+  size_t size = RoundUp(stack_map_size + method_info_size + table_size, sizeof(void*));
   uint8_t* result = nullptr;
 
   {
@@ -774,11 +781,13 @@
   if (result != nullptr) {
     *roots_data = result;
     *stack_map_data = result + table_size;
+    *method_info_data = *stack_map_data + stack_map_size;
     FillRootTableLength(*roots_data, number_of_roots);
     return size;
   } else {
     *roots_data = nullptr;
     *stack_map_data = nullptr;
+    *method_info_data = nullptr;
     return 0;
   }
 }
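
A self-contained sketch of the data-region layout that ReserveData now produces (RoundUpTo is reimplemented here for illustration; the real root-table size comes from ComputeRootTableSize): one allocation holds the roots table first, then the stack map, then the new method info.

    #include <cstddef>
    #include <cstdint>

    static size_t RoundUpTo(size_t x, size_t n) {  // n must be a power of two
      return (x + n - 1) & ~(n - 1);
    }

    struct DataLayout {
      size_t total_size;
      uint8_t* roots_data;
      uint8_t* stack_map_data;
      uint8_t* method_info_data;
    };

    DataLayout Carve(uint8_t* base, size_t table_size,
                     size_t stack_map_size, size_t method_info_size) {
      DataLayout layout;
      layout.total_size = RoundUpTo(stack_map_size + method_info_size + table_size,
                                    sizeof(void*));
      layout.roots_data = base;                                          // [0, table_size)
      layout.stack_map_data = base + table_size;                         // then the stack map
      layout.method_info_data = layout.stack_map_data + stack_map_size;  // then method info
      return layout;
    }
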
@@ -1262,12 +1271,23 @@
     for (size_t i = 0; i < info->number_of_inline_caches_; ++i) {
       std::vector<ProfileMethodInfo::ProfileClassReference> profile_classes;
       const InlineCache& cache = info->cache_[i];
+      ArtMethod* caller = info->GetMethod();
+      bool is_missing_types = false;
       for (size_t k = 0; k < InlineCache::kIndividualCacheSize; k++) {
         mirror::Class* cls = cache.classes_[k].Read();
         if (cls == nullptr) {
           break;
         }
 
+        // Check if the receiver is in the boot class path or if it's in the
+        // same class loader as the caller. If not, skip it, as there is not
+        // much we can do during AOT.
+        if (!cls->IsBootStrapClassLoaded() &&
+            caller->GetClassLoader() != cls->GetClassLoader()) {
+          is_missing_types = true;
+          continue;
+        }
+
         const DexFile* class_dex_file = nullptr;
         dex::TypeIndex type_index;
 
@@ -1284,17 +1304,20 @@
         }
         if (!type_index.IsValid()) {
           // Could be a proxy class or an array for which we couldn't find the type index.
+          is_missing_types = true;
           continue;
         }
         if (ContainsElement(dex_base_locations, class_dex_file->GetBaseLocation())) {
           // Only consider classes from the same apk (including multidex).
           profile_classes.emplace_back(/*ProfileMethodInfo::ProfileClassReference*/
               class_dex_file, type_index);
+        } else {
+          is_missing_types = true;
         }
       }
       if (!profile_classes.empty()) {
         inline_caches.emplace_back(/*ProfileMethodInfo::ProfileInlineCache*/
-            cache.dex_pc_, profile_classes);
+            cache.dex_pc_, is_missing_types, profile_classes);
       }
     }
     methods.emplace_back(/*ProfileMethodInfo*/
@@ -1343,7 +1366,10 @@
   MutexLock mu(self, lock_);
   ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   if (info != nullptr) {
-    info->IncrementInlineUse();
+    if (!info->IncrementInlineUse()) {
+      // Overflow of inlining uses; just bail.
+      return nullptr;
+    }
   }
   return info;
 }
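
A standalone model of the receiver filter added in the inline-cache walk above (the struct and loader ids are illustrative): a class is only recorded when it is loaded by the boot class path or by the caller's class loader; anything else flags the cache as missing types, since such a receiver cannot be encoded for AOT.

    struct ClassModel {            // illustrative stand-in for mirror::Class
      bool boot_class_path_loaded;
      int class_loader_id;         // identity of the defining class loader
    };

    bool ShouldRecordReceiver(const ClassModel& cls,
                              int caller_class_loader_id,
                              bool* is_missing_types) {
      if (!cls.boot_class_path_loaded &&
          cls.class_loader_id != caller_class_loader_id) {
        *is_missing_types = true;  // remember that the cache is incomplete
        return false;              // skip: not much we can do during AOT
      }
      return true;
    }
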
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 33a792f..db214e7 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -105,6 +105,7 @@
   uint8_t* CommitCode(Thread* self,
                       ArtMethod* method,
                       uint8_t* stack_map,
+                      uint8_t* method_info,
                       uint8_t* roots_data,
                       size_t frame_size_in_bytes,
                       size_t core_spill_mask,
@@ -129,10 +130,12 @@
   // for storing `number_of_roots` roots. Returns zero (and null out-pointers)
   // if there is no more room; otherwise returns the number of bytes allocated.
   size_t ReserveData(Thread* self,
-                     size_t size,
+                     size_t stack_map_size,
+                     size_t method_info_size,
                      size_t number_of_roots,
                      ArtMethod* method,
                      uint8_t** stack_map_data,
+                     uint8_t** method_info_data,
                      uint8_t** roots_data)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!lock_);
@@ -229,6 +232,12 @@
   void MoveObsoleteMethod(ArtMethod* old_method, ArtMethod* new_method)
       REQUIRES(!lock_) REQUIRES(Locks::mutator_lock_);
 
+  // Dynamically change whether we want to garbage collect code. Should only be used
+  // by tests.
+  void SetGarbageCollectCode(bool value) {
+    garbage_collect_code_ = value;
+  }
+
  private:
   // Take ownership of maps.
   JitCodeCache(MemMap* code_map,
@@ -243,6 +252,7 @@
   uint8_t* CommitCodeInternal(Thread* self,
                               ArtMethod* method,
                               uint8_t* stack_map,
+                              uint8_t* method_info,
                               uint8_t* roots_data,
                               size_t frame_size_in_bytes,
                               size_t core_spill_mask,
@@ -359,8 +369,8 @@
   // It is atomic to avoid locking when reading it.
   Atomic<uint64_t> last_update_time_ns_;
 
-  // Whether we can do garbage collection.
-  const bool garbage_collect_code_;
+  // Whether we can do garbage collection. Not 'const' as tests may override this.
+  bool garbage_collect_code_;
 
   // The size in bytes of used memory for the data portion of the code cache.
   size_t used_memory_for_data_ GUARDED_BY(lock_);
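
A hypothetical test usage of the new hook (the accessor chain is an assumption about test plumbing): turning collection off lets a test assert that JIT-compiled code stays resident across a GC cycle.

    // Inside a test fixture, after forcing JIT compilation of a method:
    jit::JitCodeCache* code_cache = Runtime::Current()->GetJit()->GetCodeCache();
    code_cache->SetGarbageCollectCode(false);  // keep compiled code alive for assertions
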
diff --git a/runtime/jit/profile_compilation_info.cc b/runtime/jit/profile_compilation_info.cc
index 627cc93..24ea275 100644
--- a/runtime/jit/profile_compilation_info.cc
+++ b/runtime/jit/profile_compilation_info.cc
@@ -37,7 +37,9 @@
 namespace art {
 
 const uint8_t ProfileCompilationInfo::kProfileMagic[] = { 'p', 'r', 'o', '\0' };
-const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '3', '\0' };  // inline caches
+// Last profile version: fix profman merges. Update profile version to force
+// regeneration of possibly faulty profiles.
+const uint8_t ProfileCompilationInfo::kProfileVersion[] = { '0', '0', '5', '\0' };
 
 static constexpr uint16_t kMaxDexFileKeyLength = PATH_MAX;
 
@@ -46,16 +48,27 @@
 // using the same test profile.
 static constexpr bool kDebugIgnoreChecksum = false;
 
-static constexpr uint8_t kMegamorphicEncoding = 7;
+static constexpr uint8_t kIsMissingTypesEncoding = 6;
+static constexpr uint8_t kIsMegamorphicEncoding = 7;
 
 static_assert(sizeof(InlineCache::kIndividualCacheSize) == sizeof(uint8_t),
               "InlineCache::kIndividualCacheSize does not have the expect type size");
-static_assert(InlineCache::kIndividualCacheSize < kMegamorphicEncoding,
+static_assert(InlineCache::kIndividualCacheSize < kIsMegamorphicEncoding,
               "InlineCache::kIndividualCacheSize is larger than expected");
+static_assert(InlineCache::kIndividualCacheSize < kIsMissingTypesEncoding,
+              "InlineCache::kIndividualCacheSize is larger than expected");
+
+ProfileCompilationInfo::ProfileCompilationInfo(const ProfileCompilationInfo& pci) {
+  MergeWith(pci);
+}
+
+ProfileCompilationInfo::~ProfileCompilationInfo() {
+  ClearProfile();
+}
 
 void ProfileCompilationInfo::DexPcData::AddClass(uint16_t dex_profile_idx,
                                                  const dex::TypeIndex& type_idx) {
-  if (is_megamorphic) {
+  if (is_megamorphic || is_missing_types) {
     return;
   }
   classes.emplace(dex_profile_idx, type_idx);
@@ -206,7 +219,8 @@
  *       Classes are grouped per their dex files and the line
  *       `dex_profile_index,class_id1,class_id2...,dex_profile_index2,...` encodes the
  *       mapping from `dex_profile_index` to the set of classes `class_id1,class_id2...`
- *    M stands for megamorphic and it's encoded as the byte kMegamorphicEncoding.
+ *    M stands for megamorphic or missing types and it's encoded as either
+ *    the byte kIsMegamorphicEncoding or kIsMissingTypesEncoding.
  *    When present, there will be no class ids following.
  **/
 bool ProfileCompilationInfo::Save(int fd) {
@@ -222,20 +236,21 @@
   DCHECK_LE(info_.size(), std::numeric_limits<uint8_t>::max());
   AddUintToBuffer(&buffer, static_cast<uint8_t>(info_.size()));
 
-  for (const auto& it : info_) {
+  // Dex files must be written in the order of their profile index. This
+  // avoids writing the index in the output file and simplifies the parsing logic.
+  for (const DexFileData* dex_data_ptr : info_) {
+    const DexFileData& dex_data = *dex_data_ptr;
     if (buffer.size() > kMaxSizeToKeepBeforeWriting) {
       if (!WriteBuffer(fd, buffer.data(), buffer.size())) {
         return false;
       }
       buffer.clear();
     }
-    const std::string& dex_location = it.first;
-    const DexFileData& dex_data = it.second;
 
     // Note that we allow dex files without any methods or classes, so that
     // inline caches can refer to valid dex files.
 
-    if (dex_location.size() >= kMaxDexFileKeyLength) {
+    if (dex_data.profile_key.size() >= kMaxDexFileKeyLength) {
       LOG(WARNING) << "DexFileKey exceeds allocated limit";
       return false;
     }
@@ -245,19 +260,19 @@
     uint32_t methods_region_size = GetMethodsRegionSize(dex_data);
     size_t required_capacity = buffer.size() +
         kLineHeaderSize +
-        dex_location.size() +
+        dex_data.profile_key.size() +
         sizeof(uint16_t) * dex_data.class_set.size() +
         methods_region_size;
 
     buffer.reserve(required_capacity);
-    DCHECK_LE(dex_location.size(), std::numeric_limits<uint16_t>::max());
+    DCHECK_LE(dex_data.profile_key.size(), std::numeric_limits<uint16_t>::max());
     DCHECK_LE(dex_data.class_set.size(), std::numeric_limits<uint16_t>::max());
-    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_location.size()));
+    AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.profile_key.size()));
     AddUintToBuffer(&buffer, static_cast<uint16_t>(dex_data.class_set.size()));
     AddUintToBuffer(&buffer, methods_region_size);  // uint32_t
     AddUintToBuffer(&buffer, dex_data.checksum);  // uint32_t
 
-    AddStringToBuffer(&buffer, dex_location);
+    AddStringToBuffer(&buffer, dex_data.profile_key);
 
     for (const auto& method_it : dex_data.method_map) {
       AddUintToBuffer(&buffer, method_it.first);
@@ -289,10 +304,19 @@
     // Add the dex pc.
     AddUintToBuffer(buffer, dex_pc);
 
-    if (dex_pc_data.is_megamorphic) {
-      // Add the megamorphic encoding if needed and continue.
-      // If megamorphic, we don't add the rest of the classes.
-      AddUintToBuffer(buffer, kMegamorphicEncoding);
+    // Add the megamorphic/missing_types encoding if needed and continue.
+    // In either case we don't add any classes to the profile and so there's
+    // no point in continuing.
+    // TODO(calin): when types are missing there is still value in adding the
+    // rest of the classes. They can be added without bumping the profile version.
+    if (dex_pc_data.is_missing_types) {
+      DCHECK(!dex_pc_data.is_megamorphic);  // at this point the megamorphic flag should not be set.
+      DCHECK_EQ(classes.size(), 0u);
+      AddUintToBuffer(buffer, kIsMissingTypesEncoding);
+      continue;
+    } else if (dex_pc_data.is_megamorphic) {
+      DCHECK_EQ(classes.size(), 0u);
+      AddUintToBuffer(buffer, kIsMegamorphicEncoding);
       continue;
     }
 
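A self-contained sketch of the resulting byte layout for one inline-cache entry (the little-endian byte order here is an assumption; the real writer goes through AddUintToBuffer): after the 16-bit dex pc comes one byte that is either an actual class count, kIsMissingTypesEncoding (6), or kIsMegamorphicEncoding (7).

    #include <cstdint>
    #include <vector>

    enum : uint8_t { kMissingTypes = 6, kMegamorphic = 7 };  // mirrors the constants above

    void EncodeDexPcEntry(std::vector<uint8_t>* out, uint16_t dex_pc,
                          bool missing_types, bool megamorphic, uint8_t num_classes) {
      out->push_back(static_cast<uint8_t>(dex_pc));       // dex pc, low byte
      out->push_back(static_cast<uint8_t>(dex_pc >> 8));  // dex pc, high byte
      if (missing_types) {
        out->push_back(kMissingTypes);  // no class ids follow
      } else if (megamorphic) {
        out->push_back(kMegamorphic);   // no class ids follow
      } else {
        out->push_back(num_classes);    // this many class ids follow
      }
    }
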
@@ -353,23 +377,52 @@
 }
 
 ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::GetOrAddDexFileData(
-    const std::string& dex_location,
+    const std::string& profile_key,
     uint32_t checksum) {
-  auto info_it = info_.FindOrAdd(dex_location, DexFileData(checksum, info_.size()));
-  if (info_.size() > std::numeric_limits<uint8_t>::max()) {
+  const auto& profile_index_it = profile_key_map_.FindOrAdd(profile_key, profile_key_map_.size());
+  if (profile_key_map_.size() > std::numeric_limits<uint8_t>::max()) {
     // Allow only 255 dex files to be profiled. This allows us to save bytes
     // when encoding. The number is well above what we expect for normal applications.
     if (kIsDebugBuild) {
-      LOG(WARNING) << "Exceeded the maximum number of dex files (255). Something went wrong";
+      LOG(ERROR) << "Exceeded the maximum number of dex files (255). Something went wrong";
     }
-    info_.erase(dex_location);
+    profile_key_map_.erase(profile_key);
     return nullptr;
   }
-  if (info_it->second.checksum != checksum) {
-    LOG(WARNING) << "Checksum mismatch for dex " << dex_location;
+
+  uint8_t profile_index = profile_index_it->second;
+  if (info_.size() <= profile_index) {
+    // This is a new addition. Add it to the info_ array.
+    info_.emplace_back(new DexFileData(profile_key, checksum, profile_index));
+  }
+  DexFileData* result = info_[profile_index];
+  // DCHECK that profile info map key is consistent with the one stored in the dex file data.
+  // This should always be the case since the cache map is managed by ProfileCompilationInfo.
+  DCHECK_EQ(profile_key, result->profile_key);
+  DCHECK_EQ(profile_index, result->profile_index);
+
+  // Check that the checksum matches.
+  // This may differ if, for example, the dex file was updated and
+  // we had a record of the old one.
+  if (result->checksum != checksum) {
+    LOG(WARNING) << "Checksum mismatch for dex " << profile_key;
     return nullptr;
   }
-  return &info_it->second;
+  return result;
+}
+
+const ProfileCompilationInfo::DexFileData* ProfileCompilationInfo::FindDexData(
+      const std::string& profile_key) const {
+  const auto& profile_index_it = profile_key_map_.find(profile_key);
+  if (profile_index_it == profile_key_map_.end()) {
+    return nullptr;
+  }
+
+  uint8_t profile_index = profile_index_it->second;
+  const DexFileData* result = info_[profile_index];
+  DCHECK_EQ(profile_key, result->profile_key);
+  DCHECK_EQ(profile_index, result->profile_index);
+  return result;
 }
 
 bool ProfileCompilationInfo::AddResolvedClasses(const DexCacheResolvedClasses& classes) {
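
A standalone model of the new bookkeeping (std::map stands in for SafeMap, and the 255-dex-file limit check is elided): info_ is a vector owning the per-dex-file data, indexed by profile index, while profile_key_map_ is purely a search cache from key to index.

    #include <cstdint>
    #include <map>
    #include <string>
    #include <vector>

    struct DexFileDataModel {  // illustrative mirror of DexFileData
      std::string profile_key;
      uint8_t profile_index;
      uint32_t checksum;
    };

    class ProfileModel {
     public:
      DexFileDataModel* GetOrAdd(const std::string& key, uint32_t checksum) {
        uint8_t index =
            key_to_index_.emplace(key, static_cast<uint8_t>(info_.size())).first->second;
        if (info_.size() <= index) {
          info_.push_back(new DexFileDataModel{key, index, checksum});  // new entry
        }
        DexFileDataModel* data = info_[index];
        return data->checksum == checksum ? data : nullptr;  // checksum mismatch -> null
      }
      ~ProfileModel() {
        for (DexFileDataModel* data : info_) delete data;  // info_ owns the data
      }

     private:
      std::vector<DexFileDataModel*> info_;          // vector index == profile index
      std::map<std::string, uint8_t> key_to_index_;  // rebuildable search cache
    };
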
@@ -393,9 +446,7 @@
                                        uint32_t dex_checksum,
                                        uint16_t method_index,
                                        const OfflineProfileMethodInfo& pmi) {
-  DexFileData* const data = GetOrAddDexFileData(
-      GetProfileDexFileKey(dex_location),
-      dex_checksum);
+  DexFileData* const data = GetOrAddDexFileData(GetProfileDexFileKey(dex_location), dex_checksum);
   if (data == nullptr) {  // checksum mismatch
     return false;
   }
@@ -403,11 +454,21 @@
   for (const auto& pmi_inline_cache_it : pmi.inline_caches) {
     uint16_t pmi_ic_dex_pc = pmi_inline_cache_it.first;
     const DexPcData& pmi_ic_dex_pc_data = pmi_inline_cache_it.second;
-    auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(pmi_ic_dex_pc);
-    if (pmi_ic_dex_pc_data.is_megamorphic) {
-      dex_pc_data_it->second.SetMegamorphic();
+    DexPcData& dex_pc_data = inline_cache_it->second.FindOrAdd(pmi_ic_dex_pc)->second;
+    if (dex_pc_data.is_missing_types || dex_pc_data.is_megamorphic) {
+      // We are already megamorphic or we are missing types; no point in going forward.
       continue;
     }
+
+    if (pmi_ic_dex_pc_data.is_missing_types) {
+      dex_pc_data.SetIsMissingTypes();
+      continue;
+    }
+    if (pmi_ic_dex_pc_data.is_megamorphic) {
+      dex_pc_data.SetIsMegamorphic();
+      continue;
+    }
+
     for (const ClassReference& class_ref : pmi_ic_dex_pc_data.classes) {
       const DexReference& dex_ref = pmi.dex_references[class_ref.dex_profile_index];
       DexFileData* class_dex_data = GetOrAddDexFileData(
@@ -416,7 +477,7 @@
       if (class_dex_data == nullptr) {  // checksum mismatch
         return false;
       }
-      dex_pc_data_it->second.AddClass(class_dex_data->profile_index, class_ref.type_index);
+      dex_pc_data.AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
   return true;
@@ -432,6 +493,11 @@
   auto inline_cache_it = data->method_map.FindOrAdd(pmi.dex_method_index);
 
   for (const ProfileMethodInfo::ProfileInlineCache& cache : pmi.inline_caches) {
+    if (cache.is_missing_types) {
+      auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
+      dex_pc_data_it->second.SetIsMissingTypes();
+      continue;
+    }
     for (const ProfileMethodInfo::ProfileClassReference& class_ref : cache.classes) {
       DexFileData* class_dex_data = GetOrAddDexFileData(
           GetProfileDexFileKey(class_ref.dex_file->GetLocation()),
@@ -440,6 +506,10 @@
         return false;
       }
       auto dex_pc_data_it = inline_cache_it->second.FindOrAdd(cache.dex_pc);
+      if (dex_pc_data_it->second.is_missing_types) {
+        // Don't bother adding classes if we are missing types.
+        break;
+      }
       dex_pc_data_it->second.AddClass(class_dex_data->profile_index, class_ref.type_index);
     }
   }
@@ -457,13 +527,13 @@
   return true;
 }
 
-#define READ_UINT(type, buffer, dest, error)          \
-  do {                                                \
-    if (!buffer.ReadUintAndAdvance<type>(&dest)) {    \
-      *error = "Could not read "#dest;                \
-      return false;                                   \
-    }                                                 \
-  }                                                   \
+#define READ_UINT(type, buffer, dest, error)            \
+  do {                                                  \
+    if (!(buffer).ReadUintAndAdvance<type>(&(dest))) {  \
+      *(error) = "Could not read "#dest;                \
+      return false;                                     \
+    }                                                   \
+  }                                                     \
   while (false)
 
 bool ProfileCompilationInfo::ReadInlineCache(SafeBuffer& buffer,
@@ -478,8 +548,12 @@
     READ_UINT(uint16_t, buffer, dex_pc, error);
     READ_UINT(uint8_t, buffer, dex_to_classes_map_size, error);
     auto dex_pc_data_it = inline_cache->FindOrAdd(dex_pc);
-    if (dex_to_classes_map_size == kMegamorphicEncoding) {
-      dex_pc_data_it->second.SetMegamorphic();
+    if (dex_to_classes_map_size == kIsMissingTypesEncoding) {
+      dex_pc_data_it->second.SetIsMissingTypes();
+      continue;
+    }
+    if (dex_to_classes_map_size == kIsMegamorphicEncoding) {
+      dex_pc_data_it->second.SetIsMegamorphic();
       continue;
     }
     for (; dex_to_classes_map_size > 0; dex_to_classes_map_size--) {
@@ -708,6 +782,8 @@
   return kProfileLoadSuccess;
 }
 
+// TODO(calin): Fix this API. ProfileCompilationInfo::Load should be static and
+// return a unique pointer to a ProfileCompilationInfo upon success.
 bool ProfileCompilationInfo::Load(int fd) {
   std::string error;
   ProfileLoadSatus status = LoadInternal(fd, &error);
@@ -725,6 +801,10 @@
   ScopedTrace trace(__PRETTY_FUNCTION__);
   DCHECK_GE(fd, 0);
 
+  if (!IsEmpty()) {
+    return kProfileLoadWouldOverwiteData;
+  }
+
   struct stat stat_buffer;
   if (fstat(fd, &stat_buffer) != 0) {
     return kProfileLoadIOError;
@@ -775,10 +855,10 @@
   // the current profile info.
   // Note that the number of elements should be very small, so this should not
   // be a performance issue.
-  for (const auto& other_it : other.info_) {
-    auto info_it = info_.find(other_it.first);
-    if ((info_it != info_.end()) && (info_it->second.checksum != other_it.second.checksum)) {
-      LOG(WARNING) << "Checksum mismatch for dex " << other_it.first;
+  for (const DexFileData* other_dex_data : other.info_) {
+    const DexFileData* dex_data = FindDexData(other_dex_data->profile_key);
+    if ((dex_data != nullptr) && (dex_data->checksum != other_dex_data->checksum)) {
+      LOG(WARNING) << "Checksum mismatch for dex " << other_dex_data->profile_key;
       return false;
     }
   }
@@ -795,36 +875,37 @@
   // First, build a mapping from other_dex_profile_index to this_dex_profile_index.
   // This will make sure that the ClassReferences will point to the correct dex file.
   SafeMap<uint8_t, uint8_t> dex_profile_index_remap;
-  for (const auto& other_it : other.info_) {
-    const std::string& other_dex_location = other_it.first;
-    const DexFileData& other_dex_data = other_it.second;
-    auto info_it = info_.FindOrAdd(other_dex_location, DexFileData(other_dex_data.checksum, 0));
-    const DexFileData& dex_data = info_it->second;
-    dex_profile_index_remap.Put(other_dex_data.profile_index, dex_data.profile_index);
+  for (const DexFileData* other_dex_data : other.info_) {
+    const DexFileData* dex_data = GetOrAddDexFileData(other_dex_data->profile_key,
+                                                      other_dex_data->checksum);
+    if (dex_data == nullptr) {
+      return false;  // Could happen if we exceed the number of allowed dex files.
+    }
+    dex_profile_index_remap.Put(other_dex_data->profile_index, dex_data->profile_index);
   }
 
   // Merge the actual profile data.
-  for (const auto& other_it : other.info_) {
-    const std::string& other_dex_location = other_it.first;
-    const DexFileData& other_dex_data = other_it.second;
-    auto info_it = info_.find(other_dex_location);
-    DCHECK(info_it != info_.end());
+  for (const DexFileData* other_dex_data : other.info_) {
+    DexFileData* dex_data = const_cast<DexFileData*>(FindDexData(other_dex_data->profile_key));
+    DCHECK(dex_data != nullptr);
 
     // Merge the classes.
-    info_it->second.class_set.insert(other_dex_data.class_set.begin(),
-                                     other_dex_data.class_set.end());
+    dex_data->class_set.insert(other_dex_data->class_set.begin(),
+                               other_dex_data->class_set.end());
 
     // Merge the methods and the inline caches.
-    for (const auto& other_method_it : other_dex_data.method_map) {
+    for (const auto& other_method_it : other_dex_data->method_map) {
       uint16_t other_method_index = other_method_it.first;
-      auto method_it = info_it->second.method_map.FindOrAdd(other_method_index);
+      auto method_it = dex_data->method_map.FindOrAdd(other_method_index);
       const auto& other_inline_cache = other_method_it.second;
       for (const auto& other_ic_it : other_inline_cache) {
         uint16_t other_dex_pc = other_ic_it.first;
         const ClassSet& other_class_set = other_ic_it.second.classes;
         auto class_set = method_it->second.FindOrAdd(other_dex_pc);
-        if (other_ic_it.second.is_megamorphic) {
-          class_set->second.SetMegamorphic();
+        if (other_ic_it.second.is_missing_types) {
+          class_set->second.SetIsMissingTypes();
+        } else if (other_ic_it.second.is_megamorphic) {
+          class_set->second.SetIsMegamorphic();
         } else {
           for (const auto& class_it : other_class_set) {
             class_set->second.AddClass(dex_profile_index_remap.Get(
@@ -855,28 +936,18 @@
 ProfileCompilationInfo::FindMethod(const std::string& dex_location,
                                    uint32_t dex_checksum,
                                    uint16_t dex_method_index) const {
-  auto info_it = info_.find(GetProfileDexFileKey(dex_location));
-  if (info_it != info_.end()) {
-    if (!ChecksumMatch(dex_checksum, info_it->second.checksum)) {
+  const DexFileData* dex_data = FindDexData(GetProfileDexFileKey(dex_location));
+  if (dex_data != nullptr) {
+    if (!ChecksumMatch(dex_checksum, dex_data->checksum)) {
       return nullptr;
     }
-    const MethodMap& methods = info_it->second.method_map;
+    const MethodMap& methods = dex_data->method_map;
     const auto method_it = methods.find(dex_method_index);
     return method_it == methods.end() ? nullptr : &(method_it->second);
   }
   return nullptr;
 }
 
-void ProfileCompilationInfo::DexFileToProfileIndex(
-    /*out*/std::vector<DexReference>* dex_references) const {
-  dex_references->resize(info_.size());
-  for (const auto& info_it : info_) {
-    DexReference& dex_ref = (*dex_references)[info_it.second.profile_index];
-    dex_ref.dex_location = info_it.first;
-    dex_ref.dex_checksum = info_it.second.checksum;
-  }
-}
-
 bool ProfileCompilationInfo::GetMethod(const std::string& dex_location,
                                        uint32_t dex_checksum,
                                        uint16_t dex_method_index,
@@ -886,7 +957,12 @@
     return false;
   }
 
-  DexFileToProfileIndex(&pmi->dex_references);
+  pmi->dex_references.resize(info_.size());
+  for (const DexFileData* dex_data : info_) {
+    pmi->dex_references[dex_data->profile_index].dex_location = dex_data->profile_key;
+    pmi->dex_references[dex_data->profile_index].dex_checksum = dex_data->checksum;
+  }
+
   // TODO(calin): maybe expose a direct pointer to avoid copying
   pmi->inline_caches = *inline_caches;
   return true;
@@ -894,12 +970,12 @@
 
 
 bool ProfileCompilationInfo::ContainsClass(const DexFile& dex_file, dex::TypeIndex type_idx) const {
-  auto info_it = info_.find(GetProfileDexFileKey(dex_file.GetLocation()));
-  if (info_it != info_.end()) {
-    if (!ChecksumMatch(dex_file, info_it->second.checksum)) {
+  const DexFileData* dex_data = FindDexData(GetProfileDexFileKey(dex_file.GetLocation()));
+  if (dex_data != nullptr) {
+    if (!ChecksumMatch(dex_file, dex_data->checksum)) {
       return false;
     }
-    const std::set<dex::TypeIndex>& classes = info_it->second.class_set;
+    const std::set<dex::TypeIndex>& classes = dex_data->class_set;
     return classes.find(type_idx) != classes.end();
   }
   return false;
@@ -907,16 +983,16 @@
 
 uint32_t ProfileCompilationInfo::GetNumberOfMethods() const {
   uint32_t total = 0;
-  for (const auto& it : info_) {
-    total += it.second.method_map.size();
+  for (const DexFileData* dex_data : info_) {
+    total += dex_data->method_map.size();
   }
   return total;
 }
 
 uint32_t ProfileCompilationInfo::GetNumberOfResolvedClasses() const {
   uint32_t total = 0;
-  for (const auto& it : info_) {
-    total += it.second.class_set.size();
+  for (const DexFileData* dex_data : info_) {
+    total += dex_data->class_set.size();
   }
   return total;
 }
@@ -949,27 +1025,27 @@
   os << "ProfileInfo:";
 
   const std::string kFirstDexFileKeySubstitute = ":classes.dex";
-  for (const auto& it : info_) {
+
+  for (const DexFileData* dex_data : info_) {
     os << "\n";
-    const std::string& location = it.first;
-    const DexFileData& dex_data = it.second;
     if (print_full_dex_location) {
-      os << location;
+      os << dex_data->profile_key;
     } else {
       // Replace the (empty) multidex suffix of the first key with a substitute for easier reading.
-      std::string multidex_suffix = DexFile::GetMultiDexSuffix(location);
+      std::string multidex_suffix = DexFile::GetMultiDexSuffix(dex_data->profile_key);
       os << (multidex_suffix.empty() ? kFirstDexFileKeySubstitute : multidex_suffix);
     }
+    os << " [index=" << static_cast<uint32_t>(dex_data->profile_index) << "]";
     const DexFile* dex_file = nullptr;
     if (dex_files != nullptr) {
       for (size_t i = 0; i < dex_files->size(); i++) {
-        if (location == (*dex_files)[i]->GetLocation()) {
+        if (dex_data->profile_key == (*dex_files)[i]->GetLocation()) {
           dex_file = (*dex_files)[i];
         }
       }
     }
     os << "\n\tmethods: ";
-    for (const auto method_it : dex_data.method_map) {
+    for (const auto& method_it : dex_data->method_map) {
       if (dex_file != nullptr) {
         os << "\n\t\t" << dex_file->PrettyMethod(method_it.first, true);
       } else {
@@ -979,8 +1055,10 @@
       os << "[";
       for (const auto& inline_cache_it : method_it.second) {
         os << "{" << std::hex << inline_cache_it.first << std::dec << ":";
-        if (inline_cache_it.second.is_megamorphic) {
-          os << "M";
+        if (inline_cache_it.second.is_missing_types) {
+          os << "MT";
+        } else if (inline_cache_it.second.is_megamorphic) {
+          os << "MM";
         } else {
           for (const ClassReference& class_ref : inline_cache_it.second.classes) {
             os << "(" << static_cast<uint32_t>(class_ref.dex_profile_index)
@@ -992,7 +1070,7 @@
       os << "], ";
     }
     os << "\n\tclasses: ";
-    for (const auto class_it : dex_data.class_set) {
+    for (const auto class_it : dex_data->class_set) {
       if (dex_file != nullptr) {
         os << "\n\t\t" << dex_file->PrettyType(class_it);
       } else {
@@ -1016,18 +1094,17 @@
   if (info_.empty()) {
     return;
   }
-  for (const auto& it : info_) {
-    const std::string& location = it.first;
-    const DexFileData& dex_data = it.second;
+  for (const DexFileData* dex_data : info_) {
     const DexFile* dex_file = nullptr;
     if (dex_files != nullptr) {
       for (size_t i = 0; i < dex_files->size(); i++) {
-        if (location == (*dex_files)[i]->GetLocation()) {
+        if (dex_data->profile_key == GetProfileDexFileKey((*dex_files)[i]->GetLocation()) &&
+            dex_data->checksum == (*dex_files)[i]->GetLocationChecksum()) {
           dex_file = (*dex_files)[i];
         }
       }
     }
-    for (const auto class_it : dex_data.class_set) {
+    for (const auto class_it : dex_data->class_set) {
       if (dex_file != nullptr) {
         class_names->insert(std::string(dex_file->PrettyType(class_it)));
       }
@@ -1036,25 +1113,42 @@
 }
 
 bool ProfileCompilationInfo::Equals(const ProfileCompilationInfo& other) {
-  return info_.Equals(other.info_);
+  // No need to compare profile_key_map_. That's only a cache for fast search.
+  // All the information is already in the info_ vector.
+  if (info_.size() != other.info_.size()) {
+    return false;
+  }
+  for (size_t i = 0; i < info_.size(); i++) {
+    const DexFileData& dex_data = *info_[i];
+    const DexFileData& other_dex_data = *other.info_[i];
+    if (!(dex_data == other_dex_data)) {
+      return false;
+    }
+  }
+  return true;
 }
 
-std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses() const {
+std::set<DexCacheResolvedClasses> ProfileCompilationInfo::GetResolvedClasses(
+    const std::unordered_set<std::string>& dex_files_locations) const {
+  std::unordered_map<std::string, std::string> key_to_location_map;
+  for (const std::string& location : dex_files_locations) {
+    key_to_location_map.emplace(GetProfileDexFileKey(location), location);
+  }
   std::set<DexCacheResolvedClasses> ret;
-  for (auto&& pair : info_) {
-    const std::string& profile_key = pair.first;
-    const DexFileData& data = pair.second;
-    // TODO: Is it OK to use the same location for both base and dex location here?
-    DexCacheResolvedClasses classes(profile_key, profile_key, data.checksum);
-    classes.AddClasses(data.class_set.begin(), data.class_set.end());
-    ret.insert(classes);
+  for (const DexFileData* dex_data : info_) {
+    const auto& it = key_to_location_map.find(dex_data->profile_key);
+    if (it != key_to_location_map.end()) {
+      DexCacheResolvedClasses classes(it->second, it->second, dex_data->checksum);
+      classes.AddClasses(dex_data->class_set.begin(), dex_data->class_set.end());
+      ret.insert(classes);
+    }
   }
   return ret;
 }
 
 void ProfileCompilationInfo::ClearResolvedClasses() {
-  for (auto& pair : info_) {
-    pair.second.class_set.clear();
+  for (DexFileData* dex_data : info_) {
+    dex_data->class_set.clear();
   }
 }
 
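A hypothetical call under the new GetResolvedClasses signature (the location string is illustrative, and `info` is assumed to be a populated ProfileCompilationInfo): callers list the dex locations they care about, and only profile keys that map back to one of those locations are returned.

    std::unordered_set<std::string> locations = {
        "/data/app/com.example/base.apk",  // hypothetical dex location
    };
    std::set<DexCacheResolvedClasses> resolved = info.GetResolvedClasses(locations);
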
@@ -1062,7 +1156,8 @@
 bool ProfileCompilationInfo::GenerateTestProfile(int fd,
                                                  uint16_t number_of_dex_files,
                                                  uint16_t method_ratio,
-                                                 uint16_t class_ratio) {
+                                                 uint16_t class_ratio,
+                                                 uint32_t random_seed) {
   const std::string base_dex_location = "base.apk";
   ProfileCompilationInfo info;
   // The limits are defined by the dex specification.
@@ -1071,7 +1166,7 @@
   uint16_t number_of_methods = max_method * method_ratio / 100;
   uint16_t number_of_classes = max_classes * class_ratio / 100;
 
-  srand(MicroTime());
+  std::srand(random_seed);
 
   // Make sure we generate more samples with a low index value.
   // This makes it more likely to hit valid method/class indices in small apps.
@@ -1101,6 +1196,32 @@
   return info.Save(fd);
 }
 
+// Naive implementation to generate a random profile file suitable for testing.
+bool ProfileCompilationInfo::GenerateTestProfile(
+    int fd,
+    std::vector<std::unique_ptr<const DexFile>>& dex_files,
+    uint32_t random_seed) {
+  std::srand(random_seed);
+  ProfileCompilationInfo info;
+  for (std::unique_ptr<const DexFile>& dex_file : dex_files) {
+    const std::string& location = dex_file->GetLocation();
+    uint32_t checksum = dex_file->GetLocationChecksum();
+    for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      // Randomly add a class from the dex file (with 50% chance).
+      if (std::rand() % 2 != 0) {
+        info.AddClassIndex(location, checksum, dex::TypeIndex(dex_file->GetClassDef(i).class_idx_));
+      }
+    }
+    for (uint32_t i = 0; i < dex_file->NumMethodIds(); ++i) {
+      // Randomly add a method from the dex file (with 50% chance).
+      if (std::rand() % 2 != 0) {
+        info.AddMethodIndex(location, checksum, i);
+      }
+    }
+  }
+  return info.Save(fd);
+}
+
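An illustrative test call showing why the seed parameter was added (the descriptor and the ratios are assumptions): the same seed now reproduces the same pseudo-random profile, so a failing run can be replayed instead of depending on MicroTime().

    const uint32_t kSeed = 42;  // a test would log this value for replay
    ProfileCompilationInfo::GenerateTestProfile(profile_fd,  // hypothetical open fd
                                                /*number_of_dex_files*/ 5,
                                                /*method_ratio*/ 10,
                                                /*class_ratio*/ 10,
                                                kSeed);
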
 bool ProfileCompilationInfo::OfflineProfileMethodInfo::operator==(
       const OfflineProfileMethodInfo& other) const {
   if (inline_caches.size() != other.inline_caches.size()) {
@@ -1108,7 +1229,7 @@
   }
 
   // We can't use a simple equality test because we need to match the dex files
-  // of the inline caches which might have different profile indices.
+  // of the inline caches which might have different profile indexes.
   for (const auto& inline_cache_it : inline_caches) {
     uint16_t dex_pc = inline_cache_it.first;
     const DexPcData dex_pc_data = inline_cache_it.second;
@@ -1117,7 +1238,8 @@
       return false;
     }
     const DexPcData& other_dex_pc_data = other_it->second;
-    if (dex_pc_data.is_megamorphic != other_dex_pc_data.is_megamorphic) {
+    if (dex_pc_data.is_megamorphic != other_dex_pc_data.is_megamorphic ||
+        dex_pc_data.is_missing_types != other_dex_pc_data.is_missing_types) {
       return false;
     }
     for (const ClassReference& class_ref : dex_pc_data.classes) {
@@ -1141,4 +1263,17 @@
   return true;
 }
 
+void ProfileCompilationInfo::ClearProfile() {
+  for (DexFileData* dex_data : info_) {
+    delete dex_data;
+  }
+  info_.clear();
+  profile_key_map_.clear();
+}
+
+bool ProfileCompilationInfo::IsEmpty() const {
+  DCHECK_EQ(info_.empty(), profile_key_map_.empty());
+  return info_.empty();
+}
+
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info.h b/runtime/jit/profile_compilation_info.h
index 4bfbfcd..87f7636 100644
--- a/runtime/jit/profile_compilation_info.h
+++ b/runtime/jit/profile_compilation_info.h
@@ -36,18 +36,22 @@
  */
 struct ProfileMethodInfo {
   struct ProfileClassReference {
+    ProfileClassReference() : dex_file(nullptr) {}
     ProfileClassReference(const DexFile* dex, const dex::TypeIndex& index)
         : dex_file(dex), type_index(index) {}
 
     const DexFile* dex_file;
-    const dex::TypeIndex type_index;
+    dex::TypeIndex type_index;
   };
 
   struct ProfileInlineCache {
-    ProfileInlineCache(uint32_t pc, const std::vector<ProfileClassReference>& profile_classes)
-        : dex_pc(pc), classes(profile_classes) {}
+    ProfileInlineCache(uint32_t pc,
+                       bool missing_types,
+                       const std::vector<ProfileClassReference>& profile_classes)
+        : dex_pc(pc), is_missing_types(missing_types), classes(profile_classes) {}
 
     const uint32_t dex_pc;
+    const bool is_missing_types;
     const std::vector<ProfileClassReference> classes;
   };
 
@@ -82,7 +86,7 @@
 
   // A dex location together with its checksum.
   struct DexReference {
-    DexReference() {}
+    DexReference() : dex_checksum(0) {}
 
     DexReference(const std::string& location, uint32_t checksum)
         : dex_location(location), dex_checksum(checksum) {}
@@ -91,6 +95,11 @@
       return dex_checksum == other.dex_checksum && dex_location == other.dex_location;
     }
 
+    bool MatchesDex(const DexFile* dex_file) const {
+      return dex_checksum == dex_file->GetLocationChecksum() &&
+             dex_location == GetProfileDexFileKey(dex_file->GetLocation());
+    }
+
     std::string dex_location;
     uint32_t dex_checksum;
   };
@@ -128,18 +137,30 @@
 
   // Encodes the actual inline cache for a given dex pc (whether or not the receiver is
   // megamorphic and its possible types).
-  // If the receiver is megamorphic the set of classes will be empty.
+  // If the receiver is megamorphic or is missing types, the set of classes will be empty.
   struct DexPcData {
-    DexPcData() : is_megamorphic(false) {}
+    DexPcData() : is_missing_types(false), is_megamorphic(false) {}
     void AddClass(uint16_t dex_profile_idx, const dex::TypeIndex& type_idx);
-    void SetMegamorphic() {
+    void SetIsMegamorphic() {
+      if (is_missing_types) return;
       is_megamorphic = true;
       classes.clear();
     }
+    void SetIsMissingTypes() {
+      is_megamorphic = false;
+      is_missing_types = true;
+      classes.clear();
+    }
     bool operator==(const DexPcData& other) const {
-      return is_megamorphic == other.is_megamorphic && classes == other.classes;
+      return is_megamorphic == other.is_megamorphic &&
+          is_missing_types == other.is_missing_types &&
+          classes == other.classes;
     }
 
+    // Not all runtime types can be encoded in the profile. For example, if the
+    // receiver type is in a dex file which is not tracked for profiling, its type
+    // cannot be encoded. When types are missing, this field will be set to true.
+    bool is_missing_types;
     bool is_megamorphic;
     ClassSet classes;
   };
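
A runnable standalone check of the precedence encoded above ("missing types" is sticky and wins over "megamorphic"); the struct is a hand-copied mirror for illustration, not the real DexPcData:

    #include <cassert>

    struct DexPcDataModel {  // standalone mirror of DexPcData above
      bool is_missing_types = false;
      bool is_megamorphic = false;
      void SetIsMegamorphic() {
        if (is_missing_types) return;  // missing types wins
        is_megamorphic = true;
      }
      void SetIsMissingTypes() {
        is_megamorphic = false;
        is_missing_types = true;
      }
    };

    int main() {
      DexPcDataModel data;
      data.SetIsMissingTypes();
      data.SetIsMegamorphic();  // ignored: types are already known to be missing
      assert(data.is_missing_types && !data.is_megamorphic);
    }
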
@@ -160,11 +181,16 @@
 
   // Public methods to create, extend or query the profile.
 
+  ProfileCompilationInfo() {}
+  ProfileCompilationInfo(const ProfileCompilationInfo& pci);
+  ~ProfileCompilationInfo();
+
   // Add the given methods and classes to the current profile object.
   bool AddMethodsAndClasses(const std::vector<ProfileMethodInfo>& methods,
                             const std::set<DexCacheResolvedClasses>& resolved_classes);
 
   // Load profile information from the given file descriptor.
+  // If the current profile is non-empty the load will fail.
   bool Load(int fd);
 
   // Merge the data from another ProfileCompilationInfo into the current object.
@@ -218,9 +244,8 @@
   bool Equals(const ProfileCompilationInfo& other);
 
   // Return the class descriptors for all of the classes in the profiles' class sets.
-  // Note the dex location is actually the profile key, the caller needs to call back in to the
-  // profile info stuff to generate a map back to the dex location.
-  std::set<DexCacheResolvedClasses> GetResolvedClasses() const;
+  std::set<DexCacheResolvedClasses> GetResolvedClasses(
+      const std::unordered_set<std::string>& dex_files_locations) const;
 
   // Clear the resolved classes from the current object.
   void ClearResolvedClasses();
@@ -233,7 +258,14 @@
   static bool GenerateTestProfile(int fd,
                                   uint16_t number_of_dex_files,
                                   uint16_t method_ratio,
-                                  uint16_t class_ratio);
+                                  uint16_t class_ratio,
+                                  uint32_t random_seed);
+
+  // Generate a test profile which will randomly contain classes and methods from
+  // the provided list of dex files.
+  static bool GenerateTestProfile(int fd,
+                                  std::vector<std::unique_ptr<const DexFile>>& dex_files,
+                                  uint32_t random_seed);
 
   // Check that the given profile method info contain the same data.
   static bool Equals(const ProfileCompilationInfo::OfflineProfileMethodInfo& pmi1,
@@ -241,6 +273,7 @@
 
  private:
   enum ProfileLoadSatus {
+    kProfileLoadWouldOverwiteData,
     kProfileLoadIOError,
     kProfileLoadVersionMismatch,
     kProfileLoadBadData,
@@ -251,14 +284,21 @@
   using MethodMap = SafeMap<uint16_t, InlineCacheMap>;
 
   // Internal representation of the profile information belonging to a dex file.
+  // Note that we could do without profile_key (the key used to encode the dex
+  // file in the profile) and profile_index (the index of the dex file in the
+  // profile) fields in this struct because we can infer them from
+  // profile_key_map_ and info_. However, it makes the profile logic much
+  // simpler if we have references here as well.
   struct DexFileData {
-    DexFileData(uint32_t location_checksum, uint16_t index)
-         : profile_index(index), checksum(location_checksum) {}
-    // The profile index of this dex file (matches ClassReference#dex_profile_index)
+    DexFileData(const std::string& key, uint32_t location_checksum, uint16_t index)
+         : profile_key(key), profile_index(index), checksum(location_checksum) {}
+    // The profile key this data belongs to.
+    std::string profile_key;
+    // The profile index of this dex file (matches ClassReference#dex_profile_index).
     uint8_t profile_index;
-    // The dex checksum
+    // The dex checksum.
     uint32_t checksum;
-    // The methonds' profile information
+    // The methods' profile information.
     MethodMap method_map;
     // The classes which have been profiled. Note that these don't necessarily include
     // all the classes that can be found in the inline cache references.
@@ -269,12 +309,9 @@
     }
   };
 
-  // Maps dex file to their profile information.
-  using DexFileToProfileInfoMap = SafeMap<const std::string, DexFileData>;
-
-  // Return the profile data for the given dex location or null if the dex location
+  // Return the profile data for the given profile key, or null if the profile key
   // already exists but has a different checksum
-  DexFileData* GetOrAddDexFileData(const std::string& dex_location, uint32_t checksum);
+  DexFileData* GetOrAddDexFileData(const std::string& profile_key, uint32_t checksum);
 
   // Add a method index to the profile (without inline caches).
   bool AddMethodIndex(const std::string& dex_location, uint32_t checksum, uint16_t method_idx);
@@ -305,6 +342,16 @@
   // be the same as the profile index of the dex file (used to encode the ClassReferences).
   void DexFileToProfileIndex(/*out*/std::vector<DexReference>* dex_references) const;
 
+  // Return the dex data associated with the given profile key or null if the profile
+  // doesn't contain the key.
+  const DexFileData* FindDexData(const std::string& profile_key) const;
+
+  // Clear all the profile data.
+  void ClearProfile();
+
+  // Checks if the profile is empty.
+  bool IsEmpty() const;
+
   // Parsing functionality.
 
   // The information present in the header of each profile line.
@@ -411,7 +458,15 @@
   friend class ProfileAssistantTest;
   friend class Dex2oatLayoutTest;
 
-  DexFileToProfileInfoMap info_;
+  // Vector containing the actual profile info.
+  // The vector index is the profile index of the dex data and
+  // matches DexFileData::profile_index.
+  std::vector<DexFileData*> info_;
+
+  // Cache mapping profile keys to profile index.
+  // This is used to speed up searches since it avoids iterating
+  // over the info_ vector when searching by profile key.
+  SafeMap<const std::string, uint8_t> profile_key_map_;
 };
 
 }  // namespace art
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 332280a..c9f2d0e 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -108,26 +108,31 @@
     for (ArtMethod* method : methods) {
       std::vector<ProfileMethodInfo::ProfileInlineCache> caches;
       // Monomorphic
-      for (uint16_t dex_pc = 0; dex_pc < 1; dex_pc++) {
+      for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
         std::vector<ProfileMethodInfo::ProfileClassReference> classes;
         classes.emplace_back(method->GetDexFile(), dex::TypeIndex(0));
-        caches.emplace_back(dex_pc, classes);
+        caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
       }
       // Polymorphic
-      for (uint16_t dex_pc = 1; dex_pc < 2; dex_pc++) {
+      for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
         std::vector<ProfileMethodInfo::ProfileClassReference> classes;
         for (uint16_t k = 0; k < InlineCache::kIndividualCacheSize / 2; k++) {
           classes.emplace_back(method->GetDexFile(), dex::TypeIndex(k));
         }
-        caches.emplace_back(dex_pc, classes);
+        caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
       }
       // Megamorphic
-      for (uint16_t dex_pc = 2; dex_pc < 3; dex_pc++) {
+      for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
         std::vector<ProfileMethodInfo::ProfileClassReference> classes;
         for (uint16_t k = 0; k < 2 * InlineCache::kIndividualCacheSize; k++) {
           classes.emplace_back(method->GetDexFile(), dex::TypeIndex(k));
         }
-        caches.emplace_back(dex_pc, classes);
+        caches.emplace_back(dex_pc, /*is_missing_types*/false, classes);
+      }
+      // Missing types
+      for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
+        std::vector<ProfileMethodInfo::ProfileClassReference> classes;
+        caches.emplace_back(dex_pc, /*is_missing_types*/true, classes);
       }
       ProfileMethodInfo pmi(method->GetDexFile(), method->GetDexMethodIndex(), caches);
       profile_methods.push_back(pmi);
@@ -148,12 +153,15 @@
     ProfileCompilationInfo::OfflineProfileMethodInfo offline_pmi;
     SafeMap<DexFile*, uint8_t> dex_map;  // dex files to profile index
     for (const auto& inline_cache : pmi.inline_caches) {
+      ProfileCompilationInfo::DexPcData& dex_pc_data =
+          offline_pmi.inline_caches.FindOrAdd(inline_cache.dex_pc)->second;
+      if (inline_cache.is_missing_types) {
+        dex_pc_data.SetIsMissingTypes();
+      }
       for (const auto& class_ref : inline_cache.classes) {
         uint8_t dex_profile_index = dex_map.FindOrAdd(const_cast<DexFile*>(class_ref.dex_file),
                                                       static_cast<uint8_t>(dex_map.size()))->second;
-        offline_pmi.inline_caches
-            .FindOrAdd(inline_cache.dex_pc)->second
-            .AddClass(dex_profile_index, class_ref.type_index);
+        dex_pc_data.AddClass(dex_profile_index, class_ref.type_index);
         if (dex_profile_index >= offline_pmi.dex_references.size()) {
           // This is a new dex.
           const std::string& dex_key = ProfileCompilationInfo::GetProfileDexFileKey(
@@ -170,29 +178,35 @@
   ProfileCompilationInfo::OfflineProfileMethodInfo GetOfflineProfileMethodInfo() {
     ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
 
-    pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
-    pmi.dex_references.emplace_back("dex_location2", /* checksum */ 2);
-    pmi.dex_references.emplace_back("dex_location3", /* checksum */ 3);
+    pmi.dex_references.emplace_back("dex_location1", /* checksum */1);
+    pmi.dex_references.emplace_back("dex_location2", /* checksum */2);
+    pmi.dex_references.emplace_back("dex_location3", /* checksum */3);
 
     // Monomorphic
-    for (uint16_t dex_pc = 0; dex_pc < 10; dex_pc++) {
+    for (uint16_t dex_pc = 0; dex_pc < 11; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data;
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Polymorphic
-    for (uint16_t dex_pc = 10; dex_pc < 20; dex_pc++) {
+    for (uint16_t dex_pc = 11; dex_pc < 22; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data;
       dex_pc_data.AddClass(0, dex::TypeIndex(0));
       dex_pc_data.AddClass(1, dex::TypeIndex(1));
       dex_pc_data.AddClass(2, dex::TypeIndex(2));
 
-       pmi.inline_caches.Put(dex_pc, dex_pc_data);
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
     // Megamorphic
-    for (uint16_t dex_pc = 20; dex_pc < 30; dex_pc++) {
+    for (uint16_t dex_pc = 22; dex_pc < 33; dex_pc++) {
       ProfileCompilationInfo::DexPcData dex_pc_data;
-      dex_pc_data.is_megamorphic = true;
+      dex_pc_data.SetIsMegamorphic();
+      pmi.inline_caches.Put(dex_pc, dex_pc_data);
+    }
+    // Missing types
+    for (uint16_t dex_pc = 33; dex_pc < 44; dex_pc++) {
+      ProfileCompilationInfo::DexPcData dex_pc_data;
+      dex_pc_data.SetIsMissingTypes();
       pmi.inline_caches.Put(dex_pc, dex_pc_data);
     }
 
@@ -207,7 +221,13 @@
     }
   }
 
-  // Cannot sizeof the actual arrays so hardcode the values here.
+  void SetIsMissingTypes(/*out*/ProfileCompilationInfo::OfflineProfileMethodInfo* pmi) {
+    for (auto& it : pmi->inline_caches) {  // by reference, so the stored entries are updated
+      it.second.SetIsMissingTypes();
+    }
+  }
+
+  // We cannot apply sizeof to the actual arrays, so hard-code the values here.
   // They should not change anyway.
   static constexpr int kProfileMagicSize = 4;
   static constexpr int kProfileVersionSize = 4;
@@ -530,6 +550,58 @@
   ASSERT_TRUE(loaded_pmi1 == pmi_extra);
 }
 
+TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCaches) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi = GetOfflineProfileMethodInfo();
+
+  // Add methods with inline caches.
+  for (uint16_t method_idx = 0; method_idx < 10; method_idx++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, method_idx, pmi, &saved_info));
+  }
+
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Make some inline caches megamorphic and add them to the profile again.
+  ProfileCompilationInfo saved_info_extra;
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi_extra = GetOfflineProfileMethodInfo();
+  MakeMegamorphic(&pmi_extra);
+  for (uint16_t method_idx = 5; method_idx < 10; method_idx++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, method_idx, pmi, &saved_info_extra));
+  }
+
+  // Mark all inline caches with missing types and add them to the profile again.
+  // This verifies that all inline caches (megamorphic or not) get marked as missing types.
+  ProfileCompilationInfo::OfflineProfileMethodInfo missing_types = GetOfflineProfileMethodInfo();
+  SetIsMissingTypes(&missing_types);
+  for (uint16_t method_idx = 0; method_idx < 10; method_idx++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, method_idx, pmi, &saved_info_extra));
+  }
+
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(saved_info_extra.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+
+  // Merge the profiles so that we have the same view as the file.
+  ASSERT_TRUE(saved_info.MergeWith(saved_info_extra));
+
+  // Check that we get back what we saved.
+  ProfileCompilationInfo loaded_info;
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+  ASSERT_TRUE(loaded_info.Load(GetFd(profile)));
+
+  ASSERT_TRUE(loaded_info.Equals(saved_info));
+
+  ProfileCompilationInfo::OfflineProfileMethodInfo loaded_pmi1;
+  ASSERT_TRUE(loaded_info.GetMethod("dex_location1",
+                                    /* checksum */ 1,
+                                    /* method_idx */ 3,
+                                    &loaded_pmi1));
+  ASSERT_TRUE(loaded_pmi1 == pmi_extra);
+}
+
 TEST_F(ProfileCompilationInfoTest, SaveArtMethodsWithInlineCaches) {
   ScratchFile profile;
 
@@ -570,7 +642,7 @@
   }
 }
 
-TEST_F(ProfileCompilationInfoTest, InvalidChecksumInInlineCahce) {
+TEST_F(ProfileCompilationInfoTest, InvalidChecksumInInlineCache) {
   ScratchFile profile;
 
   ProfileCompilationInfo info;
@@ -662,7 +734,7 @@
   ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
   pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
   ProfileCompilationInfo::DexPcData dex_pc_data;
-  dex_pc_data.is_megamorphic = true;
+  dex_pc_data.SetIsMegamorphic();
   pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
 
   ProfileCompilationInfo info_megamorphic;
@@ -686,4 +758,55 @@
   ASSERT_TRUE(info_no_inline_cache.Save(GetFd(profile)));
 }
 
+TEST_F(ProfileCompilationInfoTest, MissingTypesInlineCachesMerge) {
+  // Create an inline cache with missing types.
+  ProfileCompilationInfo::OfflineProfileMethodInfo pmi;
+  pmi.dex_references.emplace_back("dex_location1", /* checksum */ 1);
+  ProfileCompilationInfo::DexPcData dex_pc_data;
+  dex_pc_data.SetIsMissingTypes();
+  pmi.inline_caches.Put(/*dex_pc*/ 0, dex_pc_data);
+
+  ProfileCompilationInfo info_missing_types;
+  ASSERT_TRUE(AddMethod("dex_location1",
+                        /*checksum*/ 1,
+                        /*method_idx*/ 0,
+                        pmi,
+                        &info_missing_types));
+
+  // Create a profile with no inline caches (for the same method).
+  ProfileCompilationInfo info_no_inline_cache;
+  ASSERT_TRUE(AddMethod("dex_location1",
+                        /*checksum*/ 1,
+                        /*method_idx*/ 0,
+                        &info_no_inline_cache));
+
+  // Merge the missing type cache into the empty one.
+  // Everything should be saved without errors.
+  ASSERT_TRUE(info_no_inline_cache.MergeWith(info_missing_types));
+  ScratchFile profile;
+  ASSERT_TRUE(info_no_inline_cache.Save(GetFd(profile)));
+}
+
+TEST_F(ProfileCompilationInfoTest, LoadShouldClearExistingDataFromProfiles) {
+  ScratchFile profile;
+
+  ProfileCompilationInfo saved_info;
+  // Save a few methods.
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location1", /* checksum */ 1, /* method_idx */ i, &saved_info));
+  }
+  ASSERT_TRUE(saved_info.Save(GetFd(profile)));
+  ASSERT_EQ(0, profile.GetFile()->Flush());
+  ASSERT_TRUE(profile.GetFile()->ResetOffset());
+
+  // Add a bunch of methods to test_info.
+  ProfileCompilationInfo test_info;
+  for (uint16_t i = 0; i < 10; i++) {
+    ASSERT_TRUE(AddMethod("dex_location2", /* checksum */ 2, /* method_idx */ i, &test_info));
+  }
+
+  // Attempt to load the saved profile into test_info.
+  // This should fail since the test_info already contains data and the load would overwrite it.
+  ASSERT_FALSE(test_info.Load(GetFd(profile)));
+}
 }  // namespace art
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index 61e6c41..9d6cd95 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -39,14 +39,9 @@
 ProfileSaver::ProfileSaver(const ProfileSaverOptions& options,
                            const std::string& output_filename,
                            jit::JitCodeCache* jit_code_cache,
-                           const std::vector<std::string>& code_paths,
-                           const std::string& foreign_dex_profile_path,
-                           const std::string& app_data_dir)
+                           const std::vector<std::string>& code_paths)
     : jit_code_cache_(jit_code_cache),
-      foreign_dex_profile_path_(foreign_dex_profile_path),
       shutting_down_(false),
-      last_save_number_of_methods_(0),
-      last_save_number_of_classes_(0),
       last_time_ns_saver_woke_up_(0),
       jit_activity_notifications_(0),
       wait_lock_("ProfileSaver wait lock"),
@@ -58,13 +53,12 @@
       total_number_of_failed_writes_(0),
       total_ms_of_sleep_(0),
       total_ns_of_work_(0),
-      total_number_of_foreign_dex_marks_(0),
       max_number_of_profile_entries_cached_(0),
       total_number_of_hot_spikes_(0),
       total_number_of_wake_ups_(0),
       options_(options) {
   DCHECK(options_.IsEnabled());
-  AddTrackedLocations(output_filename, app_data_dir, code_paths);
+  AddTrackedLocations(output_filename, code_paths);
 }
 
 void ProfileSaver::Run() {
@@ -127,15 +121,16 @@
       break;
     }
 
-    uint16_t new_methods = 0;
+    uint16_t number_of_new_methods = 0;
     uint64_t start_work = NanoTime();
-    bool profile_saved_to_disk = ProcessProfilingInfo(&new_methods);
+    bool profile_saved_to_disk = ProcessProfilingInfo(/*force_save*/false, &number_of_new_methods);
     // Update the notification counter based on result. Note that there might be contention on this
     // but we don't need it to be 100% precise.
     if (!profile_saved_to_disk) {
       // If we didn't save to disk it may be because we didn't have enough new methods.
-      // Set the jit activity notifications to new_methods so we can wake up earlier if needed.
-      jit_activity_notifications_ = new_methods;
+      // Set the jit activity notifications to number_of_new_methods so we can wake up earlier
+      // if needed.
+      jit_activity_notifications_ = number_of_new_methods;
     }
     total_ns_of_work_ += NanoTime() - start_work;
   }
@@ -175,10 +170,10 @@
   }
 }
 
-ProfileCompilationInfo* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
+ProfileSaver::ProfileInfoCache* ProfileSaver::GetCachedProfiledInfo(const std::string& filename) {
   auto info_it = profile_cache_.find(filename);
   if (info_it == profile_cache_.end()) {
-    info_it = profile_cache_.Put(filename, ProfileCompilationInfo());
+    info_it = profile_cache_.Put(filename, ProfileInfoCache());
   }
   return &info_it->second;
 }
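GetCachedProfiledInfo() is a create-on-miss lookup: each tracked profile file now gets its own bookkeeping entry on first use. The same pattern as a standalone sketch over std::map (the real code uses ART's SafeMap, whose Put also returns an iterator; InfoCacheSketch and LookupOrCreate are illustrative names):

    #include <cstdint>
    #include <map>
    #include <string>

    struct InfoCacheSketch {
      uint32_t last_save_number_of_methods = 0;
      uint32_t last_save_number_of_classes = 0;
    };

    InfoCacheSketch* LookupOrCreate(std::map<std::string, InfoCacheSketch>* cache,
                                    const std::string& filename) {
      auto it = cache->find(filename);
      if (it == cache->end()) {
        // First time we see this profile file: start with zero saved counts.
        it = cache->emplace(filename, InfoCacheSketch()).first;
      }
      return &it->second;
    }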
@@ -252,8 +247,9 @@
                        << " (" << classes.GetDexLocation() << ")";
       }
     }
-    ProfileCompilationInfo* info = GetCachedProfiledInfo(filename);
-    info->AddMethodsAndClasses(profile_methods_for_location, resolved_classes_for_location);
+    ProfileInfoCache* cached_info = GetCachedProfiledInfo(filename);
+    cached_info->profile.AddMethodsAndClasses(profile_methods_for_location,
+                                              resolved_classes_for_location);
     total_number_of_profile_entries_cached += resolved_classes_for_location.size();
   }
   max_number_of_profile_entries_cached_ = std::max(
@@ -261,7 +257,7 @@
       total_number_of_profile_entries_cached);
 }
 
-bool ProfileSaver::ProcessProfilingInfo(uint16_t* new_methods) {
+bool ProfileSaver::ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods) {
   ScopedTrace trace(__PRETTY_FUNCTION__);
   SafeMap<std::string, std::set<std::string>> tracked_locations;
   {
@@ -272,10 +268,16 @@
 
   bool profile_file_saved = false;
   uint64_t total_number_of_profile_entries_cached = 0;
-  *new_methods = 0;
+  if (number_of_new_methods != nullptr) {
+    *number_of_new_methods = 0;
+  }
 
   for (const auto& it : tracked_locations) {
-    if (ShuttingDown(Thread::Current())) {
+    if (!force_save && ShuttingDown(Thread::Current())) {
+      // The ProfileSaver is in shutdown mode, meaning a stop request was made and
+      // we need to exit cleanly (by waiting for the saver thread to finish). Unless
+      // we have a request for a forced save, do not do any processing so that we
+      // speed up the exit.
       return true;
     }
     const std::string& filename = it.first;
@@ -287,16 +289,18 @@
       total_number_of_code_cache_queries_++;
     }
 
-    ProfileCompilationInfo* cached_info = GetCachedProfiledInfo(filename);
-    cached_info->AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
+    ProfileInfoCache* cached_info = GetCachedProfiledInfo(filename);
+    ProfileCompilationInfo* cached_profile = &cached_info->profile;
+    cached_profile->AddMethodsAndClasses(profile_methods, std::set<DexCacheResolvedClasses>());
     int64_t delta_number_of_methods =
-        cached_info->GetNumberOfMethods() -
-        static_cast<int64_t>(last_save_number_of_methods_);
+        cached_profile->GetNumberOfMethods() -
+        static_cast<int64_t>(cached_info->last_save_number_of_methods);
     int64_t delta_number_of_classes =
-        cached_info->GetNumberOfResolvedClasses() -
-        static_cast<int64_t>(last_save_number_of_classes_);
+        cached_profile->GetNumberOfResolvedClasses() -
+        static_cast<int64_t>(cached_info->last_save_number_of_classes);
 
-    if (delta_number_of_methods < options_.GetMinMethodsToSave() &&
+    if (!force_save &&
+        delta_number_of_methods < options_.GetMinMethodsToSave() &&
         delta_number_of_classes < options_.GetMinClassesToSave()) {
       VLOG(profiler) << "Not enough information to save to: " << filename
           << " Number of methods: " << delta_number_of_methods
@@ -304,16 +308,19 @@
       total_number_of_skipped_writes_++;
       continue;
     }
-    *new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods), *new_methods);
+    if (number_of_new_methods != nullptr) {
+      *number_of_new_methods = std::max(static_cast<uint16_t>(delta_number_of_methods),
+                                        *number_of_new_methods);
+    }
     uint64_t bytes_written;
     // Force the save. In case the profile data is corrupted or the profile
     // has the wrong version this will "fix" the file to the correct format.
-    if (cached_info->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
-      last_save_number_of_methods_ = cached_info->GetNumberOfMethods();
-      last_save_number_of_classes_ = cached_info->GetNumberOfResolvedClasses();
+    if (cached_profile->MergeAndSave(filename, &bytes_written, /*force*/ true)) {
+      cached_info->last_save_number_of_methods = cached_profile->GetNumberOfMethods();
+      cached_info->last_save_number_of_classes = cached_profile->GetNumberOfResolvedClasses();
       // Clear resolved classes. No need to store them around as
       // they don't change after the first write.
-      cached_info->ClearResolvedClasses();
+      cached_profile->ClearResolvedClasses();
       if (bytes_written > 0) {
         total_number_of_writes_++;
         total_bytes_written_ += bytes_written;
@@ -330,8 +337,8 @@
       total_number_of_failed_writes_++;
     }
     total_number_of_profile_entries_cached +=
-        cached_info->GetNumberOfMethods() +
-        cached_info->GetNumberOfResolvedClasses();
+        cached_profile->GetNumberOfMethods() +
+        cached_profile->GetNumberOfResolvedClasses();
   }
   max_number_of_profile_entries_cached_ = std::max(
       max_number_of_profile_entries_cached_,
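The force_save flag threads through two places above: the shutdown early-exit and the minimum-delta check. Reduced to a standalone predicate, the write decision looks like this (ShouldWriteProfile is an illustrative name; the thresholds correspond to GetMinMethodsToSave() and GetMinClassesToSave()):

    #include <cstdint>

    // Sketch only: mirrors the threshold logic in ProcessProfilingInfo().
    bool ShouldWriteProfile(bool force_save,
                            int64_t delta_methods,
                            int64_t delta_classes,
                            int64_t min_methods_to_save,
                            int64_t min_classes_to_save) {
      if (force_save) {
        return true;  // The final flush from Stop() ignores the IO constraints.
      }
      // The write is skipped only when both deltas are below their thresholds.
      return delta_methods >= min_methods_to_save ||
             delta_classes >= min_classes_to_save;
    }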
@@ -382,9 +389,7 @@
 void ProfileSaver::Start(const ProfileSaverOptions& options,
                          const std::string& output_filename,
                          jit::JitCodeCache* jit_code_cache,
-                         const std::vector<std::string>& code_paths,
-                         const std::string& foreign_dex_profile_path,
-                         const std::string& app_data_dir) {
+                         const std::vector<std::string>& code_paths) {
   DCHECK(options.IsEnabled());
   DCHECK(Runtime::Current()->GetJit() != nullptr);
   DCHECK(!output_filename.empty());
@@ -409,7 +414,7 @@
     // apps which share the same runtime).
     DCHECK_EQ(instance_->jit_code_cache_, jit_code_cache);
     // Add the code_paths to the tracked locations.
-    instance_->AddTrackedLocations(output_filename, app_data_dir, code_paths_to_profile);
+    instance_->AddTrackedLocations(output_filename, code_paths_to_profile);
     return;
   }
 
@@ -419,9 +424,7 @@
   instance_ = new ProfileSaver(options,
                                output_filename,
                                jit_code_cache,
-                               code_paths_to_profile,
-                               foreign_dex_profile_path,
-                               app_data_dir);
+                               code_paths_to_profile);
 
   // Create a new thread which does the saving.
   CHECK_PTHREAD_CALL(
@@ -448,9 +451,6 @@
       return;
     }
     instance_->shutting_down_ = true;
-    if (dump_info) {
-      instance_->DumpInfo(LOG_STREAM(INFO));
-    }
   }
 
   {
@@ -462,8 +462,14 @@
   // Wait for the saver thread to stop.
   CHECK_PTHREAD_CALL(pthread_join, (profiler_pthread, nullptr), "profile saver thread shutdown");
 
+  // Force save everything before destroying the instance.
+  instance_->ProcessProfilingInfo(/*force_save*/true, /*number_of_new_methods*/nullptr);
+
   {
     MutexLock profiler_mutex(Thread::Current(), *Locks::profiler_lock_);
+    if (dump_info) {
+      instance_->DumpInfo(LOG_STREAM(INFO));
+    }
     instance_ = nullptr;
     profiler_pthread_ = 0U;
   }
@@ -481,154 +487,16 @@
 }
 
 void ProfileSaver::AddTrackedLocations(const std::string& output_filename,
-                                       const std::string& app_data_dir,
                                        const std::vector<std::string>& code_paths) {
   auto it = tracked_dex_base_locations_.find(output_filename);
   if (it == tracked_dex_base_locations_.end()) {
     tracked_dex_base_locations_.Put(output_filename,
                                     std::set<std::string>(code_paths.begin(), code_paths.end()));
-    if (!app_data_dir.empty()) {
-      app_data_dirs_.insert(app_data_dir);
-    }
   } else {
     it->second.insert(code_paths.begin(), code_paths.end());
   }
 }
 
-// TODO(calin): This may lead to several calls to realpath.
-// Consider moving the logic to the saver thread (i.e. when notified,
-// only cache the location, and then wake up the saver thread to do the
-// comparisons with the real file paths and to create the markers).
-void ProfileSaver::NotifyDexUse(const std::string& dex_location) {
-  if (!ShouldProfileLocation(dex_location)) {
-    return;
-  }
-  std::set<std::string> app_code_paths;
-  std::string foreign_dex_profile_path;
-  std::set<std::string> app_data_dirs;
-  {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    if (instance_ == nullptr) {
-      return;
-    }
-    // Make a copy so that we don't hold the lock while doing I/O.
-    for (const auto& it : instance_->tracked_dex_base_locations_) {
-      app_code_paths.insert(it.second.begin(), it.second.end());
-    }
-    foreign_dex_profile_path = instance_->foreign_dex_profile_path_;
-    app_data_dirs.insert(instance_->app_data_dirs_.begin(), instance_->app_data_dirs_.end());
-  }
-
-  bool mark_created = MaybeRecordDexUseInternal(dex_location,
-                                                app_code_paths,
-                                                foreign_dex_profile_path,
-                                                app_data_dirs);
-  if (mark_created) {
-    MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
-    if (instance_ != nullptr) {
-      instance_->total_number_of_foreign_dex_marks_++;
-    }
-  }
-}
-
-static bool CheckContainsWithRealPath(const std::set<std::string>& paths_set,
-                                      const std::string& path_to_check) {
-  for (const auto& path : paths_set) {
-    UniqueCPtr<const char[]> real_path(realpath(path.c_str(), nullptr));
-    if (real_path == nullptr) {
-      PLOG(WARNING) << "Could not get realpath for " << path;
-      continue;
-    }
-    std::string real_path_str(real_path.get());
-    if (real_path_str == path_to_check) {
-      return true;
-    }
-  }
-  return false;
-}
-
-// After the call, dex_location_real_path will contain the marker's name.
-static bool CreateForeignDexMarker(const std::string& foreign_dex_profile_path,
-                                   /*in-out*/ std::string* dex_location_real_path) {
-  // For foreign dex files we record a flag on disk. PackageManager will (potentially) take this
-  // into account when deciding how to optimize the loaded dex file.
-  // The expected flag name is the canonical path of the apk where '/' is substituted to '@'.
-  // (it needs to be kept in sync with
-  // frameworks/base/services/core/java/com/android/server/pm/PackageDexOptimizer.java)
-  std::replace(dex_location_real_path->begin(), dex_location_real_path->end(), '/', '@');
-  std::string flag_path = foreign_dex_profile_path + "/" + *dex_location_real_path;
-  // We use O_RDONLY as the access mode because we must supply some access
-  // mode, and there is no access mode that means 'create but do not read' the
-  // file. We will not not actually read from the file.
-  int fd = TEMP_FAILURE_RETRY(open(flag_path.c_str(),
-        O_CREAT | O_RDONLY | O_EXCL | O_CLOEXEC | O_NOFOLLOW, 0));
-  if (fd != -1) {
-    if (close(fd) != 0) {
-      PLOG(WARNING) << "Could not close file after flagging foreign dex use " << flag_path;
-    }
-    return true;
-  } else {
-    if (errno != EEXIST && errno != EACCES) {
-      // Another app could have already created the file, and selinux may not
-      // allow the read access to the file implied by the call to open.
-      PLOG(WARNING) << "Could not create foreign dex use mark " << flag_path;
-      return false;
-    }
-    return true;
-  }
-}
-
-bool ProfileSaver::MaybeRecordDexUseInternal(
-      const std::string& dex_location,
-      const std::set<std::string>& app_code_paths,
-      const std::string& foreign_dex_profile_path,
-      const std::set<std::string>& app_data_dirs) {
-  if (dex_location.empty()) {
-    LOG(WARNING) << "Asked to record foreign dex use with an empty dex location.";
-    return false;
-  }
-  if (foreign_dex_profile_path.empty()) {
-    LOG(WARNING) << "Asked to record foreign dex use without a valid profile path ";
-    return false;
-  }
-
-  if (app_code_paths.find(dex_location) != app_code_paths.end()) {
-    // The dex location belongs to the application code paths. Nothing to record.
-    return false;
-  }
-
-  if (app_data_dirs.find(dex_location) != app_data_dirs.end()) {
-    // The dex location is under the application folder. Nothing to record.
-    return false;
-  }
-
-  // Do another round of checks with the real paths.
-  // Application directory could be a symlink (e.g. /data/data instead of /data/user/0), and we
-  // don't have control over how the dex files are actually loaded (symlink or canonical path),
-
-  // Note that we could cache all the real locations in the saver (since it's an expensive
-  // operation). However we expect that app_code_paths is small (usually 1 element), and
-  // NotifyDexUse is called just a few times in the app lifetime. So we make the compromise
-  // to save some bytes of memory usage.
-
-  UniqueCPtr<const char[]> dex_location_real_path(realpath(dex_location.c_str(), nullptr));
-  if (dex_location_real_path == nullptr) {
-    PLOG(WARNING) << "Could not get realpath for " << dex_location;
-    return false;
-  }
-  std::string dex_location_real_path_str(dex_location_real_path.get());
-
-  if (CheckContainsWithRealPath(app_code_paths, dex_location_real_path_str)) {
-    return false;
-  }
-
-  if (CheckContainsWithRealPath(app_data_dirs, dex_location_real_path_str)) {
-    return false;
-  }
-
-  return CreateForeignDexMarker(foreign_dex_profile_path, &dex_location_real_path_str);
-}
-
 void ProfileSaver::DumpInstanceInfo(std::ostream& os) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
@@ -645,8 +513,6 @@
      << "ProfileSaver total_number_of_failed_writes=" << total_number_of_failed_writes_ << '\n'
      << "ProfileSaver total_ms_of_sleep=" << total_ms_of_sleep_ << '\n'
      << "ProfileSaver total_ms_of_work=" << NsToMs(total_ns_of_work_) << '\n'
-     << "ProfileSaver total_number_of_foreign_dex_marks="
-     << total_number_of_foreign_dex_marks_ << '\n'
      << "ProfileSaver max_number_profile_entries_cached="
      << max_number_of_profile_entries_cached_ << '\n'
      << "ProfileSaver total_number_of_hot_spikes=" << total_number_of_hot_spikes_ << '\n'
@@ -664,8 +530,7 @@
   // but we only use this in testing when we know this won't happen.
   // Refactor the way we handle the instance so that we don't end up in this situation.
   if (saver != nullptr) {
-    uint16_t new_methods;
-    saver->ProcessProfilingInfo(&new_methods);
+    saver->ProcessProfilingInfo(/*force_save*/true, /*number_of_new_methods*/nullptr);
   }
 }
 
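Since number_of_new_methods is now an optional out-parameter, every dereference above is guarded, and both forced callers (ForceProcessProfiles() and Stop()) simply pass nullptr. The guard pattern in isolation (ReportNewMethods is an illustrative name):

    #include <algorithm>
    #include <cstdint>

    void ReportNewMethods(uint16_t delta, /*out, may be null*/ uint16_t* number_of_new_methods) {
      if (number_of_new_methods != nullptr) {
        // Keep the largest delta seen across all tracked profile files.
        *number_of_new_methods = std::max(delta, *number_of_new_methods);
      }
    }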
@@ -674,10 +539,8 @@
                                  uint16_t method_idx) {
   MutexLock mu(Thread::Current(), *Locks::profiler_lock_);
   if (instance_ != nullptr) {
-    ProfileCompilationInfo* info = instance_->GetCachedProfiledInfo(profile);
-    if (info != nullptr) {
-      return info->ContainsMethod(MethodReference(dex_file, method_idx));
-    }
+    const ProfileCompilationInfo& info = instance_->GetCachedProfiledInfo(profile)->profile;
+    return info.ContainsMethod(MethodReference(dex_file, method_idx));
   }
   return false;
 }
diff --git a/runtime/jit/profile_saver.h b/runtime/jit/profile_saver.h
index 9c5e41f..be2bffc 100644
--- a/runtime/jit/profile_saver.h
+++ b/runtime/jit/profile_saver.h
@@ -32,9 +32,7 @@
   static void Start(const ProfileSaverOptions& options,
                     const std::string& output_filename,
                     jit::JitCodeCache* jit_code_cache,
-                    const std::vector<std::string>& code_paths,
-                    const std::string& foreign_dex_profile_path,
-                    const std::string& app_data_dir)
+                    const std::vector<std::string>& code_paths)
       REQUIRES(!Locks::profiler_lock_, !wait_lock_);
 
   // Stops the profile saver thread.
@@ -46,8 +44,6 @@
   // Returns true if the profile saver is started.
   static bool IsStarted() REQUIRES(!Locks::profiler_lock_);
 
-  static void NotifyDexUse(const std::string& dex_location);
-
   // If the profile saver is running, dumps statistics to the `os`. Otherwise it does nothing.
   static void DumpInstanceInfo(std::ostream& os);
 
@@ -56,19 +52,27 @@
       REQUIRES(!Locks::profiler_lock_, !wait_lock_)
       NO_THREAD_SAFETY_ANALYSIS;
 
-  // Just for testing purpose.
+  // For testing or manual purposes (SIGUSR1).
   static void ForceProcessProfiles();
+
+  // Just for testing purposes.
   static bool HasSeenMethod(const std::string& profile,
                             const DexFile* dex_file,
                             uint16_t method_idx);
 
  private:
+  // A cache structure which keeps track of the data saved to disk.
+  // It is used to reduce the number of disk reads and writes.
+  struct ProfileInfoCache {
+    ProfileCompilationInfo profile;
+    uint32_t last_save_number_of_methods = 0;
+    uint32_t last_save_number_of_classes = 0;
+  };
+
   ProfileSaver(const ProfileSaverOptions& options,
                const std::string& output_filename,
                jit::JitCodeCache* jit_code_cache,
-               const std::vector<std::string>& code_paths,
-               const std::string& foreign_dex_profile_path,
-               const std::string& app_data_dir);
+               const std::vector<std::string>& code_paths);
 
   // NO_THREAD_SAFETY_ANALYSIS for static function calling into member function with excludes lock.
   static void* RunProfileSaverThread(void* arg)
@@ -77,9 +81,14 @@
 
   // The run loop for the saver.
   void Run() REQUIRES(!Locks::profiler_lock_, !wait_lock_);
+
   // Processes the existing profiling info from the jit code cache and returns
   // true if it needed to be saved to disk.
-  bool ProcessProfilingInfo(uint16_t* new_methods)
+  // If number_of_new_methods is not null, after the call it will contain the number of new methods
+  // written to disk.
+  // If force_save is true, the saver will ignore any constraints which limit IO (e.g. will write
+  // the profile to disk even if it's just one new method).
+  bool ProcessProfilingInfo(bool force_save, /*out*/uint16_t* number_of_new_methods)
     REQUIRES(!Locks::profiler_lock_)
     REQUIRES(!Locks::mutator_lock_);
 
@@ -90,24 +99,17 @@
   bool ShuttingDown(Thread* self) REQUIRES(!Locks::profiler_lock_);
 
   void AddTrackedLocations(const std::string& output_filename,
-                           const std::string& app_data_dir,
                            const std::vector<std::string>& code_paths)
       REQUIRES(Locks::profiler_lock_);
 
   // Retrieves the cached profile compilation info for the given profile file.
   // If no entry exists, a new empty one will be created, added to the cache and
   // then returned.
-  ProfileCompilationInfo* GetCachedProfiledInfo(const std::string& filename);
+  ProfileInfoCache* GetCachedProfiledInfo(const std::string& filename);
   // Fetches the current resolved classes and methods from the ClassLinker and stores them in the
   // profile_cache_ for later save.
   void FetchAndCacheResolvedClassesAndMethods();
 
-  static bool MaybeRecordDexUseInternal(
-      const std::string& dex_location,
-      const std::set<std::string>& tracked_locations,
-      const std::string& foreign_dex_profile_path,
-      const std::set<std::string>& app_data_dirs);
-
   void DumpInfo(std::ostream& os);
 
   // The only instance of the saver.
@@ -121,17 +123,8 @@
   // It maps profile locations to code paths (dex base locations).
   SafeMap<std::string, std::set<std::string>> tracked_dex_base_locations_
       GUARDED_BY(Locks::profiler_lock_);
-  // The directory were the we should store the code paths.
-  std::string foreign_dex_profile_path_;
-
-  // A list of application directories, used to infer if a loaded dex belongs
-  // to the application or not. Multiple application data directories are possible when
-  // different apps share the same runtime.
-  std::set<std::string> app_data_dirs_ GUARDED_BY(Locks::profiler_lock_);
 
   bool shutting_down_ GUARDED_BY(Locks::profiler_lock_);
-  uint32_t last_save_number_of_methods_;
-  uint32_t last_save_number_of_classes_;
   uint64_t last_time_ns_saver_woke_up_ GUARDED_BY(wait_lock_);
   uint32_t jit_activity_notifications_;
 
@@ -139,7 +132,7 @@
   // profile information. The size of this cache is usually very small and tops
   // out at just a few hundred entries in the ProfileCompilationInfo objects.
   // It helps avoiding unnecessary writes to disk.
-  SafeMap<std::string, ProfileCompilationInfo> profile_cache_;
+  SafeMap<std::string, ProfileInfoCache> profile_cache_;
 
   // Save period condition support.
   Mutex wait_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
@@ -152,7 +145,6 @@
   uint64_t total_number_of_failed_writes_;
   uint64_t total_ms_of_sleep_;
   uint64_t total_ns_of_work_;
-  uint64_t total_number_of_foreign_dex_marks_;
   // TODO(calin): replace with an actual size.
   uint64_t max_number_of_profile_entries_cached_;
   uint64_t total_number_of_hot_spikes_;
diff --git a/runtime/jit/profile_saver_options.h b/runtime/jit/profile_saver_options.h
index a6385d7..c8d256f 100644
--- a/runtime/jit/profile_saver_options.h
+++ b/runtime/jit/profile_saver_options.h
@@ -21,7 +21,7 @@
 struct ProfileSaverOptions {
  public:
   static constexpr uint32_t kMinSavePeriodMs = 20 * 1000;  // 20 seconds
-  static constexpr uint32_t kSaveResolvedClassesDelayMs = 2 * 1000;  // 2 seconds
+  static constexpr uint32_t kSaveResolvedClassesDelayMs = 5 * 1000;  // 5 seconds
   // Minimum number of JIT samples during launch to include a method into the profile.
   static constexpr uint32_t kStartupMethodSamples = 1;
   static constexpr uint32_t kMinMethodsToSave = 10;
@@ -37,7 +37,8 @@
     min_methods_to_save_(kMinMethodsToSave),
     min_classes_to_save_(kMinClassesToSave),
     min_notification_before_wake_(kMinNotificationBeforeWake),
-    max_notification_before_wake_(kMaxNotificationBeforeWake) {}
+    max_notification_before_wake_(kMaxNotificationBeforeWake),
+    profile_path_("") {}
 
   ProfileSaverOptions(
       bool enabled,
@@ -47,7 +48,8 @@
       uint32_t min_methods_to_save,
       uint32_t min_classes_to_save,
       uint32_t min_notification_before_wake,
-      uint32_t max_notification_before_wake):
+      uint32_t max_notification_before_wake,
+      const std::string& profile_path):
     enabled_(enabled),
     min_save_period_ms_(min_save_period_ms),
     save_resolved_classes_delay_ms_(save_resolved_classes_delay_ms),
@@ -55,7 +57,8 @@
     min_methods_to_save_(min_methods_to_save),
     min_classes_to_save_(min_classes_to_save),
     min_notification_before_wake_(min_notification_before_wake),
-    max_notification_before_wake_(max_notification_before_wake) {}
+    max_notification_before_wake_(max_notification_before_wake),
+    profile_path_(profile_path) {}
 
   bool IsEnabled() const {
     return enabled_;
@@ -85,6 +88,9 @@
   uint32_t GetMaxNotificationBeforeWake() const {
     return max_notification_before_wake_;
   }
+  std::string GetProfilePath() const {
+    return profile_path_;
+  }
 
   friend std::ostream & operator<<(std::ostream &os, const ProfileSaverOptions& pso) {
     os << "enabled_" << pso.enabled_
@@ -106,6 +112,7 @@
   uint32_t min_classes_to_save_;
   uint32_t min_notification_before_wake_;
   uint32_t max_notification_before_wake_;
+  std::string profile_path_;
 };
 
 }  // namespace art
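The new profile_path_ option is carried by the struct but nothing in this file consumes it yet. A hedged usage sketch of the getter (LogProfilePathIfSet is an illustrative name and relies on ART's logging macros; the real consumer of this option is outside this diff):

    // Illustrative only: how a caller might consult the new option.
    void LogProfilePathIfSet(const ProfileSaverOptions& options) {
      if (options.IsEnabled() && !options.GetProfilePath().empty()) {
        LOG(INFO) << "Saving profiles under: " << options.GetProfilePath();
      }
    }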
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index f42a8da..d6881aa 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -108,9 +108,15 @@
     }
   }
 
-  void IncrementInlineUse() {
-    DCHECK_NE(current_inline_uses_, std::numeric_limits<uint16_t>::max());
+  // Increments the number of times this method is currently being inlined.
+  // Returns whether the increment was successful, that is, whether it could
+  // be done without overflowing.
+  bool IncrementInlineUse() {
+    if (current_inline_uses_ == std::numeric_limits<uint16_t>::max()) {
+      return false;
+    }
     current_inline_uses_++;
+    return true;
   }
 
   void DecrementInlineUse() {
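With the DCHECK replaced by a bool return, callers are expected to bail out of inlining when the 16-bit counter saturates rather than crash. A hedged caller sketch (TryRecordInlineUse is an illustrative name; a successful call must still be paired with DecrementInlineUse() later):

    // Sketch of the new caller contract for IncrementInlineUse().
    bool TryRecordInlineUse(ProfilingInfo* info) {
      if (!info->IncrementInlineUse()) {
        return false;  // Counter saturated; the caller should skip inlining.
      }
      return true;  // Balance with DecrementInlineUse() when done.
    }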
diff --git a/runtime/jvalue.h b/runtime/jvalue.h
index 398bfbc..f61a07c 100644
--- a/runtime/jvalue.h
+++ b/runtime/jvalue.h
@@ -39,7 +39,9 @@
   }
 
   uint16_t GetC() const { return c; }
-  void SetC(uint16_t new_c) { c = new_c; }
+  void SetC(uint16_t new_c) {
+    j = static_cast<int64_t>(new_c);  // Zero-extend to 64 bits.
+  }
 
   double GetD() const { return d; }
   void SetD(double new_d) { d = new_d; }
@@ -66,7 +68,9 @@
   }
 
   uint8_t GetZ() const { return z; }
-  void SetZ(uint8_t new_z) { z = new_z; }
+  void SetZ(uint8_t new_z) {
+    j = static_cast<int64_t>(new_z);  // Zero-extend to 64 bits.
+  }
 
   mirror::Object** GetGCRoot() { return &l; }
 
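Both setters now store through j, the widest member, so the unused high bits are explicitly zeroed. A self-contained illustration of the stale-bits hazard a narrow union store leaves behind (sketch only, not ART code; reading the other member this way carries the usual type-punning caveat):

    #include <cstdint>

    union Sketch {
      uint16_t c;
      int64_t j;
    };

    int64_t NarrowStoreLeavesStaleHighBits() {
      Sketch v;
      v.j = -1;  // All 64 bits set.
      v.c = 7;   // Narrow store: bits 16..63 still hold the old value.
      // Reading j now yields 0xFFFFFFFFFFFF0007 on a little-endian target,
      // whereas j = static_cast<int64_t>(uint16_t{7}) yields 7.
      return v.j;
    }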
diff --git a/runtime/linear_alloc.cc b/runtime/linear_alloc.cc
index f91b0ed..e9db9b8 100644
--- a/runtime/linear_alloc.cc
+++ b/runtime/linear_alloc.cc
@@ -33,6 +33,11 @@
   return allocator_.Alloc(size);
 }
 
+void* LinearAlloc::AllocAlign16(Thread* self, size_t size) {
+  MutexLock mu(self, lock_);
+  return allocator_.AllocAlign16(size);
+}
+
 size_t LinearAlloc::GetUsedMemory() const {
   MutexLock mu(Thread::Current(), lock_);
   return allocator_.BytesUsed();
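The new entry point just forwards to the wrapped allocator_ under lock_. As a rough illustration of what an arena-style AllocAlign16 typically does, assuming a bump-pointer scheme (BumpAllocAlign16 is an illustrative name, not the real implementation):

    #include <cstddef>
    #include <cstdint>

    void* BumpAllocAlign16(uint8_t** cursor, uint8_t* end, size_t size) {
      uintptr_t addr = reinterpret_cast<uintptr_t>(*cursor);
      uintptr_t aligned = (addr + 15u) & ~static_cast<uintptr_t>(15u);  // Round up to 16.
      uint8_t* result = reinterpret_cast<uint8_t*>(aligned);
      if (result + size > end) {
        return nullptr;  // Out of space; a real arena would chain a new block.
      }
      *cursor = result + size;  // Bump past the allocation.
      return result;
    }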
diff --git a/runtime/linear_alloc.h b/runtime/linear_alloc.h
index df7f17d..384b2e3 100644
--- a/runtime/linear_alloc.h
+++ b/runtime/linear_alloc.h
@@ -29,6 +29,7 @@
   explicit LinearAlloc(ArenaPool* pool);
 
   void* Alloc(Thread* self, size_t size) REQUIRES(!lock_);
+  void* AllocAlign16(Thread* self, size_t size) REQUIRES(!lock_);
 
   // Realloc never frees the input pointer, it is the caller's job to do this if necessary.
   void* Realloc(Thread* self, void* ptr, size_t old_size, size_t new_size) REQUIRES(!lock_);
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index 93c212b..40309b9 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -962,4 +962,52 @@
   }
 }
 
+void MemMap::AlignBy(size_t size) {
+  CHECK_EQ(begin_, base_begin_) << "Unsupported";
+  CHECK_EQ(size_, base_size_) << "Unsupported";
+  CHECK_GT(size, static_cast<size_t>(kPageSize));
+  CHECK_ALIGNED(size, kPageSize);
+  if (IsAlignedParam(reinterpret_cast<uintptr_t>(base_begin_), size) &&
+      IsAlignedParam(base_size_, size)) {
+    // Already aligned.
+    return;
+  }
+  uint8_t* base_begin = reinterpret_cast<uint8_t*>(base_begin_);
+  uint8_t* base_end = base_begin + base_size_;
+  uint8_t* aligned_base_begin = AlignUp(base_begin, size);
+  uint8_t* aligned_base_end = AlignDown(base_end, size);
+  CHECK_LE(base_begin, aligned_base_begin);
+  CHECK_LE(aligned_base_end, base_end);
+  size_t aligned_base_size = aligned_base_end - aligned_base_begin;
+  CHECK_LT(aligned_base_begin, aligned_base_end)
+      << "base_begin = " << reinterpret_cast<void*>(base_begin)
+      << " base_end = " << reinterpret_cast<void*>(base_end);
+  CHECK_GE(aligned_base_size, size);
+  // Unmap the unaligned parts.
+  if (base_begin < aligned_base_begin) {
+    MEMORY_TOOL_MAKE_UNDEFINED(base_begin, aligned_base_begin - base_begin);
+    CHECK_EQ(munmap(base_begin, aligned_base_begin - base_begin), 0)
+        << "base_begin=" << reinterpret_cast<void*>(base_begin)
+        << " aligned_base_begin=" << reinterpret_cast<void*>(aligned_base_begin);
+  }
+  if (aligned_base_end < base_end) {
+    MEMORY_TOOL_MAKE_UNDEFINED(aligned_base_end, base_end - aligned_base_end);
+    CHECK_EQ(munmap(aligned_base_end, base_end - aligned_base_end), 0)
+        << "base_end=" << reinterpret_cast<void*>(base_end)
+        << " aligned_base_end=" << reinterpret_cast<void*>(aligned_base_end);
+  }
+  std::lock_guard<std::mutex> mu(*mem_maps_lock_);
+  base_begin_ = aligned_base_begin;
+  base_size_ = aligned_base_size;
+  begin_ = aligned_base_begin;
+  size_ = aligned_base_size;
+  DCHECK(maps_ != nullptr);
+  if (base_begin < aligned_base_begin) {
+    auto it = maps_->find(base_begin);
+    CHECK(it != maps_->end()) << "MemMap not found";
+    maps_->erase(it);
+    maps_->insert(std::make_pair(base_begin_, this));
+  }
+}
+
 }  // namespace art
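AlignBy() keeps the largest inner region aligned at both ends and munmaps the trimmed pages. A worked example of the AlignUp/AlignDown arithmetic for the 14-page mapping used by the test below, assuming 4 KiB pages and a base that is page-aligned but not 2-page-aligned (AlignByExample is illustrative):

    #include <cstdint>

    uintptr_t AlignByExample() {
      const uintptr_t kPage = 4096;
      const uintptr_t kAlign = 2 * kPage;
      uintptr_t base_begin = 0x10001000;  // Page-aligned, not 2-page-aligned.
      uintptr_t base_end = base_begin + 14 * kPage;
      uintptr_t aligned_begin = (base_begin + kAlign - 1) & ~(kAlign - 1);  // AlignUp.
      uintptr_t aligned_end = base_end & ~(kAlign - 1);                     // AlignDown.
      // One page is unmapped at the front and one at the back:
      // aligned_begin == base_begin + kPage, aligned_end == base_end - kPage.
      return aligned_end - aligned_begin;  // 12 pages remain, aligned at both ends.
    }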
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 71db3f7..ceb4c33 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -193,6 +193,9 @@
   // intermittently.
   void TryReadable();
 
+  // Align the map by unmapping the unaligned parts at the lower and the higher ends.
+  void AlignBy(size_t size);
+
  private:
   MemMap(const std::string& name,
          uint8_t* begin,
@@ -222,10 +225,10 @@
                            bool low_4gb);
 
   const std::string name_;
-  uint8_t* const begin_;  // Start of data.
+  uint8_t* begin_;  // Start of data. May be changed by AlignBy.
   size_t size_;  // Length of data.
 
-  void* const base_begin_;  // Page-aligned base address.
+  void* base_begin_;  // Page-aligned base address. May be changed by AlignBy.
   size_t base_size_;  // Length of mapping. May be changed by RemapAtEnd (ie Zygote).
   int prot_;  // Protection of the map.
 
diff --git a/runtime/mem_map_test.cc b/runtime/mem_map_test.cc
index e703b78..aa306ac 100644
--- a/runtime/mem_map_test.cc
+++ b/runtime/mem_map_test.cc
@@ -431,4 +431,108 @@
   ASSERT_FALSE(MemMap::CheckNoGaps(map0.get(), map2.get()));
 }
 
+TEST_F(MemMapTest, AlignBy) {
+  CommonInit();
+  std::string error_msg;
+  // Cast the page size to size_t.
+  const size_t page_size = static_cast<size_t>(kPageSize);
+  // Map a region.
+  std::unique_ptr<MemMap> m0(MemMap::MapAnonymous("MemMapTest_AlignByTest_map0",
+                                                  nullptr,
+                                                  14 * page_size,
+                                                  PROT_READ | PROT_WRITE,
+                                                  false,
+                                                  false,
+                                                  &error_msg));
+  uint8_t* base0 = m0->Begin();
+  ASSERT_TRUE(base0 != nullptr) << error_msg;
+  ASSERT_EQ(m0->Size(), 14 * page_size);
+  ASSERT_EQ(BaseBegin(m0.get()), base0);
+  ASSERT_EQ(BaseSize(m0.get()), m0->Size());
+
+  // Break it into several regions by using RemapAtEnd.
+  std::unique_ptr<MemMap> m1(m0->RemapAtEnd(base0 + 3 * page_size,
+                                            "MemMapTest_AlignByTest_map1",
+                                            PROT_READ | PROT_WRITE,
+                                            &error_msg));
+  uint8_t* base1 = m1->Begin();
+  ASSERT_TRUE(base1 != nullptr) << error_msg;
+  ASSERT_EQ(base1, base0 + 3 * page_size);
+  ASSERT_EQ(m0->Size(), 3 * page_size);
+
+  std::unique_ptr<MemMap> m2(m1->RemapAtEnd(base1 + 4 * page_size,
+                                            "MemMapTest_AlignByTest_map2",
+                                            PROT_READ | PROT_WRITE,
+                                            &error_msg));
+  uint8_t* base2 = m2->Begin();
+  ASSERT_TRUE(base2 != nullptr) << error_msg;
+  ASSERT_EQ(base2, base1 + 4 * page_size);
+  ASSERT_EQ(m1->Size(), 4 * page_size);
+
+  std::unique_ptr<MemMap> m3(m2->RemapAtEnd(base2 + 3 * page_size,
+                                            "MemMapTest_AlignByTest_map1",
+                                            PROT_READ | PROT_WRITE,
+                                            &error_msg));
+  uint8_t* base3 = m3->Begin();
+  ASSERT_TRUE(base3 != nullptr) << error_msg;
+  ASSERT_EQ(base3, base2 + 3 * page_size);
+  ASSERT_EQ(m2->Size(), 3 * page_size);
+  ASSERT_EQ(m3->Size(), 4 * page_size);
+
+  uint8_t* end0 = base0 + m0->Size();
+  uint8_t* end1 = base1 + m1->Size();
+  uint8_t* end2 = base2 + m2->Size();
+  uint8_t* end3 = base3 + m3->Size();
+
+  ASSERT_EQ(static_cast<size_t>(end3 - base0), 14 * page_size);
+
+  if (IsAlignedParam(base0, 2 * page_size)) {
+    ASSERT_FALSE(IsAlignedParam(base1, 2 * page_size));
+    ASSERT_FALSE(IsAlignedParam(base2, 2 * page_size));
+    ASSERT_TRUE(IsAlignedParam(base3, 2 * page_size));
+    ASSERT_TRUE(IsAlignedParam(end3, 2 * page_size));
+  } else {
+    ASSERT_TRUE(IsAlignedParam(base1, 2 * page_size));
+    ASSERT_TRUE(IsAlignedParam(base2, 2 * page_size));
+    ASSERT_FALSE(IsAlignedParam(base3, 2 * page_size));
+    ASSERT_FALSE(IsAlignedParam(end3, 2 * page_size));
+  }
+
+  // Align by 2 * page_size.
+  m0->AlignBy(2 * page_size);
+  m1->AlignBy(2 * page_size);
+  m2->AlignBy(2 * page_size);
+  m3->AlignBy(2 * page_size);
+
+  EXPECT_TRUE(IsAlignedParam(m0->Begin(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m1->Begin(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m2->Begin(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m3->Begin(), 2 * page_size));
+
+  EXPECT_TRUE(IsAlignedParam(m0->Begin() + m0->Size(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m1->Begin() + m1->Size(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m2->Begin() + m2->Size(), 2 * page_size));
+  EXPECT_TRUE(IsAlignedParam(m3->Begin() + m3->Size(), 2 * page_size));
+
+  if (IsAlignedParam(base0, 2 * page_size)) {
+    EXPECT_EQ(m0->Begin(), base0);
+    EXPECT_EQ(m0->Begin() + m0->Size(), end0 - page_size);
+    EXPECT_EQ(m1->Begin(), base1 + page_size);
+    EXPECT_EQ(m1->Begin() + m1->Size(), end1 - page_size);
+    EXPECT_EQ(m2->Begin(), base2 + page_size);
+    EXPECT_EQ(m2->Begin() + m2->Size(), end2);
+    EXPECT_EQ(m3->Begin(), base3);
+    EXPECT_EQ(m3->Begin() + m3->Size(), end3);
+  } else {
+    EXPECT_EQ(m0->Begin(), base0 + page_size);
+    EXPECT_EQ(m0->Begin() + m0->Size(), end0);
+    EXPECT_EQ(m1->Begin(), base1);
+    EXPECT_EQ(m1->Begin() + m1->Size(), end1);
+    EXPECT_EQ(m2->Begin(), base2);
+    EXPECT_EQ(m2->Begin() + m2->Size(), end2 - page_size);
+    EXPECT_EQ(m3->Begin(), base3 + page_size);
+    EXPECT_EQ(m3->Begin() + m3->Size(), end3 - page_size);
+  }
+}
+
 }  // namespace art
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
index 58c5d17..bd7c4ad 100644
--- a/runtime/method_handles.cc
+++ b/runtime/method_handles.cc
@@ -49,10 +49,19 @@
 bool GetUnboxedPrimitiveType(ObjPtr<mirror::Class> klass, Primitive::Type* type)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
-#define LOOKUP_PRIMITIVE(primitive, _, __, ___)                         \
-  if (klass->DescriptorEquals(Primitive::BoxedDescriptor(primitive))) { \
-    *type = primitive;                                                  \
-    return true;                                                        \
+  std::string storage;
+  const char* descriptor = klass->GetDescriptor(&storage);
+  static const char kJavaLangPrefix[] = "Ljava/lang/";
+  static const size_t kJavaLangPrefixSize = sizeof(kJavaLangPrefix) - 1;
+  if (strncmp(descriptor, kJavaLangPrefix, kJavaLangPrefixSize) != 0) {
+    return false;
+  }
+
+  descriptor += kJavaLangPrefixSize;
+#define LOOKUP_PRIMITIVE(primitive, _, java_name, ___) \
+  if (strcmp(descriptor, #java_name ";") == 0) {       \
+    *type = primitive;                                 \
+    return true;                                       \
   }
 
   PRIMITIVES_LIST(LOOKUP_PRIMITIVE);
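The rewrite trades one DescriptorEquals() call per boxed type for a single prefix test plus cheap suffix compares. The same shape reduced to one boxed type (LooksLikeBoxedBoolean is illustrative; the macro above expands the suffix compare for every PRIMITIVES_LIST entry):

    #include <cstring>

    bool LooksLikeBoxedBoolean(const char* descriptor) {
      static const char kPrefix[] = "Ljava/lang/";
      const size_t kPrefixLen = sizeof(kPrefix) - 1;
      if (strncmp(descriptor, kPrefix, kPrefixLen) != 0) {
        return false;  // Fast reject for anything outside java.lang.
      }
      return strcmp(descriptor + kPrefixLen, "Boolean;") == 0;
    }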
@@ -141,21 +150,23 @@
     if (from->DescriptorEquals("Ljava/lang/Object;")) {
       // Object might be converted into a primitive during unboxing.
       return true;
-    } else if (Primitive::IsNumericType(to_primitive) &&
-               from->DescriptorEquals("Ljava/lang/Number;")) {
+    }
+
+    if (Primitive::IsNumericType(to_primitive) && from->DescriptorEquals("Ljava/lang/Number;")) {
       // Number might be unboxed into any of the number primitive types.
       return true;
     }
+
     Primitive::Type unboxed_type;
     if (GetUnboxedPrimitiveType(from, &unboxed_type)) {
       if (unboxed_type == to_primitive) {
         // Straightforward unboxing conversion such as Boolean => boolean.
         return true;
-      } else {
-        // Check if widening operations for numeric primitives would work,
-        // such as Byte => byte => long.
-        return Primitive::IsWidenable(unboxed_type, to_primitive);
       }
+
+      // Check if widening operations for numeric primitives would work,
+      // such as Byte => byte => long.
+      return Primitive::IsWidenable(unboxed_type, to_primitive);
     }
   }
 
@@ -372,25 +383,18 @@
 static inline size_t GetInsForProxyOrNativeMethod(ArtMethod* method)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(method->IsNative() || method->IsProxyMethod());
-
   method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
-  size_t num_ins = 0;
-  // Separate accounting for the receiver, which isn't a part of the
-  // shorty.
-  if (!method->IsStatic()) {
-    ++num_ins;
-  }
+  uint32_t shorty_length = 0;
+  const char* shorty = method->GetShorty(&shorty_length);
 
-  uint32_t shorty_len = 0;
-  const char* shorty = method->GetShorty(&shorty_len);
-  for (size_t i = 1; i < shorty_len; ++i) {
-    const char c = shorty[i];
-    ++num_ins;
-    if (c == 'J' || c == 'D') {
+  // Static methods do not take a receiver. The receiver is not counted in
+  // shorty_length, though the return type is.
+  size_t num_ins = method->IsStatic() ? shorty_length - 1 : shorty_length;
+  for (const char* c = shorty + 1; *c != '\0'; ++c) {
+    if (*c == 'J' || *c == 'D') {
       ++num_ins;
     }
   }
-
   return num_ins;
 }
 
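The shorty encodes the return type first and then the parameters, so shorty_length already contains exactly one non-parameter entry; for instance methods that entry's slot is effectively traded for the receiver. The same arithmetic as a standalone function (CountInVRegs is an illustrative name):

    #include <cstddef>
    #include <cstring>

    size_t CountInVRegs(const char* shorty, bool is_static) {
      size_t shorty_length = strlen(shorty);  // Return type + parameters.
      // Static: drop the return-type entry. Non-static: keep the count, since
      // the receiver takes the place of the return-type slot in the total.
      size_t num_ins = is_static ? shorty_length - 1 : shorty_length;
      for (const char* c = shorty + 1; *c != '\0'; ++c) {
        if (*c == 'J' || *c == 'D') {
          ++num_ins;  // Longs and doubles occupy two vregs.
        }
      }
      return num_ins;
    }

    // CountInVRegs("VJI", /*is_static=*/true) == 3: J takes two slots, I one.
    // With a receiver the same shorty gives 4.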
@@ -402,7 +406,10 @@
   ObjPtr<mirror::ObjectArray<mirror::Class>> param_types(callsite_type->GetPTypes());
   if (param_types->GetLength() == 1) {
     ObjPtr<mirror::Class> param(param_types->GetWithoutChecks(0));
-    return param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame);
+    // NB: Comparing the descriptor here appears faster in cycle simulation than using:
+    //   param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame)
+    // Costs are 98 vs 173 cycles per invocation.
+    return param->DescriptorEquals("Ldalvik/system/EmulatedStackFrame;");
   }
 
   return false;
@@ -416,35 +423,8 @@
                                      ShadowFrame& shadow_frame,
                                      const uint32_t (&args)[Instruction::kMaxVarArgRegs],
                                      uint32_t first_arg,
-                                     JValue* result,
-                                     const mirror::MethodHandle::Kind handle_kind)
+                                     JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  // For virtual and interface methods ensure called_method points to
-  // the actual method to invoke.
-  if (handle_kind == mirror::MethodHandle::Kind::kInvokeVirtual ||
-      handle_kind == mirror::MethodHandle::Kind::kInvokeInterface) {
-    uint32_t receiver_reg = is_range ? first_arg : args[0];
-    ObjPtr<mirror::Object> receiver(shadow_frame.GetVRegReference(receiver_reg));
-    if (IsCallerTransformer(callsite_type)) {
-      // The current receiver is an emulated stack frame, the method's
-      // receiver needs to be fetched from there as the emulated frame
-      // will be unpacked into a new frame.
-      receiver = ObjPtr<mirror::EmulatedStackFrame>::DownCast(receiver)->GetReceiver();
-    }
-
-    ObjPtr<mirror::Class> declaring_class(called_method->GetDeclaringClass());
-    if (receiver == nullptr || receiver->GetClass() != declaring_class) {
-      // Verify that _vRegC is an object reference and of the type expected by
-      // the receiver.
-      if (!VerifyObjectIsClass(receiver, declaring_class)) {
-        DCHECK(self->IsExceptionPending());
-        return false;
-      }
-      called_method = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(
-          called_method, kRuntimePointerSize);
-    }
-  }
-
   // Compute method information.
   const DexFile::CodeItem* code_item = called_method->GetCodeItem();
 
@@ -513,17 +493,23 @@
           result->SetL(0);
           return false;
         }
-      } else if (!ConvertAndCopyArgumentsFromCallerFrame<is_range>(self,
-                                                                   callsite_type,
-                                                                   target_type,
-                                                                   shadow_frame,
-                                                                   args,
-                                                                   first_arg,
-                                                                   first_dest_reg,
-                                                                   new_shadow_frame)) {
-        DCHECK(self->IsExceptionPending());
-        result->SetL(0);
-        return false;
+      } else {
+        if (!callsite_type->IsConvertible(target_type.Get())) {
+          ThrowWrongMethodTypeException(target_type.Get(), callsite_type.Get());
+          return false;
+        }
+        if (!ConvertAndCopyArgumentsFromCallerFrame<is_range>(self,
+                                                              callsite_type,
+                                                              target_type,
+                                                              shadow_frame,
+                                                              args,
+                                                              first_arg,
+                                                              first_dest_reg,
+                                                              new_shadow_frame)) {
+          DCHECK(self->IsExceptionPending());
+          result->SetL(0);
+          return false;
+        }
       }
     }
   }
@@ -548,13 +534,13 @@
     if (ConvertReturnValue(emulated_stack_type, target_type, &local_result)) {
       emulated_stack_frame->SetReturnValue(self, local_result);
       return true;
-    } else {
-      DCHECK(self->IsExceptionPending());
-      return false;
     }
-  } else {
-    return ConvertReturnValue(callsite_type, target_type, result);
+
+    DCHECK(self->IsExceptionPending());
+    return false;
   }
+
+  return ConvertReturnValue(callsite_type, target_type, result);
 }
 
 template <bool is_range>
@@ -650,98 +636,130 @@
   return klass;
 }
 
+ArtMethod* RefineTargetMethod(Thread* self,
+                              ShadowFrame& shadow_frame,
+                              const mirror::MethodHandle::Kind& handle_kind,
+                              Handle<mirror::MethodType> handle_type,
+                              Handle<mirror::MethodType> callsite_type,
+                              const uint32_t receiver_reg,
+                              ArtMethod* target_method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (handle_kind == mirror::MethodHandle::Kind::kInvokeVirtual ||
+      handle_kind == mirror::MethodHandle::Kind::kInvokeInterface) {
+    // For virtual and interface methods ensure target_method points to
+    // the actual method to invoke.
+    ObjPtr<mirror::Object> receiver(shadow_frame.GetVRegReference(receiver_reg));
+    if (IsCallerTransformer(callsite_type)) {
+      // The current receiver is an emulated stack frame, the method's
+      // receiver needs to be fetched from there as the emulated frame
+      // will be unpacked into a new frame.
+      receiver = ObjPtr<mirror::EmulatedStackFrame>::DownCast(receiver)->GetReceiver();
+    }
+
+    ObjPtr<mirror::Class> declaring_class(target_method->GetDeclaringClass());
+    if (receiver == nullptr || receiver->GetClass() != declaring_class) {
+      // Verify that _vRegC is an object reference and of the type expected by
+      // the receiver.
+      if (!VerifyObjectIsClass(receiver, declaring_class)) {
+        DCHECK(self->IsExceptionPending());
+        return nullptr;
+      }
+      return receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(
+          target_method, kRuntimePointerSize);
+    }
+  } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeDirect) {
+    // String constructors are a special case, they are replaced with
+    // StringFactory methods.
+    if (target_method->IsConstructor() && target_method->GetDeclaringClass()->IsStringClass()) {
+      DCHECK(handle_type->GetRType()->IsStringClass());
+      return WellKnownClasses::StringInitToStringFactory(target_method);
+    }
+  } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeSuper) {
+    ObjPtr<mirror::Class> declaring_class = target_method->GetDeclaringClass();
+
+    // Note that we're not dynamically dispatching on the type of the receiver
+    // here. We use the static type of the "receiver" object that we've
+    // recorded in the method handle's type, which will be the same as the
+    // special caller that was specified at the point of lookup.
+    ObjPtr<mirror::Class> referrer_class = handle_type->GetPTypes()->Get(0);
+    if (!declaring_class->IsInterface()) {
+      ObjPtr<mirror::Class> super_class = referrer_class->GetSuperClass();
+      uint16_t vtable_index = target_method->GetMethodIndex();
+      DCHECK(super_class != nullptr);
+      DCHECK(super_class->HasVTable());
+      // Note that super_class is a super of referrer_class and target_method
+      // will always be declared by super_class (or one of its super classes).
+      DCHECK_LT(vtable_index, super_class->GetVTableLength());
+      return super_class->GetVTableEntry(vtable_index, kRuntimePointerSize);
+    } else {
+      return referrer_class->FindVirtualMethodForInterfaceSuper(target_method, kRuntimePointerSize);
+    }
+  }
+  return target_method;
+}
+
 template <bool is_range>
-bool DoInvokePolymorphicUnchecked(Thread* self,
-                                  ShadowFrame& shadow_frame,
-                                  Handle<mirror::MethodHandle> method_handle,
-                                  Handle<mirror::MethodType> callsite_type,
-                                  const uint32_t (&args)[Instruction::kMaxVarArgRegs],
-                                  uint32_t first_arg,
-                                  JValue* result)
+bool DoInvokePolymorphicMethod(Thread* self,
+                               ShadowFrame& shadow_frame,
+                               Handle<mirror::MethodHandle> method_handle,
+                               Handle<mirror::MethodType> callsite_type,
+                               const uint32_t (&args)[Instruction::kMaxVarArgRegs],
+                               uint32_t first_arg,
+                               JValue* result)
   REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<1> hs(self);
   Handle<mirror::MethodType> handle_type(hs.NewHandle(method_handle->GetMethodType()));
   const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
-  if (IsInvoke(handle_kind)) {
-    // Get the method we're actually invoking along with the kind of
-    // invoke that is desired. We don't need to perform access checks at this
-    // point because they would have been performed on our behalf at the point
-    // of creation of the method handle.
-    ArtMethod* called_method = method_handle->GetTargetMethod();
-    CHECK(called_method != nullptr);
+  DCHECK(IsInvoke(handle_kind));
 
-    if (handle_kind == mirror::MethodHandle::Kind::kInvokeVirtual ||
-        handle_kind == mirror::MethodHandle::Kind::kInvokeInterface) {
-      // TODO: Unfortunately, we have to postpone dynamic receiver based checks
-      // because the receiver might be cast or might come from an emulated stack
-      // frame, which means that it is unknown at this point. We perform these
-      // checks inside DoCallPolymorphic right before we do the actual invoke.
-    } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeDirect) {
-      // String constructors are a special case, they are replaced with StringFactory
-      // methods.
-      if (called_method->IsConstructor() && called_method->GetDeclaringClass()->IsStringClass()) {
-        DCHECK(handle_type->GetRType()->IsStringClass());
-        called_method = WellKnownClasses::StringInitToStringFactory(called_method);
-      }
-    } else if (handle_kind == mirror::MethodHandle::Kind::kInvokeSuper) {
-      ObjPtr<mirror::Class> declaring_class = called_method->GetDeclaringClass();
+  // Get the method we're actually invoking along with the kind of
+  // invoke that is desired. We don't need to perform access checks at this
+  // point because they would have been performed on our behalf at the point
+  // of creation of the method handle.
+  ArtMethod* target_method = method_handle->GetTargetMethod();
+  uint32_t receiver_reg = is_range ? first_arg : args[0];
+  ArtMethod* called_method = RefineTargetMethod(self,
+                                                shadow_frame,
+                                                handle_kind,
+                                                handle_type,
+                                                callsite_type,
+                                                receiver_reg,
+                                                target_method);
+  if (called_method == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return false;
+  }
 
-      // Note that we're not dynamically dispatching on the type of the receiver
-      // here. We use the static type of the "receiver" object that we've
-      // recorded in the method handle's type, which will be the same as the
-      // special caller that was specified at the point of lookup.
-      ObjPtr<mirror::Class> referrer_class = handle_type->GetPTypes()->Get(0);
-      if (!declaring_class->IsInterface()) {
-        ObjPtr<mirror::Class> super_class = referrer_class->GetSuperClass();
-        uint16_t vtable_index = called_method->GetMethodIndex();
-        DCHECK(super_class != nullptr);
-        DCHECK(super_class->HasVTable());
-        // Note that super_class is a super of referrer_class and called_method
-        // will always be declared by super_class (or one of its super classes).
-        DCHECK_LT(vtable_index, super_class->GetVTableLength());
-        called_method = super_class->GetVTableEntry(vtable_index, kRuntimePointerSize);
-      } else {
-        called_method = referrer_class->FindVirtualMethodForInterfaceSuper(
-            called_method, kRuntimePointerSize);
-      }
-      CHECK(called_method != nullptr);
-    }
-    if (IsInvokeTransform(handle_kind)) {
-      // There are two cases here - method handles representing regular
-      // transforms and those representing call site transforms. Method
-      // handles for call site transforms adapt their MethodType to match
-      // the call site. For these, the |callee_type| is the same as the
-      // |callsite_type|. The VarargsCollector is such a tranform, its
-      // method type depends on the call site, ie. x(a) or x(a, b), or
-      // x(a, b, c). The VarargsCollector invokes a variable arity method
-      // with the arity arguments in an array.
-      Handle<mirror::MethodType> callee_type =
-          (handle_kind == mirror::MethodHandle::Kind::kInvokeCallSiteTransform) ? callsite_type
-          : handle_type;
-      return DoCallTransform<is_range>(called_method,
+  if (IsInvokeTransform(handle_kind)) {
+    // There are two cases here - method handles representing regular
+    // transforms and those representing call site transforms. Method
+    // handles for call site transforms adapt their MethodType to match
+    // the call site. For these, the |callee_type| is the same as the
+    // |callsite_type|. The VarargsCollector is such a transform, its
+    // method type depends on the call site, i.e. x(a) or x(a, b), or
+    // x(a, b, c). The VarargsCollector invokes a variable arity method
+    // with the arity arguments in an array.
+    Handle<mirror::MethodType> callee_type =
+        (handle_kind == mirror::MethodHandle::Kind::kInvokeCallSiteTransform) ? callsite_type
+        : handle_type;
+    return DoCallTransform<is_range>(called_method,
+                                     callsite_type,
+                                     callee_type,
+                                     self,
+                                     shadow_frame,
+                                     method_handle /* receiver */,
+                                     args,
+                                     first_arg,
+                                     result);
+  } else {
+    return DoCallPolymorphic<is_range>(called_method,
                                        callsite_type,
-                                       callee_type,
+                                       handle_type,
                                        self,
                                        shadow_frame,
-                                       method_handle /* receiver */,
                                        args,
                                        first_arg,
                                        result);
-
-    } else {
-      return DoCallPolymorphic<is_range>(called_method,
-                                         callsite_type,
-                                         handle_type,
-                                         self,
-                                         shadow_frame,
-                                         args,
-                                         first_arg,
-                                         result,
-                                         handle_kind);
-    }
-  } else {
-    LOG(FATAL) << "Unreachable: " << handle_kind;
-    UNREACHABLE();
   }
 }
 
@@ -948,55 +966,30 @@
   ObjPtr<mirror::MethodType> handle_type(method_handle->GetMethodType());
   CHECK(handle_type != nullptr);
 
-  if (!IsInvokeTransform(handle_kind)) {
-    if (UNLIKELY(!IsCallerTransformer(callsite_type) &&
-                 !callsite_type->IsConvertible(handle_type.Ptr()))) {
+  if (IsFieldAccess(handle_kind)) {
+    DCHECK(!callsite_type->IsExactMatch(handle_type.Ptr()));
+    if (!callsite_type->IsConvertible(handle_type.Ptr())) {
       ThrowWrongMethodTypeException(handle_type.Ptr(), callsite_type.Get());
       return false;
     }
+    const bool do_convert = true;
+    return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
+        self,
+        shadow_frame,
+        method_handle,
+        callsite_type,
+        args,
+        first_arg,
+        result);
   }
 
-  if (IsFieldAccess(handle_kind)) {
-    if (UNLIKELY(callsite_type->IsExactMatch(handle_type.Ptr()))) {
-      const bool do_convert = false;
-      return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
-          self,
-          shadow_frame,
-          method_handle,
-          callsite_type,
-          args,
-          first_arg,
-          result);
-    } else {
-      const bool do_convert = true;
-      return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
-          self,
-          shadow_frame,
-          method_handle,
-          callsite_type,
-          args,
-          first_arg,
-          result);
-    }
-  }
-
-  if (UNLIKELY(callsite_type->IsExactMatch(handle_type.Ptr()))) {
-    return DoInvokePolymorphicUnchecked<is_range>(self,
-                                                  shadow_frame,
-                                                  method_handle,
-                                                  callsite_type,
-                                                  args,
-                                                  first_arg,
-                                                  result);
-  } else {
-    return DoInvokePolymorphicUnchecked<is_range>(self,
-                                                  shadow_frame,
-                                                  method_handle,
-                                                  callsite_type,
-                                                  args,
-                                                  first_arg,
-                                                  result);
-  }
+  return DoInvokePolymorphicMethod<is_range>(self,
+                                             shadow_frame,
+                                             method_handle,
+                                             callsite_type,
+                                             args,
+                                             first_arg,
+                                             result);
 }
 
 template <bool is_range>
@@ -1008,32 +1001,9 @@
                               uint32_t first_arg,
                               JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  // We need to check the nominal type of the handle in addition to the
-  // real type. The "nominal" type is present when MethodHandle.asType is
-  // called any handle, and results in the declared type of the handle
-  // changing.
-  ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
-  if (UNLIKELY(nominal_type != nullptr)) {
-    if (UNLIKELY(!callsite_type->IsExactMatch(nominal_type.Ptr()))) {
-      ThrowWrongMethodTypeException(nominal_type.Ptr(), callsite_type.Get());
-      return false;
-    }
-    return DoInvokePolymorphicNonExact<is_range>(self,
-                                                 shadow_frame,
-                                                 method_handle,
-                                                 callsite_type,
-                                                 args,
-                                                 first_arg,
-                                                 result);
-  }
-
-  ObjPtr<mirror::MethodType> handle_type(method_handle->GetMethodType());
-  if (UNLIKELY(!callsite_type->IsExactMatch(handle_type.Ptr()))) {
-    ThrowWrongMethodTypeException(handle_type.Ptr(), callsite_type.Get());
-    return false;
-  }
-
+  StackHandleScope<1> hs(self);
   const mirror::MethodHandle::Kind handle_kind = method_handle->GetHandleKind();
+  Handle<mirror::MethodType> method_handle_type(hs.NewHandle(method_handle->GetMethodType()));
   if (IsFieldAccess(handle_kind)) {
     const bool do_convert = false;
     return DoInvokePolymorphicFieldAccess<is_range, do_convert>(
@@ -1046,13 +1016,68 @@
         result);
   }
 
-  return DoInvokePolymorphicUnchecked<is_range>(self,
+  // Slow-path check.
+  if (IsInvokeTransform(handle_kind) || IsCallerTransformer(callsite_type)) {
+    return DoInvokePolymorphicMethod<is_range>(self,
+                                               shadow_frame,
+                                               method_handle,
+                                               callsite_type,
+                                               args,
+                                               first_arg,
+                                               result);
+  }
+
+  // We are on the fast path. This is equivalent to DoCallPolymorphic without the conversion paths.
+  ArtMethod* target_method = method_handle->GetTargetMethod();
+  uint32_t receiver_reg = is_range ? first_arg : args[0];
+  ArtMethod* called_method = RefineTargetMethod(self,
                                                 shadow_frame,
-                                                method_handle,
+                                                handle_kind,
+                                                method_handle_type,
                                                 callsite_type,
-                                                args,
-                                                first_arg,
-                                                result);
+                                                receiver_reg,
+                                                target_method);
+  if (called_method == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return false;
+  }
+
+  // Compute method information.
+  const DexFile::CodeItem* code_item = called_method->GetCodeItem();
+  uint16_t num_regs;
+  size_t num_input_regs;
+  size_t first_dest_reg;
+  if (LIKELY(code_item != nullptr)) {
+    num_regs = code_item->registers_size_;
+    first_dest_reg = num_regs - code_item->ins_size_;
+    num_input_regs = code_item->ins_size_;
+    // Parameter registers go at the end of the shadow frame.
+    DCHECK_NE(first_dest_reg, (size_t)-1);
+  } else {
+    // No local regs for proxy and native methods.
+    DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
+    num_regs = num_input_regs = GetInsForProxyOrNativeMethod(called_method);
+    first_dest_reg = 0;
+  }
+
+  // Allocate shadow frame on the stack.
+  const char* old_cause = self->StartAssertNoThreadSuspension("DoCallCommon");
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, /* dex pc */ 0);
+  ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
+  CopyArgumentsFromCallerFrame<is_range>(shadow_frame,
+                                         new_shadow_frame,
+                                         args,
+                                         first_arg,
+                                         first_dest_reg,
+                                         num_input_regs);
+  self->EndAssertNoThreadSuspension(old_cause);
+
+  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
+  if (self->IsExceptionPending()) {
+    return false;
+  }
+  return true;
 }
 
 }  // namespace
@@ -1067,7 +1092,35 @@
                          uint32_t first_arg,
                          JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::MethodType> method_handle_type = method_handle->GetMethodType();
   if (IsMethodHandleInvokeExact(invoke_method)) {
+    // We need to check the nominal type of the handle in addition to the
+    // real type. The "nominal" type is present when MethodHandle.asType is
+    // called on any handle, and results in the declared type of the handle
+    // changing.
+    ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
+    if (UNLIKELY(nominal_type != nullptr)) {
+      if (UNLIKELY(!callsite_type->IsExactMatch(nominal_type.Ptr()))) {
+        ThrowWrongMethodTypeException(nominal_type.Ptr(), callsite_type.Get());
+        return false;
+      }
+
+      if (LIKELY(!nominal_type->IsExactMatch(method_handle_type.Ptr()))) {
+        // Different nominal type means we have to treat as non-exact.
+        return DoInvokePolymorphicNonExact<is_range>(self,
+                                                     shadow_frame,
+                                                     method_handle,
+                                                     callsite_type,
+                                                     args,
+                                                     first_arg,
+                                                     result);
+      }
+    }
+
+    if (!callsite_type->IsExactMatch(method_handle_type.Ptr())) {
+      ThrowWrongMethodTypeException(method_handle_type.Ptr(), callsite_type.Get());
+      return false;
+    }
     return DoInvokePolymorphicExact<is_range>(self,
                                               shadow_frame,
                                               method_handle,
@@ -1076,6 +1129,16 @@
                                               first_arg,
                                               result);
   } else {
+    if (UNLIKELY(callsite_type->IsExactMatch(method_handle_type.Ptr()))) {
+      // A non-exact invoke that can be invoked exactly.
+      return DoInvokePolymorphicExact<is_range>(self,
+                                                shadow_frame,
+                                                method_handle,
+                                                callsite_type,
+                                                args,
+                                                first_arg,
+                                                result);
+    }
     return DoInvokePolymorphicNonExact<is_range>(self,
                                                  shadow_frame,
                                                  method_handle,
diff --git a/runtime/method_info.h b/runtime/method_info.h
new file mode 100644
index 0000000..5a72125
--- /dev/null
+++ b/runtime/method_info.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_METHOD_INFO_H_
+#define ART_RUNTIME_METHOD_INFO_H_
+
+#include "base/logging.h"
+#include "leb128.h"
+#include "memory_region.h"
+
+namespace art {
+
+// MethodInfo holds per-method data that is not dedupe friendly. Currently it only holds method indices.
+// Putting this data in MethodInfo instead of code infos saves ~5% oat size.
+class MethodInfo {
+  using MethodIndexType = uint16_t;
+
+ public:
+  // Reading mode: decodes the LEB128 count and wraps the index table that follows it.
+  explicit MethodInfo(const uint8_t* ptr) {
+    if (ptr != nullptr) {
+      num_method_indices_ = DecodeUnsignedLeb128(&ptr);
+      region_ = MemoryRegion(const_cast<uint8_t*>(ptr),
+                             num_method_indices_ * sizeof(MethodIndexType));
+    }
+  }
+
+  // Writing mode: encodes the LEB128 count at |ptr| and maps the index table after it.
+  MethodInfo(uint8_t* ptr, size_t num_method_indices) : num_method_indices_(num_method_indices) {
+    DCHECK(ptr != nullptr);
+    ptr = EncodeUnsignedLeb128(ptr, num_method_indices_);
+    region_ = MemoryRegion(ptr, num_method_indices_ * sizeof(MethodIndexType));
+  }
+
+  static size_t ComputeSize(size_t num_method_indices) {
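+    // Encode the count into a scratch buffer just to measure its LEB128 length.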
+    uint8_t temp[8];
+    uint8_t* ptr = temp;
+    ptr = EncodeUnsignedLeb128(ptr, num_method_indices);
+    return (ptr - temp) + num_method_indices * sizeof(MethodIndexType);
+  }
+
+  ALWAYS_INLINE MethodIndexType GetMethodIndex(size_t index) const {
+    // Use bit functions to avoid pesky alignment requirements.
+    return region_.LoadBits(index * BitSizeOf<MethodIndexType>(), BitSizeOf<MethodIndexType>());
+  }
+
+  void SetMethodIndex(size_t index, MethodIndexType method_index) {
+    region_.StoreBits(index * BitSizeOf<MethodIndexType>(),
+                      method_index,
+                      BitSizeOf<MethodIndexType>());
+  }
+
+  size_t NumMethodIndices() const {
+    return num_method_indices_;
+  }
+
+ private:
+  size_t num_method_indices_ = 0u;
+  MemoryRegion region_;
+};
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_METHOD_INFO_H_
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index f56226b..04c80c5 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -402,8 +402,8 @@
     return (T)static_cast<uintptr_t>(
         AsLongArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
   }
-  return (T)static_cast<uintptr_t>(
-      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx));
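+  // Cast via uint32_t first so the 32-bit element is zero-extended rather
+  // than sign-extended when widened to uintptr_t.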
+  return (T)static_cast<uintptr_t>(static_cast<uint32_t>(
+      AsIntArray<kVerifyFlags, kReadBarrierOption>()->GetWithoutChecks(idx)));
 }
 
 template<bool kTransactionActive, bool kUnchecked>
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 16cf30f..51d9d24 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -198,6 +198,13 @@
   T GetElementPtrSize(uint32_t idx, PointerSize ptr_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  void** ElementAddress(size_t index, PointerSize ptr_size) REQUIRES_SHARED(Locks::mutator_lock_) {
+    DCHECK_LT(index, static_cast<size_t>(GetLength()));
+    return reinterpret_cast<void**>(reinterpret_cast<uint8_t*>(this) +
+                                    Array::DataOffset(static_cast<size_t>(ptr_size)).Uint32Value() +
+                                    static_cast<size_t>(ptr_size) * index);
+  }
+
   template<bool kTransactionActive = false, bool kUnchecked = false>
   void SetElementPtrSize(uint32_t idx, uint64_t element, PointerSize ptr_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 2cff47e..be3b937 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -29,6 +29,7 @@
 #include "dex_file.h"
 #include "gc/heap-inl.h"
 #include "iftable.h"
+#include "class_ext-inl.h"
 #include "object_array-inl.h"
 #include "read_barrier-inl.h"
 #include "reference-inl.h"
@@ -83,6 +84,12 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
+inline ClassExt* Class::GetExtData() {
+  return GetFieldObject<ClassExt, kVerifyFlags, kReadBarrierOption>(
+      OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+}
+
+template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline DexCache* Class::GetDexCache() {
   return GetFieldObject<DexCache, kVerifyFlags, kReadBarrierOption>(
       OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_));
@@ -841,7 +848,7 @@
   }
 }
 
-inline ObjectArray<Class>* Class::GetInterfaces() {
+inline ObjectArray<Class>* Class::GetProxyInterfaces() {
   CHECK(IsProxyClass());
   // First static field.
   auto* field = GetStaticField(0);
@@ -850,7 +857,7 @@
   return GetFieldObject<ObjectArray<Class>>(field_offset);
 }
 
-inline ObjectArray<ObjectArray<Class>>* Class::GetThrows() {
+inline ObjectArray<ObjectArray<Class>>* Class::GetProxyThrows() {
   CHECK(IsProxyClass());
   // Second static field.
   auto* field = GetStaticField(1);
@@ -920,7 +927,7 @@
   } else if (IsArrayClass()) {
     return 2;
   } else if (IsProxyClass()) {
-    ObjectArray<Class>* interfaces = GetInterfaces();
+    ObjectArray<Class>* interfaces = GetProxyInterfaces();
     return interfaces != nullptr ? interfaces->GetLength() : 0;
   } else {
     const DexFile::TypeList* interfaces = GetInterfaceTypeList();
@@ -951,6 +958,10 @@
   for (ArtMethod& method : GetMethods(pointer_size)) {
     method.VisitRoots<kReadBarrierOption>(visitor, pointer_size);
   }
+  ObjPtr<ClassExt> ext(GetExtData<kDefaultVerifyFlags, kReadBarrierOption>());
+  if (!ext.IsNull()) {
+    ext->VisitNativeRoots<kReadBarrierOption, Visitor>(visitor, pointer_size);
+  }
 }
 
 inline IterationRange<StrideIterator<ArtMethod>> Class::GetDirectMethods(PointerSize pointer_size) {
@@ -1086,7 +1097,9 @@
   if (!IsTemp() && ShouldHaveEmbeddedVTable<kVerifyNone, kReadBarrierOption>()) {
     for (int32_t i = 0, count = GetEmbeddedVTableLength(); i < count; ++i) {
       ArtMethod* method = GetEmbeddedVTableEntry(i, pointer_size);
-      ArtMethod* new_method = visitor(method);
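+      // Give the visitor the address the updated entry will be stored at in
+      // the destination copy.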
+      void** dest_addr = reinterpret_cast<void**>(reinterpret_cast<uintptr_t>(dest) +
+          EmbeddedVTableEntryOffset(i, pointer_size).Uint32Value());
+      ArtMethod* new_method = visitor(method, dest_addr);
       if (method != new_method) {
         dest->SetEmbeddedVTableEntryUnchecked(i, new_method, pointer_size);
       }
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index eb2ec9b..26af488 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -64,10 +64,6 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-ClassExt* Class::GetExtData() {
-  return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
-}
-
 ClassExt* Class::EnsureExtDataPresent(Thread* self) {
   ObjPtr<ClassExt> existing(GetExtData());
   if (!existing.IsNull()) {
@@ -946,7 +942,7 @@
     DCHECK(interface != nullptr);
     return interface;
   } else if (klass->IsProxyClass()) {
-    ObjPtr<ObjectArray<Class>> interfaces = klass->GetInterfaces();
+    ObjPtr<ObjectArray<Class>> interfaces = klass->GetProxyInterfaces();
     DCHECK(interfaces != nullptr);
     return interfaces->Get(idx);
   } else {
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index d34f09c..27aecd5 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -206,10 +206,10 @@
     return status >= kStatusResolved || status == kStatusErrorResolved;
   }
 
-  // Returns true if the class was compile-time verified.
+  // Returns true if the class should be verified at runtime.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool IsCompileTimeVerified() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetStatus<kVerifyFlags>() >= kStatusRetryVerificationAtRuntime;
+  bool ShouldVerifyAtRuntime() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetStatus<kVerifyFlags>() == kStatusRetryVerificationAtRuntime;
   }
 
   // Returns true if the class has been verified.
@@ -595,7 +595,7 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(PointerSize pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 73;
+    uint32_t vtable_entries = Object::kVTableLength + 67;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
@@ -1162,6 +1162,8 @@
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ClassExt* GetExtData() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the ExtData for this class, allocating one if necessary. This should be the only way
@@ -1262,10 +1264,10 @@
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // For proxy class only.
-  ObjectArray<Class>* GetInterfaces() REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjectArray<Class>* GetProxyInterfaces() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // For proxy class only.
-  ObjectArray<ObjectArray<Class>>* GetThrows() REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjectArray<ObjectArray<Class>>* GetProxyThrows() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // For reference class only.
   MemberOffset GetDisableIntrinsicFlagOffset() REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/mirror/class_ext-inl.h b/runtime/mirror/class_ext-inl.h
new file mode 100644
index 0000000..feaac85
--- /dev/null
+++ b/runtime/mirror/class_ext-inl.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_CLASS_EXT_INL_H_
+#define ART_RUNTIME_MIRROR_CLASS_EXT_INL_H_
+
+#include "class_ext.h"
+
+#include "art_method-inl.h"
+
+namespace art {
+namespace mirror {
+
+template<ReadBarrierOption kReadBarrierOption, class Visitor>
+void ClassExt::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
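+  // Obsolete methods are only reachable through ClassExt, so their roots must
+  // be visited here.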
+  ObjPtr<PointerArray> arr(GetObsoleteMethods<kDefaultVerifyFlags, kReadBarrierOption>());
+  if (arr.IsNull()) {
+    return;
+  }
+  int32_t len = arr->GetLength();
+  for (int32_t i = 0; i < len; i++) {
+    ArtMethod* method = arr->GetElementPtrSize<ArtMethod*,
+                                               kDefaultVerifyFlags,
+                                               kReadBarrierOption>(i, pointer_size);
+    if (method != nullptr) {
+      method->VisitRoots<kReadBarrierOption>(visitor, pointer_size);
+    }
+  }
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_CLASS_EXT_INL_H_
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
index 7270079..94e4b88 100644
--- a/runtime/mirror/class_ext.cc
+++ b/runtime/mirror/class_ext.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "class_ext.h"
+#include "class_ext-inl.h"
 
 #include "art_method-inl.h"
 #include "base/casts.h"
@@ -24,7 +24,6 @@
 #include "gc/accounting/card_table-inl.h"
 #include "object-inl.h"
 #include "object_array.h"
-#include "object_array-inl.h"
 #include "stack_trace_element.h"
 #include "utils.h"
 #include "well_known_classes.h"
@@ -34,6 +33,11 @@
 
 GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
 
+uint32_t ClassExt::ClassSize(PointerSize pointer_size) {
+  uint32_t vtable_entries = Object::kVTableLength;
+  return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
+}
+
 void ClassExt::SetObsoleteArrays(ObjPtr<PointerArray> methods,
                                  ObjPtr<ObjectArray<DexCache>> dex_caches) {
   DCHECK_EQ(GetLockOwnerThreadId(), Thread::Current()->GetThreadId())
@@ -113,9 +117,9 @@
   }
 }
 
-void ClassExt::SetOriginalDexFileBytes(ObjPtr<ByteArray> bytes) {
+void ClassExt::SetOriginalDexFile(ObjPtr<Object> bytes) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, original_dex_file_bytes_), bytes);
+  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, original_dex_file_), bytes);
 }
 
 void ClassExt::SetClass(ObjPtr<Class> dalvik_system_ClassExt) {
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
index ad8a61b..708665d 100644
--- a/runtime/mirror/class_ext.h
+++ b/runtime/mirror/class_ext.h
@@ -17,9 +17,8 @@
 #ifndef ART_RUNTIME_MIRROR_CLASS_EXT_H_
 #define ART_RUNTIME_MIRROR_CLASS_EXT_H_
 
-#include "class-inl.h"
-
 #include "array.h"
+#include "class.h"
 #include "dex_cache.h"
 #include "gc_root.h"
 #include "object.h"
@@ -36,10 +35,7 @@
 // C++ mirror of dalvik.system.ClassExt
 class MANAGED ClassExt : public Object {
  public:
-  static uint32_t ClassSize(PointerSize pointer_size) {
-    uint32_t vtable_entries = Object::kVTableLength;
-    return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
-  }
+  static uint32_t ClassSize(PointerSize pointer_size);
 
   // Size of an instance of dalvik.system.ClassExt.
   static constexpr uint32_t InstanceSize() {
@@ -57,15 +53,18 @@
         OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_dex_caches_));
   }
 
-  PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<PointerArray>(OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
+  inline PointerArray* GetObsoleteMethods() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<PointerArray, kVerifyFlags, kReadBarrierOption>(
+        OFFSET_OF_OBJECT_MEMBER(ClassExt, obsolete_methods_));
   }
 
-  ByteArray* GetOriginalDexFileBytes() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldObject<ByteArray>(OFFSET_OF_OBJECT_MEMBER(ClassExt, original_dex_file_bytes_));
+  Object* GetOriginalDexFile() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<Object>(OFFSET_OF_OBJECT_MEMBER(ClassExt, original_dex_file_));
   }
 
-  void SetOriginalDexFileBytes(ObjPtr<ByteArray> bytes) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetOriginalDexFile(ObjPtr<Object> bytes) REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetObsoleteArrays(ObjPtr<PointerArray> methods, ObjPtr<ObjectArray<DexCache>> dex_caches)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -78,6 +77,10 @@
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier, class Visitor>
+  inline void VisitNativeRoots(Visitor& visitor, PointerSize pointer_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   static ClassExt* Alloc(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
@@ -86,7 +89,7 @@
 
   HeapReference<PointerArray> obsolete_methods_;
 
-  HeapReference<ByteArray> original_dex_file_bytes_;
+  HeapReference<Object> original_dex_file_;
 
   // The saved verification error of this class.
   HeapReference<Object> verify_error_;
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index 29bf6a0..5d3af50 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -24,6 +24,7 @@
 #include "base/casts.h"
 #include "base/enums.h"
 #include "base/logging.h"
+#include "dex_file.h"
 #include "gc_root.h"
 #include "mirror/class.h"
 #include "mirror/call_site.h"
@@ -36,8 +37,17 @@
 namespace art {
 namespace mirror {
 
+template <typename T>
+inline void NativeDexCachePair<T>::Initialize(std::atomic<NativeDexCachePair<T>>* dex_cache,
+                                              PointerSize pointer_size) {
+  NativeDexCachePair<T> first_elem;
+  first_elem.object = nullptr;
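+  // Mark slot 0 as empty: a zero-initialized pair would otherwise look like a
+  // valid entry for element index 0, which also hashes to slot 0.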
+  first_elem.index = InvalidIndexForSlot(0);
+  DexCache::SetNativePairPtrSize(dex_cache, 0, first_elem, pointer_size);
+}
+
 inline uint32_t DexCache::ClassSize(PointerSize pointer_size) {
-  uint32_t vtable_entries = Object::kVTableLength + 5;
+  const uint32_t vtable_entries = Object::kVTableLength;
   return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
 }
 
@@ -164,20 +174,36 @@
   }
 }
 
+inline uint32_t DexCache::FieldSlotIndex(uint32_t field_idx) {
+  DCHECK_LT(field_idx, GetDexFile()->NumFieldIds());
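+  // kDexCacheFieldCacheSize is a power of two, so the modulo reduces to a mask.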
+  const uint32_t slot_idx = field_idx % kDexCacheFieldCacheSize;
+  DCHECK_LT(slot_idx, NumResolvedFields());
+  return slot_idx;
+}
+
 inline ArtField* DexCache::GetResolvedField(uint32_t field_idx, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
-  DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
-  ArtField* field = GetElementPtrSize(GetResolvedFields(), field_idx, ptr_size);
-  if (field == nullptr || field->GetDeclaringClass()->IsErroneous()) {
-    return nullptr;
-  }
-  return field;
+  auto pair = GetNativePairPtrSize(GetResolvedFields(), FieldSlotIndex(field_idx), ptr_size);
+  return pair.GetObjectForIndex(field_idx);
 }
 
 inline void DexCache::SetResolvedField(uint32_t field_idx, ArtField* field, PointerSize ptr_size) {
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
-  DCHECK_LT(field_idx, NumResolvedFields());  // NOTE: Unchecked, i.e. not throwing AIOOB.
-  SetElementPtrSize(GetResolvedFields(), field_idx, field, ptr_size);
+  DCHECK(field != nullptr);
+  FieldDexCachePair pair(field, field_idx);
+  SetNativePairPtrSize(GetResolvedFields(), FieldSlotIndex(field_idx), pair, ptr_size);
+}
+
+inline void DexCache::ClearResolvedField(uint32_t field_idx, PointerSize ptr_size) {
+  DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), ptr_size);
+  uint32_t slot_idx = FieldSlotIndex(field_idx);
+  auto* resolved_fields = GetResolvedFields();
+  // This is racy but should only be called from the single-threaded ImageWriter.
+  DCHECK(Runtime::Current()->IsAotCompiler());
+  if (GetNativePairPtrSize(resolved_fields, slot_idx, ptr_size).index == field_idx) {
+    FieldDexCachePair cleared(nullptr, FieldDexCachePair::InvalidIndexForSlot(slot_idx));
+    SetNativePairPtrSize(resolved_fields, slot_idx, cleared, ptr_size);
+  }
 }
 
 inline ArtMethod* DexCache::GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size) {
@@ -225,6 +251,40 @@
   }
 }
 
+template <typename T>
+NativeDexCachePair<T> DexCache::GetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                                     size_t idx,
+                                                     PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair64>*>(pair_array);
+    ConversionPair64 value = AtomicLoadRelaxed16B(&array[idx]);
+    return NativeDexCachePair<T>(reinterpret_cast64<T*>(value.first),
+                                 dchecked_integral_cast<size_t>(value.second));
+  } else {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair32>*>(pair_array);
+    ConversionPair32 value = array[idx].load(std::memory_order_relaxed);
+    return NativeDexCachePair<T>(reinterpret_cast<T*>(value.first), value.second);
+  }
+}
+
+template <typename T>
+void DexCache::SetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                    size_t idx,
+                                    NativeDexCachePair<T> pair,
+                                    PointerSize ptr_size) {
+  if (ptr_size == PointerSize::k64) {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair64>*>(pair_array);
+    ConversionPair64 v(reinterpret_cast64<uint64_t>(pair.object), pair.index);
+    AtomicStoreRelease16B(&array[idx], v);
+  } else {
+    auto* array = reinterpret_cast<std::atomic<ConversionPair32>*>(pair_array);
+    ConversionPair32 v(
+        dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(pair.object)),
+        dchecked_integral_cast<uint32_t>(pair.index));
+    array[idx].store(v, std::memory_order_release);
+  }
+}
+
 template <typename T,
           ReadBarrierOption kReadBarrierOption,
           typename Visitor>
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 1b8b391..c95d92e 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -52,8 +52,12 @@
              dex_file->NumTypeIds() != 0u ||
              dex_file->NumMethodIds() != 0u ||
              dex_file->NumFieldIds() != 0u) {
+    static_assert(ArenaAllocator::kAlignment == 8, "Expecting arena alignment of 8.");
+    DCHECK(layout.Alignment() == 8u || layout.Alignment() == 16u);
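+    // On 64-bit, the hash-based field array stores 16-byte pairs that are
+    // accessed with 16-byte atomics and therefore need 16-byte alignment.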
     // Zero-initialized.
-    raw_arrays = reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
+    raw_arrays = (layout.Alignment() == 16u)
+        ? reinterpret_cast<uint8_t*>(linear_alloc->AllocAlign16(self, layout.Size()))
+        : reinterpret_cast<uint8_t*>(linear_alloc->Alloc(self, layout.Size()));
   }
 
   mirror::StringDexCacheType* strings = (dex_file->NumStringIds() == 0u) ? nullptr :
@@ -62,17 +66,21 @@
       reinterpret_cast<mirror::TypeDexCacheType*>(raw_arrays + layout.TypesOffset());
   ArtMethod** methods = (dex_file->NumMethodIds() == 0u) ? nullptr :
       reinterpret_cast<ArtMethod**>(raw_arrays + layout.MethodsOffset());
-  ArtField** fields = (dex_file->NumFieldIds() == 0u) ? nullptr :
-      reinterpret_cast<ArtField**>(raw_arrays + layout.FieldsOffset());
+  mirror::FieldDexCacheType* fields = (dex_file->NumFieldIds() == 0u) ? nullptr :
+      reinterpret_cast<mirror::FieldDexCacheType*>(raw_arrays + layout.FieldsOffset());
 
-  size_t num_strings = mirror::DexCache::kDexCacheStringCacheSize;
+  size_t num_strings = kDexCacheStringCacheSize;
   if (dex_file->NumStringIds() < num_strings) {
     num_strings = dex_file->NumStringIds();
   }
-  size_t num_types = mirror::DexCache::kDexCacheTypeCacheSize;
+  size_t num_types = kDexCacheTypeCacheSize;
   if (dex_file->NumTypeIds() < num_types) {
     num_types = dex_file->NumTypeIds();
   }
+  size_t num_fields = kDexCacheFieldCacheSize;
+  if (dex_file->NumFieldIds() < num_fields) {
+    num_fields = dex_file->NumFieldIds();
+  }
 
   // Note that we allocate the method type dex caches regardless of this flag,
   // and we make sure here that they're not used by the runtime. This is in the
@@ -80,17 +88,17 @@
   //
   // If this needs to be mitigated in a production system running this code,
   // DexCache::kDexCacheMethodTypeCacheSize can be set to zero.
-  mirror::MethodTypeDexCacheType* method_types = nullptr;
+  MethodTypeDexCacheType* method_types = nullptr;
   size_t num_method_types = 0;
 
-  if (dex_file->NumProtoIds() < mirror::DexCache::kDexCacheMethodTypeCacheSize) {
+  if (dex_file->NumProtoIds() < kDexCacheMethodTypeCacheSize) {
     num_method_types = dex_file->NumProtoIds();
   } else {
-    num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
+    num_method_types = kDexCacheMethodTypeCacheSize;
   }
 
   if (num_method_types > 0) {
-    method_types = reinterpret_cast<mirror::MethodTypeDexCacheType*>(
+    method_types = reinterpret_cast<MethodTypeDexCacheType*>(
         raw_arrays + layout.MethodTypesOffset());
   }
 
@@ -98,13 +106,13 @@
       ? nullptr
       : reinterpret_cast<GcRoot<mirror::CallSite>*>(raw_arrays + layout.CallSitesOffset());
 
-  DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
+  DCHECK_ALIGNED(raw_arrays, alignof(StringDexCacheType)) <<
                  "Expected raw_arrays to align to StringDexCacheType.";
-  DCHECK_ALIGNED(layout.StringsOffset(), alignof(mirror::StringDexCacheType)) <<
+  DCHECK_ALIGNED(layout.StringsOffset(), alignof(StringDexCacheType)) <<
                  "Expected StringsOffset() to align to StringDexCacheType.";
-  DCHECK_ALIGNED(strings, alignof(mirror::StringDexCacheType)) <<
+  DCHECK_ALIGNED(strings, alignof(StringDexCacheType)) <<
                  "Expected strings to align to StringDexCacheType.";
-  static_assert(alignof(mirror::StringDexCacheType) == 8u,
+  static_assert(alignof(StringDexCacheType) == 8u,
                 "Expected StringDexCacheType to have align of 8.");
   if (kIsDebugBuild) {
     // Sanity check to make sure all the dex cache arrays are empty. b/28992179
@@ -117,10 +125,11 @@
       CHECK(types[i].load(std::memory_order_relaxed).object.IsNull());
     }
     for (size_t i = 0; i < dex_file->NumMethodIds(); ++i) {
-      CHECK(mirror::DexCache::GetElementPtrSize(methods, i, image_pointer_size) == nullptr);
+      CHECK(GetElementPtrSize(methods, i, image_pointer_size) == nullptr);
     }
-    for (size_t i = 0; i < dex_file->NumFieldIds(); ++i) {
-      CHECK(mirror::DexCache::GetElementPtrSize(fields, i, image_pointer_size) == nullptr);
+    for (size_t i = 0; i < num_fields; ++i) {
+      CHECK_EQ(GetNativePairPtrSize(fields, i, image_pointer_size).index, 0u);
+      CHECK(GetNativePairPtrSize(fields, i, image_pointer_size).object == nullptr);
     }
     for (size_t i = 0; i < num_method_types; ++i) {
       CHECK_EQ(method_types[i].load(std::memory_order_relaxed).index, 0u);
@@ -136,6 +145,9 @@
   if (types != nullptr) {
     mirror::TypeDexCachePair::Initialize(types);
   }
+  if (fields != nullptr) {
+    mirror::FieldDexCachePair::Initialize(fields, image_pointer_size);
+  }
   if (method_types != nullptr) {
     mirror::MethodTypeDexCachePair::Initialize(method_types);
   }
@@ -148,7 +160,7 @@
                   methods,
                   dex_file->NumMethodIds(),
                   fields,
-                  dex_file->NumFieldIds(),
+                  num_fields,
                   method_types,
                   num_method_types,
                   call_sites,
@@ -164,7 +176,7 @@
                     uint32_t num_resolved_types,
                     ArtMethod** resolved_methods,
                     uint32_t num_resolved_methods,
-                    ArtField** resolved_fields,
+                    FieldDexCacheType* resolved_fields,
                     uint32_t num_resolved_fields,
                     MethodTypeDexCacheType* resolved_method_types,
                     uint32_t num_resolved_method_types,
@@ -218,5 +230,23 @@
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
 }
 
+#if !defined(__aarch64__) && !defined(__x86_64__)
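+// Slow-path fallback for architectures without 16-byte atomics: serialize all
+// 16-byte loads and stores through a single process-wide mutex.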
+static pthread_mutex_t dex_cache_slow_atomic_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+DexCache::ConversionPair64 DexCache::AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target) {
+  pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
+  DexCache::ConversionPair64 value = *reinterpret_cast<ConversionPair64*>(target);
+  pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
+  return value;
+}
+
+void DexCache::AtomicStoreRelease16B(std::atomic<ConversionPair64>* target,
+                                     ConversionPair64 value) {
+  pthread_mutex_lock(&dex_cache_slow_atomic_mutex);
+  *reinterpret_cast<ConversionPair64*>(target) = value;
+  pthread_mutex_unlock(&dex_cache_slow_atomic_mutex);
+}
+#endif
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 0579198..cf570b8 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -65,7 +65,7 @@
   DexCachePair(ObjPtr<T> object, uint32_t index)
       : object(object),
         index(index) {}
-  DexCachePair() = default;
+  DexCachePair() : index(0) {}
   DexCachePair(const DexCachePair<T>&) = default;
   DexCachePair& operator=(const DexCachePair<T>&) = default;
 
@@ -91,12 +91,44 @@
   }
 };
 
+template <typename T> struct PACKED(2 * __SIZEOF_POINTER__) NativeDexCachePair {
+  T* object;
+  size_t index;
+  // This is similar to DexCachePair except that we're storing a native pointer
+  // instead of a GC root. See DexCachePair for the details.
+  NativeDexCachePair(T* object, uint32_t index)
+      : object(object),
+        index(index) {}
+  NativeDexCachePair() : object(nullptr), index(0u) { }
+  NativeDexCachePair(const NativeDexCachePair<T>&) = default;
+  NativeDexCachePair& operator=(const NativeDexCachePair<T>&) = default;
+
+  static void Initialize(std::atomic<NativeDexCachePair<T>>* dex_cache, PointerSize pointer_size);
+
+  static uint32_t InvalidIndexForSlot(uint32_t slot) {
+    // Since the cache size is a power of two, 0 will always map to slot 0.
+    // Use 1 for slot 0 and 0 for all other slots.
+    return (slot == 0) ? 1u : 0u;
+  }
+
+  T* GetObjectForIndex(uint32_t idx) REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (idx != index) {
+      return nullptr;
+    }
+    DCHECK(object != nullptr);
+    return object;
+  }
+};
+
 using TypeDexCachePair = DexCachePair<Class>;
 using TypeDexCacheType = std::atomic<TypeDexCachePair>;
 
 using StringDexCachePair = DexCachePair<String>;
 using StringDexCacheType = std::atomic<StringDexCachePair>;
 
+using FieldDexCachePair = NativeDexCachePair<ArtField>;
+using FieldDexCacheType = std::atomic<FieldDexCachePair>;
+
 using MethodTypeDexCachePair = DexCachePair<MethodType>;
 using MethodTypeDexCacheType = std::atomic<MethodTypeDexCachePair>;
 
@@ -116,6 +148,11 @@
   static_assert(IsPowerOfTwo(kDexCacheStringCacheSize),
                 "String dex cache size is not a power of 2.");
 
+  // Size of field dex cache. Needs to be a power of 2 for entrypoint assumptions to hold.
+  static constexpr size_t kDexCacheFieldCacheSize = 1024;
+  static_assert(IsPowerOfTwo(kDexCacheFieldCacheSize),
+                "Field dex cache size is not a power of 2.");
+
   // Size of method type dex cache. Needs to be a power of 2 for entrypoint assumptions
   // to hold.
   static constexpr size_t kDexCacheMethodTypeCacheSize = 1024;
@@ -130,6 +167,10 @@
     return kDexCacheStringCacheSize;
   }
 
+  static constexpr size_t StaticArtFieldSize() {
+    return kDexCacheFieldCacheSize;
+  }
+
   static constexpr size_t StaticMethodTypeSize() {
     return kDexCacheMethodTypeCacheSize;
   }
@@ -171,10 +212,6 @@
     return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_));
   }
 
-  static MemberOffset DexOffset() {
-    return OFFSET_OF_OBJECT_MEMBER(DexCache, dex_);
-  }
-
   static MemberOffset StringsOffset() {
     return OFFSET_OF_OBJECT_MEMBER(DexCache, strings_);
   }
@@ -255,6 +292,8 @@
   // Pointer sized variant, used for patching.
   ALWAYS_INLINE void SetResolvedField(uint32_t idx, ArtField* field, PointerSize ptr_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void ClearResolvedField(uint32_t idx, PointerSize ptr_size)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   MethodType* GetResolvedMethodType(uint32_t proto_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -299,11 +338,11 @@
     SetFieldPtr<false>(ResolvedMethodsOffset(), resolved_methods);
   }
 
-  ArtField** GetResolvedFields() ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldPtr<ArtField**>(ResolvedFieldsOffset());
+  FieldDexCacheType* GetResolvedFields() ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldPtr<FieldDexCacheType*>(ResolvedFieldsOffset());
   }
 
-  void SetResolvedFields(ArtField** resolved_fields)
+  void SetResolvedFields(FieldDexCacheType* resolved_fields)
       ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_) {
     SetFieldPtr<false>(ResolvedFieldsOffset(), resolved_fields);
@@ -376,6 +415,22 @@
   template <typename PtrType>
   static void SetElementPtrSize(PtrType* ptr_array, size_t idx, PtrType ptr, PointerSize ptr_size);
 
+  template <typename T>
+  static NativeDexCachePair<T> GetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                                    size_t idx,
+                                                    PointerSize ptr_size);
+
+  template <typename T>
+  static void SetNativePairPtrSize(std::atomic<NativeDexCachePair<T>>* pair_array,
+                                   size_t idx,
+                                   NativeDexCachePair<T> pair,
+                                   PointerSize ptr_size);
+
+  uint32_t StringSlotIndex(dex::StringIndex string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  uint32_t TypeSlotIndex(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  uint32_t FieldSlotIndex(uint32_t field_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  uint32_t MethodTypeSlotIndex(uint32_t proto_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   void Init(const DexFile* dex_file,
             ObjPtr<String> location,
@@ -385,7 +440,7 @@
             uint32_t num_resolved_types,
             ArtMethod** resolved_methods,
             uint32_t num_resolved_methods,
-            ArtField** resolved_fields,
+            FieldDexCacheType* resolved_fields,
             uint32_t num_resolved_fields,
             MethodTypeDexCacheType* resolved_method_types,
             uint32_t num_resolved_method_types,
@@ -394,9 +449,18 @@
             PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  uint32_t StringSlotIndex(dex::StringIndex string_idx) REQUIRES_SHARED(Locks::mutator_lock_);
-  uint32_t TypeSlotIndex(dex::TypeIndex type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
-  uint32_t MethodTypeSlotIndex(uint32_t proto_idx) REQUIRES_SHARED(Locks::mutator_lock_);
+  // std::pair<> is not trivially copyable and as such it is unsuitable for atomic operations,
+  // so we use a custom pair class for loading and storing the NativeDexCachePair<>.
+  template <typename IntType>
+  struct PACKED(2 * sizeof(IntType)) ConversionPair {
+    ConversionPair(IntType f, IntType s) : first(f), second(s) { }
+    ConversionPair(const ConversionPair&) = default;
+    ConversionPair& operator=(const ConversionPair&) = default;
+    IntType first;
+    IntType second;
+  };
+  using ConversionPair32 = ConversionPair<uint32_t>;
+  using ConversionPair64 = ConversionPair<uint64_t>;
 
   // Visit instance fields of the dex cache as well as its associated arrays.
   template <bool kVisitNativeRoots,
@@ -406,12 +470,58 @@
   void VisitReferences(ObjPtr<Class> klass, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
-  HeapReference<Object> dex_;
+  // Due to lack of 16-byte atomics support, we use hand-crafted routines.
+#if defined(__aarch64__)
+  // 16-byte atomics are supported on aarch64.
+  ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
+      std::atomic<ConversionPair64>* target) {
+    return target->load(std::memory_order_relaxed);
+  }
+
+  ALWAYS_INLINE static void AtomicStoreRelease16B(
+      std::atomic<ConversionPair64>* target, ConversionPair64 value) {
+    target->store(value, std::memory_order_release);
+  }
+#elif defined(__x86_64__)
+  ALWAYS_INLINE static ConversionPair64 AtomicLoadRelaxed16B(
+      std::atomic<ConversionPair64>* target) {
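+    // cmpxchg16b with a zero expected value and zero replacement performs an
+    // atomic 16-byte load: if *target is zero it rewrites zero (a no-op),
+    // otherwise the current value is returned in rdx:rax.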
+    uint64_t first, second;
+    __asm__ __volatile__(
+        "lock cmpxchg16b (%2)"
+        : "=&a"(first), "=&d"(second)
+        : "r"(target), "a"(0), "d"(0), "b"(0), "c"(0)
+        : "cc");
+    return ConversionPair64(first, second);
+  }
+
+  ALWAYS_INLINE static void AtomicStoreRelease16B(
+      std::atomic<ConversionPair64>* target, ConversionPair64 value) {
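+    // Read the current 16 bytes, then loop on cmpxchg16b until the new value
+    // is swapped in atomically.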
+    uint64_t first, second;
+    __asm__ __volatile__ (
+        "movq (%2), %%rax\n\t"
+        "movq 8(%2), %%rdx\n\t"
+        "1:\n\t"
+        "lock cmpxchg16b (%2)\n\t"
+        "jnz 1b"
+        : "=&a"(first), "=&d"(second)
+        : "r"(target), "b"(value.first), "c"(value.second)
+        : "cc");
+  }
+#else
+  static ConversionPair64 AtomicLoadRelaxed16B(std::atomic<ConversionPair64>* target);
+  static void AtomicStoreRelease16B(std::atomic<ConversionPair64>* target, ConversionPair64 value);
+#endif
+
   HeapReference<String> location_;
+  // Number of elements in the call_sites_ array. Note that this appears here
+  // because of our packing logic for 32 bit fields.
+  uint32_t num_resolved_call_sites_;
+
   uint64_t dex_file_;               // const DexFile*
   uint64_t resolved_call_sites_;    // GcRoot<CallSite>* array with num_resolved_call_sites_
                                     // elements.
-  uint64_t resolved_fields_;        // ArtField*, array with num_resolved_fields_ elements.
+  uint64_t resolved_fields_;        // std::atomic<FieldDexCachePair>*, array with
+                                    // num_resolved_fields_ elements.
   uint64_t resolved_method_types_;  // std::atomic<MethodTypeDexCachePair>* array with
                                     // num_resolved_method_types_ elements.
   uint64_t resolved_methods_;       // ArtMethod*, array with num_resolved_methods_ elements.
@@ -419,7 +529,6 @@
   uint64_t strings_;                // std::atomic<StringDexCachePair>*, array with num_strings_
                                     // elements.
 
-  uint32_t num_resolved_call_sites_;    // Number of elements in the call_sites_ array.
   uint32_t num_resolved_fields_;        // Number of elements in the resolved_fields_ array.
   uint32_t num_resolved_method_types_;  // Number of elements in the resolved_method_types_ array.
   uint32_t num_resolved_methods_;       // Number of elements in the resolved_methods_ array.
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index ef0aaaa..71a47f6 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -54,7 +54,8 @@
   EXPECT_TRUE(dex_cache->StaticTypeSize() == dex_cache->NumResolvedTypes()
       || java_lang_dex_file_->NumTypeIds() == dex_cache->NumResolvedTypes());
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods());
-  EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
+  EXPECT_TRUE(dex_cache->StaticArtFieldSize() == dex_cache->NumResolvedFields()
+      || java_lang_dex_file_->NumFieldIds() == dex_cache->NumResolvedFields());
   EXPECT_TRUE(dex_cache->StaticMethodTypeSize() == dex_cache->NumResolvedMethodTypes()
       || java_lang_dex_file_->NumProtoIds() == dex_cache->NumResolvedMethodTypes());
 }
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index f6b6489..54034c2 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -68,8 +68,16 @@
     }
   }
   mirror::DexCache* const dex_cache = declaring_class->GetDexCache();
-  ArtField* const art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), kRuntimePointerSize);
-  CHECK(art_field != nullptr);
+  ArtField* art_field = dex_cache->GetResolvedField(GetDexFieldIndex(), kRuntimePointerSize);
+  if (UNLIKELY(art_field == nullptr)) {
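+    // The hash-based field cache may have evicted this entry, so look the
+    // field up again and repopulate the cache.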
+    if (IsStatic()) {
+      art_field = declaring_class->FindDeclaredStaticField(dex_cache, GetDexFieldIndex());
+    } else {
+      art_field = declaring_class->FindInstanceField(dex_cache, GetDexFieldIndex());
+    }
+    CHECK(art_field != nullptr);
+    dex_cache->SetResolvedField(GetDexFieldIndex(), art_field, kRuntimePointerSize);
+  }
   CHECK_EQ(declaring_class, art_field->GetDeclaringClass());
   return art_field;
 }
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index 8e591e4..811f1ea 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -187,10 +187,12 @@
   uint32_t rb_state = lw.ReadBarrierState();
   return rb_state;
 #else
-  // mips/mips64
-  LOG(FATAL) << "Unreachable";
-  UNREACHABLE();
-  UNUSED(fake_address_dependency);
+  // MIPS32/MIPS64: use an acquire memory barrier to prevent load-load
+  // reordering instead of a fake address dependency.
+  LockWord lw = GetLockWord(false);
+  *fake_address_dependency = 0;
+  std::atomic_thread_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
 #endif
 }
 
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 4541ce2..f7ab26d 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -538,10 +538,10 @@
                                          PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (pointer_size == PointerSize::k32) {
-      intptr_t ptr  = reinterpret_cast<intptr_t>(new_value);
-      DCHECK_EQ(static_cast<int32_t>(ptr), ptr);  // Check that we dont lose any non 0 bits.
+      uintptr_t ptr  = reinterpret_cast<uintptr_t>(new_value);
+      DCHECK_EQ(static_cast<uint32_t>(ptr), ptr);  // Check that we don't lose any non-zero bits.
       SetField32<kTransactionActive, kCheckTransaction, kVerifyFlags>(
-          field_offset, static_cast<int32_t>(ptr));
+          field_offset, static_cast<int32_t>(static_cast<uint32_t>(ptr)));
     } else {
       SetField64<kTransactionActive, kCheckTransaction, kVerifyFlags>(
           field_offset, reinterpret_cast64<int64_t>(new_value));
@@ -591,7 +591,8 @@
   ALWAYS_INLINE T GetFieldPtrWithSize(MemberOffset field_offset, PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (pointer_size == PointerSize::k32) {
-      return reinterpret_cast<T>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
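+      // Zero-extend the 32-bit field so a pointer with the high bit set does
+      // not get sign-extended on a 64-bit host.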
+      uint64_t address = static_cast<uint32_t>(GetField32<kVerifyFlags, kIsVolatile>(field_offset));
+      return reinterpret_cast<T>(static_cast<uintptr_t>(address));
     } else {
       int64_t v = GetField64<kVerifyFlags, kIsVolatile>(field_offset);
       return reinterpret_cast64<T>(v);
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 884b88a..de0e75b 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -89,16 +89,17 @@
   return true;
 }
 
-ObjPtr<String> String::DoReplace(Thread* self, uint16_t old_c, uint16_t new_c) {
-  DCHECK(IsCompressed() ? ContainsElement(ArrayRef<uint8_t>(value_compressed_, GetLength()), old_c)
-                        : ContainsElement(ArrayRef<uint16_t>(value_, GetLength()), old_c));
-  int32_t length = GetLength();
+ObjPtr<String> String::DoReplace(Thread* self, Handle<String> src, uint16_t old_c, uint16_t new_c) {
+  int32_t length = src->GetLength();
+  DCHECK(src->IsCompressed()
+             ? ContainsElement(ArrayRef<uint8_t>(src->value_compressed_, length), old_c)
+             : ContainsElement(ArrayRef<uint16_t>(src->value_, length), old_c));
   bool compressible =
       kUseStringCompression &&
       IsASCII(new_c) &&
-      (IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(value_, length, old_c)));
+      (src->IsCompressed() || (!IsASCII(old_c) && AllASCIIExcept(src->value_, length, old_c)));
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  const int32_t length_with_flag = String::GetFlaggedCount(GetLength(), compressible);
+  const int32_t length_with_flag = String::GetFlaggedCount(length, compressible);
   SetStringCountVisitor visitor(length_with_flag);
   ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
@@ -109,10 +110,10 @@
       return dchecked_integral_cast<uint8_t>((old_c != c) ? c : new_c);
     };
     uint8_t* out = string->value_compressed_;
-    if (LIKELY(IsCompressed())) {  // LIKELY(compressible == IsCompressed())
-      std::transform(value_compressed_, value_compressed_ + length, out, replace);
+    if (LIKELY(src->IsCompressed())) {  // LIKELY(compressible == src->IsCompressed())
+      std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace);
     } else {
-      std::transform(value_, value_ + length, out, replace);
+      std::transform(src->value_, src->value_ + length, out, replace);
     }
     DCHECK(kUseStringCompression && AllASCII(out, length));
   } else {
@@ -120,10 +121,10 @@
       return (old_c != c) ? c : new_c;
     };
     uint16_t* out = string->value_;
-    if (UNLIKELY(IsCompressed())) {  // LIKELY(compressible == IsCompressed())
-      std::transform(value_compressed_, value_compressed_ + length, out, replace);
+    if (UNLIKELY(src->IsCompressed())) {  // LIKELY(compressible == src->IsCompressed())
+      std::transform(src->value_compressed_, src->value_compressed_ + length, out, replace);
     } else {
-      std::transform(value_, value_ + length, out, replace);
+      std::transform(src->value_, src->value_ + length, out, replace);
     }
     DCHECK(!kUseStringCompression || !AllASCII(out, length));
   }
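
DoReplace becomes static and takes Handle<String> src because the allocation above can trigger a moving GC, which would invalidate a raw `this` pointer; handles are updated by the collector. A toy illustration of that hazard, not ART's Handle machinery:

#include <cstdio>
#include <string>
#include <vector>

// Toy moving "heap": slots is the root table the collector rewrites when
// it moves objects; a handle is an index into it, not a raw pointer.
struct ToyHeap {
  std::vector<std::string*> slots;
  size_t MakeHandle(std::string* obj) {
    slots.push_back(obj);
    return slots.size() - 1;
  }
  void MoveEverything() {  // stands in for a GC triggered by an allocation
    for (std::string*& slot : slots) {
      std::string* moved = new std::string(*slot);
      delete slot;  // the old location is now garbage
      slot = moved;
    }
  }
};

int main() {
  ToyHeap heap;
  size_t src = heap.MakeHandle(new std::string("hello"));
  std::string* raw = heap.slots[src];  // raw pointer into the "heap"
  heap.MoveEverything();               // e.g. Alloc() caused a moving GC
  std::printf("%s\n", heap.slots[src]->c_str());  // handle: still valid
  (void)raw;  // dereferencing `raw` here would be use-after-free
  return 0;
}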
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index dbb5a4c..b59bbfb 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -96,7 +96,7 @@
 
  // Create a new string where all occurrences of `old_c` are replaced with `new_c`.
   // String.doReplace(char, char) is called from String.replace(char, char) when there is a match.
-  ObjPtr<String> DoReplace(Thread* self, uint16_t old_c, uint16_t new_c)
+  static ObjPtr<String> DoReplace(Thread* self, Handle<String> src, uint16_t old_c, uint16_t new_c)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/monitor.h b/runtime/monitor.h
index 1fa4682..e80d31c 100644
--- a/runtime/monitor.h
+++ b/runtime/monitor.h
@@ -354,7 +354,7 @@
 // For use only by the JDWP implementation.
 class MonitorInfo {
  public:
-  MonitorInfo() = default;
+  MonitorInfo() : owner_(nullptr), entry_count_(0) {}
   MonitorInfo(const MonitorInfo&) = default;
   MonitorInfo& operator=(const MonitorInfo&) = default;
   explicit MonitorInfo(mirror::Object* o) REQUIRES(Locks::mutator_lock_);
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index efc42fd..11f8505 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -17,6 +17,8 @@
 #include "dalvik_system_VMRuntime.h"
 
 #ifdef ART_TARGET_ANDROID
+#include <sys/time.h>
+#include <sys/resource.h>
 extern "C" void android_set_application_target_sdk_version(uint32_t version);
 #endif
 #include <limits.h>
@@ -444,12 +446,17 @@
   if (!kPreloadDexCachesCollectStats) {
     return;
   }
+  // TODO: Update for hash-based DexCache arrays.
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   Thread* const self = Thread::Current();
   for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
     CHECK(dex_file != nullptr);
+    // In fallback mode, not all boot classpath components might be registered yet.
+    if (!class_linker->IsDexFileRegistered(self, *dex_file)) {
+      continue;
+    }
     ObjPtr<mirror::DexCache> const dex_cache = class_linker->FindDexCache(self, *dex_file);
-    CHECK(dex_cache != nullptr);  // Boot class path dex caches are never unloaded.
+    DCHECK(dex_cache != nullptr);  // Boot class path dex caches are never unloaded.
     for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
       ObjPtr<mirror::String> string = dex_cache->GetResolvedString(dex::StringIndex(j));
       if (string != nullptr) {
@@ -463,7 +470,7 @@
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedFields(); j++) {
-      ArtField* field = class_linker->GetResolvedField(j, dex_cache);
+      ArtField* field = dex_cache->GetResolvedField(j, class_linker->GetImagePointerSize());
       if (field != nullptr) {
         filled->num_fields++;
       }
@@ -580,9 +587,7 @@
 static void VMRuntime_registerAppInfo(JNIEnv* env,
                                       jclass clazz ATTRIBUTE_UNUSED,
                                       jstring profile_file,
-                                      jstring app_dir,
-                                      jobjectArray code_paths,
-                                      jstring foreign_dex_profile_path) {
+                                      jobjectArray code_paths) {
   std::vector<std::string> code_paths_vec;
   int code_paths_length = env->GetArrayLength(code_paths);
   for (int i = 0; i < code_paths_length; i++) {
@@ -596,22 +601,7 @@
   std::string profile_file_str(raw_profile_file);
   env->ReleaseStringUTFChars(profile_file, raw_profile_file);
 
-  std::string foreign_dex_profile_path_str = "";
-  if (foreign_dex_profile_path != nullptr) {
-    const char* raw_foreign_dex_profile_path =
-        env->GetStringUTFChars(foreign_dex_profile_path, nullptr);
-    foreign_dex_profile_path_str.assign(raw_foreign_dex_profile_path);
-    env->ReleaseStringUTFChars(foreign_dex_profile_path, raw_foreign_dex_profile_path);
-  }
-
-  const char* raw_app_dir = env->GetStringUTFChars(app_dir, nullptr);
-  std::string app_dir_str(raw_app_dir);
-  env->ReleaseStringUTFChars(app_dir, raw_app_dir);
-
-  Runtime::Current()->RegisterAppInfo(code_paths_vec,
-                                      profile_file_str,
-                                      foreign_dex_profile_path_str,
-                                      app_dir_str);
+  Runtime::Current()->RegisterAppInfo(code_paths_vec, profile_file_str);
 }
 
 static jboolean VMRuntime_isBootClassPathOnDisk(JNIEnv* env, jclass, jstring java_instruction_set) {
@@ -641,6 +631,23 @@
   return Runtime::Current()->GetPrunedDalvikCache() ? JNI_TRUE : JNI_FALSE;
 }
 
+static void VMRuntime_setSystemDaemonThreadPriority(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                    jclass klass ATTRIBUTE_UNUSED) {
+#ifdef ART_TARGET_ANDROID
+  Thread* self = Thread::Current();
+  DCHECK(self != nullptr);
+  pid_t tid = self->GetTid();
+  // We use a priority lower than the default for the system daemon threads (e.g. HeapTaskDaemon)
+  // to avoid jank due to CPU contention between the GC and other UI-related threads. b/36631902.
+  // We may use a native priority that doesn't have a corresponding java.lang.Thread-level priority.
+  static constexpr int kSystemDaemonNiceValue = 4;  // priority 124
+  if (setpriority(PRIO_PROCESS, tid, kSystemDaemonNiceValue) != 0) {
+    PLOG(INFO) << *self << " setpriority(PRIO_PROCESS, " << tid << ", "
+               << kSystemDaemonNiceValue << ") failed";
+  }
+#endif
+}
+
 static JNINativeMethod gMethods[] = {
   FAST_NATIVE_METHOD(VMRuntime, addressOf, "(Ljava/lang/Object;)J"),
   NATIVE_METHOD(VMRuntime, bootClassPath, "()Ljava/lang/String;"),
@@ -674,11 +681,11 @@
   FAST_NATIVE_METHOD(VMRuntime, is64Bit, "()Z"),
   FAST_NATIVE_METHOD(VMRuntime, isCheckJniEnabled, "()Z"),
   NATIVE_METHOD(VMRuntime, preloadDexCaches, "()V"),
-  NATIVE_METHOD(VMRuntime, registerAppInfo,
-                "(Ljava/lang/String;Ljava/lang/String;[Ljava/lang/String;Ljava/lang/String;)V"),
+  NATIVE_METHOD(VMRuntime, registerAppInfo, "(Ljava/lang/String;[Ljava/lang/String;)V"),
   NATIVE_METHOD(VMRuntime, isBootClassPathOnDisk, "(Ljava/lang/String;)Z"),
   NATIVE_METHOD(VMRuntime, getCurrentInstructionSet, "()Ljava/lang/String;"),
   NATIVE_METHOD(VMRuntime, didPruneDalvikCache, "()Z"),
+  NATIVE_METHOD(VMRuntime, setSystemDaemonThreadPriority, "()V"),
 };
 
 void register_dalvik_system_VMRuntime(JNIEnv* env) {
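
setSystemDaemonThreadPriority applies a per-thread nice value via setpriority(2); on Linux, passing a thread id as the PRIO_PROCESS target affects only that thread. A minimal standalone equivalent (kNice mirrors the constant above; everything else is illustrative):

#include <cstdio>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <unistd.h>

int main() {
  // gettid() is the kernel thread id; PRIO_PROCESS with a tid affects
  // only the calling thread on Linux.
  pid_t tid = static_cast<pid_t>(syscall(SYS_gettid));
  constexpr int kNice = 4;  // mildly below the default nice of 0
  if (setpriority(PRIO_PROCESS, tid, kNice) != 0) {
    std::perror("setpriority");
    return 1;
  }
  std::printf("tid %d now at nice %d\n", tid, getpriority(PRIO_PROCESS, tid));
  return 0;
}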
diff --git a/runtime/native/dalvik_system_ZygoteHooks.cc b/runtime/native/dalvik_system_ZygoteHooks.cc
index 100f476..836ba81 100644
--- a/runtime/native/dalvik_system_ZygoteHooks.cc
+++ b/runtime/native/dalvik_system_ZygoteHooks.cc
@@ -74,12 +74,40 @@
   }
 }
 
-static void DoCollectNonDebuggableCallback(Thread* thread, void* data ATTRIBUTE_UNUSED)
+class ClassSet {
+ public:
+  // The number of classes we reasonably expect to have to look at. Realistically the number is
+  // closer to ~10, but there is little harm in having some extra capacity.
+  static constexpr int kClassSetCapacity = 100;
+
+  explicit ClassSet(Thread* const self) : self_(self) {
+    self_->GetJniEnv()->PushFrame(kClassSetCapacity);
+  }
+
+  ~ClassSet() {
+    self_->GetJniEnv()->PopFrame();
+  }
+
+  void AddClass(ObjPtr<mirror::Class> klass) REQUIRES(Locks::mutator_lock_) {
+    class_set_.insert(self_->GetJniEnv()->AddLocalReference<jclass>(klass.Ptr()));
+  }
+
+  const std::unordered_set<jclass>& GetClasses() const {
+    return class_set_;
+  }
+
+ private:
+  Thread* const self_;
+  std::unordered_set<jclass> class_set_;
+};
+
+static void DoCollectNonDebuggableCallback(Thread* thread, void* data)
     REQUIRES(Locks::mutator_lock_) {
   class NonDebuggableStacksVisitor : public StackVisitor {
    public:
-    explicit NonDebuggableStacksVisitor(Thread* t)
-        : StackVisitor(t, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames) {}
+    NonDebuggableStacksVisitor(Thread* t, ClassSet* class_set)
+        : StackVisitor(t, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+          class_set_(class_set) {}
 
     ~NonDebuggableStacksVisitor() OVERRIDE {}
 
@@ -87,7 +115,7 @@
       if (GetMethod()->IsRuntimeMethod()) {
         return true;
       }
-      NonDebuggableClasses::AddNonDebuggableClass(GetMethod()->GetDeclaringClass());
+      class_set_->AddClass(GetMethod()->GetDeclaringClass());
       if (kIsDebugBuild) {
         LOG(INFO) << GetMethod()->GetDeclaringClass()->PrettyClass()
                   << " might not be fully debuggable/deoptimizable due to "
@@ -95,16 +123,31 @@
       }
       return true;
     }
+
+   private:
+    ClassSet* class_set_;
   };
-  NonDebuggableStacksVisitor visitor(thread);
+  NonDebuggableStacksVisitor visitor(thread, reinterpret_cast<ClassSet*>(data));
   visitor.WalkStack();
 }
 
-static void CollectNonDebuggableClasses() {
+static void CollectNonDebuggableClasses() REQUIRES(!Locks::mutator_lock_) {
   Runtime* const runtime = Runtime::Current();
-  ScopedSuspendAll suspend("Checking stacks for non-obsoletable methods!", /*long_suspend*/false);
-  MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
-  runtime->GetThreadList()->ForEach(DoCollectNonDebuggableCallback, nullptr);
+  Thread* const self = Thread::Current();
+  // Get the mutator lock.
+  ScopedObjectAccess soa(self);
+  ClassSet classes(self);
+  {
+    // Drop the shared mutator lock.
+    ScopedThreadSuspension sts(self, art::ThreadState::kNative);
+    // Get exclusive mutator lock with suspend all.
+    ScopedSuspendAll suspend("Checking stacks for non-obsoletable methods!", /*long_suspend*/false);
+    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    runtime->GetThreadList()->ForEach(DoCollectNonDebuggableCallback, &classes);
+  }
+  for (jclass klass : classes.GetClasses()) {
+    NonDebuggableClasses::AddNonDebuggableClass(klass);
+  }
 }
 
 static void EnableDebugFeatures(uint32_t debug_flags) {
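
The reworked CollectNonDebuggableClasses records classes as local JNI references while every thread is suspended and only publishes them to the global list after resuming. A generic sketch of that collect-then-publish shape, with a mutex standing in for suspend-all:

#include <mutex>
#include <unordered_set>
#include <vector>

std::mutex suspend_all;            // stands in for ScopedSuspendAll
std::vector<int> global_registry;  // stands in for the global class list

void CollectThenPublish(const std::vector<int>& stacks) {
  std::unordered_set<int> collected;  // cheap, local-only recording
  {
    std::lock_guard<std::mutex> suspended(suspend_all);
    for (int frame : stacks) {
      collected.insert(frame);  // walk stacks, record declaring classes
    }
  }  // "resume" all threads before doing heavier work
  for (int klass : collected) {
    global_registry.push_back(klass);  // publish outside the critical section
  }
}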
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index c8431c0..381dc7b 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -108,10 +108,50 @@
   return soa.AddLocalReference<jstring>(mirror::Class::ComputeName(hs.NewHandle(c)));
 }
 
-static jobjectArray Class_getProxyInterfaces(JNIEnv* env, jobject javaThis) {
+// TODO: Move this to mirror::Class? Other mirror types that commonly appear
+// as arrays have a GetArrayClass() method.
+static ObjPtr<mirror::Class> GetClassArrayClass(Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
+  return Runtime::Current()->GetClassLinker()->FindArrayClass(self, &class_class);
+}
+
+static jobjectArray Class_getInterfacesInternal(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::Class> c = DecodeClass(soa, javaThis);
-  return soa.AddLocalReference<jobjectArray>(c->GetInterfaces()->Clone(soa.Self()));
+  StackHandleScope<4> hs(soa.Self());
+  Handle<mirror::Class> klass = hs.NewHandle(DecodeClass(soa, javaThis));
+
+  if (klass->IsProxyClass()) {
+    return soa.AddLocalReference<jobjectArray>(klass->GetProxyInterfaces()->Clone(soa.Self()));
+  }
+
+  const DexFile::TypeList* iface_list = klass->GetInterfaceTypeList();
+  if (iface_list == nullptr) {
+    return nullptr;
+  }
+
+  const uint32_t num_ifaces = iface_list->Size();
+  Handle<mirror::Class> class_array_class = hs.NewHandle(GetClassArrayClass(soa.Self()));
+  Handle<mirror::ObjectArray<mirror::Class>> ifaces = hs.NewHandle(
+      mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class.Get(), num_ifaces));
+  if (ifaces.IsNull()) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return nullptr;
+  }
+
+  // Check that we aren't in an active transaction; we call SetWithoutChecks
+  // with kActiveTransaction == false.
+  DCHECK(!Runtime::Current()->IsActiveTransaction());
+
+  MutableHandle<mirror::Class> interface(hs.NewHandle<mirror::Class>(nullptr));
+  for (uint32_t i = 0; i < num_ifaces; ++i) {
+    const dex::TypeIndex type_idx = iface_list->GetTypeItem(i).type_idx_;
+    interface.Assign(ClassLinker::LookupResolvedType(
+        type_idx, klass->GetDexCache(), klass->GetClassLoader()));
+    ifaces->SetWithoutChecks<false>(i, interface.Get());
+  }
+
+  return soa.AddLocalReference<jobjectArray>(ifaces.Get());
 }
 
 static mirror::ObjectArray<mirror::Field>* GetDeclaredFields(
@@ -501,9 +541,7 @@
       // Pending exception from GetDeclaredClasses.
       return nullptr;
     }
-    ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
-    ObjPtr<mirror::Class> class_array_class =
-        Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
+    ObjPtr<mirror::Class> class_array_class = GetClassArrayClass(soa.Self());
     if (class_array_class == nullptr) {
       return nullptr;
     }
@@ -736,8 +774,8 @@
   FAST_NATIVE_METHOD(Class, getEnclosingMethodNative, "()Ljava/lang/reflect/Method;"),
   FAST_NATIVE_METHOD(Class, getInnerClassFlags, "(I)I"),
   FAST_NATIVE_METHOD(Class, getInnerClassName, "()Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Class, getInterfacesInternal, "()[Ljava/lang/Class;"),
   FAST_NATIVE_METHOD(Class, getNameNative, "()Ljava/lang/String;"),
-  FAST_NATIVE_METHOD(Class, getProxyInterfaces, "()[Ljava/lang/Class;"),
   FAST_NATIVE_METHOD(Class, getPublicDeclaredFields, "()[Ljava/lang/reflect/Field;"),
   FAST_NATIVE_METHOD(Class, getSignatureAnnotation, "()[Ljava/lang/String;"),
   FAST_NATIVE_METHOD(Class, isAnonymousClass, "()Z"),
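
getInterfacesInternal builds the Class[] straight from the dex file's interface TypeList using LookupResolvedType, which only consults already-resolved types and so cannot trigger resolution. A condensed sketch of the loop shape, with plain containers standing in for the mirror types:

#include <vector>

// Map each dex type index through a table of already-resolved classes;
// entries may legitimately be null because this is a lookup, not a
// resolution.
std::vector<const char*> BuildInterfaces(const std::vector<int>& type_indexes,
                                         const std::vector<const char*>& resolved) {
  std::vector<const char*> ifaces;
  ifaces.reserve(type_indexes.size());
  for (int idx : type_indexes) {
    ifaces.push_back(resolved[idx]);  // may push null; stored without checks
  }
  return ifaces;
}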
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
deleted file mode 100644
index 8fda4df..0000000
--- a/runtime/native/java_lang_DexCache.cc
+++ /dev/null
@@ -1,109 +0,0 @@
-/*
- * Copyright (C) 2008 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "java_lang_DexCache.h"
-
-#include "dex_file.h"
-#include "dex_file_types.h"
-#include "jni_internal.h"
-#include "mirror/class-inl.h"
-#include "mirror/dex_cache-inl.h"
-#include "mirror/object-inl.h"
-#include "scoped_fast_native_object_access-inl.h"
-#include "well_known_classes.h"
-
-namespace art {
-
-static jobject DexCache_getDexNative(JNIEnv* env, jobject javaDexCache) {
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
-  // Should only be called while holding the lock on the dex cache.
-  DCHECK_EQ(dex_cache->GetLockOwnerThreadId(), soa.Self()->GetThreadId());
-  const DexFile* dex_file = dex_cache->GetDexFile();
-  if (dex_file == nullptr) {
-    return nullptr;
-  }
-  void* address = const_cast<void*>(reinterpret_cast<const void*>(dex_file->Begin()));
-  jobject byte_buffer = env->NewDirectByteBuffer(address, dex_file->Size());
-  if (byte_buffer == nullptr) {
-    DCHECK(soa.Self()->IsExceptionPending());
-    return nullptr;
-  }
-
-  jvalue args[1];
-  args[0].l = byte_buffer;
-  return env->CallStaticObjectMethodA(WellKnownClasses::com_android_dex_Dex,
-                                      WellKnownClasses::com_android_dex_Dex_create,
-                                      args);
-}
-
-static jobject DexCache_getResolvedType(JNIEnv* env, jobject javaDexCache, jint type_index) {
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(type_index), dex_cache->GetDexFile()->NumTypeIds());
-  return soa.AddLocalReference<jobject>(dex_cache->GetResolvedType(dex::TypeIndex(type_index)));
-}
-
-static jobject DexCache_getResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index) {
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  return soa.AddLocalReference<jobject>(
-      dex_cache->GetResolvedString(dex::StringIndex(string_index)));
-}
-
-static void DexCache_setResolvedType(JNIEnv* env,
-                                     jobject javaDexCache,
-                                     jint type_index,
-                                     jobject type) {
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
-  const DexFile& dex_file = *dex_cache->GetDexFile();
-  CHECK_LT(static_cast<size_t>(type_index), dex_file.NumTypeIds());
-  ObjPtr<mirror::Class> t = soa.Decode<mirror::Class>(type);
-  if (t != nullptr && t->DescriptorEquals(dex_file.StringByTypeIdx(dex::TypeIndex(type_index)))) {
-    ClassTable* table =
-        Runtime::Current()->GetClassLinker()->FindClassTable(soa.Self(), dex_cache);
-    if (table != nullptr && table->TryInsert(t) == t) {
-      dex_cache->SetResolvedType(dex::TypeIndex(type_index), t);
-    }
-  }
-}
-
-static void DexCache_setResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index,
-                                       jobject string) {
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
-  CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  ObjPtr<mirror::String> s = soa.Decode<mirror::String>(string);
-  if (s != nullptr) {
-    dex_cache->SetResolvedString(dex::StringIndex(string_index), s);
-  }
-}
-
-static JNINativeMethod gMethods[] = {
-  FAST_NATIVE_METHOD(DexCache, getDexNative, "()Lcom/android/dex/Dex;"),
-  FAST_NATIVE_METHOD(DexCache, getResolvedType, "(I)Ljava/lang/Class;"),
-  FAST_NATIVE_METHOD(DexCache, getResolvedString, "(I)Ljava/lang/String;"),
-  FAST_NATIVE_METHOD(DexCache, setResolvedType, "(ILjava/lang/Class;)V"),
-  FAST_NATIVE_METHOD(DexCache, setResolvedString, "(ILjava/lang/String;)V"),
-};
-
-void register_java_lang_DexCache(JNIEnv* env) {
-  REGISTER_NATIVE_METHODS("java/lang/DexCache");
-}
-
-}  // namespace art
diff --git a/runtime/native/java_lang_Object.cc b/runtime/native/java_lang_Object.cc
index 6989244..fb4f99a 100644
--- a/runtime/native/java_lang_Object.cc
+++ b/runtime/native/java_lang_Object.cc
@@ -48,12 +48,19 @@
   soa.Decode<mirror::Object>(java_this)->Wait(soa.Self(), ms, ns);
 }
 
+static jint Object_identityHashCodeNative(JNIEnv* env, jclass, jobject javaObject) {
+  ScopedFastNativeObjectAccess soa(env);
+  ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(javaObject);
+  return static_cast<jint>(o->IdentityHashCode());
+}
+
 static JNINativeMethod gMethods[] = {
   FAST_NATIVE_METHOD(Object, internalClone, "()Ljava/lang/Object;"),
   FAST_NATIVE_METHOD(Object, notify, "()V"),
   FAST_NATIVE_METHOD(Object, notifyAll, "()V"),
   OVERLOADED_FAST_NATIVE_METHOD(Object, wait, "()V", wait),
   OVERLOADED_FAST_NATIVE_METHOD(Object, wait, "(JI)V", waitJI),
+  FAST_NATIVE_METHOD(Object, identityHashCodeNative, "(Ljava/lang/Object;)I"),
 };
 
 void register_java_lang_Object(JNIEnv* env) {
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index 2e561ff..bf33bf2 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -101,8 +101,9 @@
 
 static jstring String_doReplace(JNIEnv* env, jobject java_this, jchar old_c, jchar new_c) {
   ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::String> result =
-      soa.Decode<mirror::String>(java_this)->DoReplace(soa.Self(), old_c, new_c);
+  StackHandleScope<1> hs(soa.Self());
+  Handle<mirror::String> string = hs.NewHandle(soa.Decode<mirror::String>(java_this));
+  ObjPtr<mirror::String> result = mirror::String::DoReplace(soa.Self(), string, old_c, new_c);
   return soa.AddLocalReference<jstring>(result);
 }
 
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index d7c9cd0..2cabce8 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -227,15 +227,6 @@
       javaDst, dstPos, count);
 }
 
-static jint System_identityHashCode(JNIEnv* env, jclass, jobject javaObject) {
-  if (UNLIKELY(javaObject == nullptr)) {
-    return 0;
-  }
-  ScopedFastNativeObjectAccess soa(env);
-  ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(javaObject);
-  return static_cast<jint>(o->IdentityHashCode());
-}
-
 static JNINativeMethod gMethods[] = {
   FAST_NATIVE_METHOD(System, arraycopy, "(Ljava/lang/Object;ILjava/lang/Object;II)V"),
   FAST_NATIVE_METHOD(System, arraycopyCharUnchecked, "([CI[CII)V"),
@@ -246,7 +237,6 @@
   FAST_NATIVE_METHOD(System, arraycopyFloatUnchecked, "([FI[FII)V"),
   FAST_NATIVE_METHOD(System, arraycopyDoubleUnchecked, "([DI[DII)V"),
   FAST_NATIVE_METHOD(System, arraycopyBooleanUnchecked, "([ZI[ZII)V"),
-  FAST_NATIVE_METHOD(System, identityHashCode, "(Ljava/lang/Object;)I"),
 };
 
 void register_java_lang_System(JNIEnv* env) {
diff --git a/runtime/native/java_lang_Void.cc b/runtime/native/java_lang_Void.cc
new file mode 100644
index 0000000..96bfd1b
--- /dev/null
+++ b/runtime/native/java_lang_Void.cc
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "java_lang_Void.h"
+
+#include "class_linker.h"
+#include "jni_internal.h"
+#include "runtime.h"
+#include "scoped_fast_native_object_access-inl.h"
+
+namespace art {
+
+static jclass Void_lookupType(JNIEnv* env, jclass) {
+  ScopedFastNativeObjectAccess soa(env);
+  return soa.AddLocalReference<jclass>(
+      Runtime::Current()->GetClassLinker()->GetClassRoot(ClassLinker::kPrimitiveVoid));
+}
+
+static JNINativeMethod gMethods[] = {
+  FAST_NATIVE_METHOD(Void, lookupType, "()Ljava/lang/Class;"),
+};
+
+void register_java_lang_Void(JNIEnv* env) {
+  REGISTER_NATIVE_METHODS("java/lang/Void");
+}
+
+}  // namespace art
diff --git a/runtime/native/java_lang_DexCache.h b/runtime/native/java_lang_Void.h
similarity index 71%
rename from runtime/native/java_lang_DexCache.h
rename to runtime/native/java_lang_Void.h
index b1c1f5e..8777d80 100644
--- a/runtime/native/java_lang_DexCache.h
+++ b/runtime/native/java_lang_Void.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,15 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_DEXCACHE_H_
-#define ART_RUNTIME_NATIVE_JAVA_LANG_DEXCACHE_H_
+#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_VOID_H_
+#define ART_RUNTIME_NATIVE_JAVA_LANG_VOID_H_
 
 #include <jni.h>
 
 namespace art {
 
-void register_java_lang_DexCache(JNIEnv* env);
+void register_java_lang_Void(JNIEnv* env);
 
 }  // namespace art
 
-#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_DEXCACHE_H_
+#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_VOID_H_
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index bc23bed..8f226ce 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -194,12 +194,146 @@
   return annotations::IsMethodAnnotationPresent(method, klass);
 }
 
+static jint Executable_compareMethodParametersInternal(JNIEnv* env,
+                                                       jobject thisMethod,
+                                                       jobject otherMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* this_method = ArtMethod::FromReflectedMethod(soa, thisMethod);
+  ArtMethod* other_method = ArtMethod::FromReflectedMethod(soa, otherMethod);
+
+  this_method = this_method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  other_method = other_method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+
+  const DexFile::TypeList* this_list = this_method->GetParameterTypeList();
+  const DexFile::TypeList* other_list = other_method->GetParameterTypeList();
+
+  if (this_list == other_list) {
+    return 0;
+  }
+
+  if (this_list == nullptr && other_list != nullptr) {
+    return -1;
+  }
+
+  if (other_list == nullptr && this_list != nullptr) {
+    return 1;
+  }
+
+  const int32_t this_size = this_list->Size();
+  const int32_t other_size = other_list->Size();
+
+  if (this_size != other_size) {
+    return (this_size - other_size);
+  }
+
+  for (int32_t i = 0; i < this_size; ++i) {
+    const DexFile::TypeId& lhs = this_method->GetDexFile()->GetTypeId(
+        this_list->GetTypeItem(i).type_idx_);
+    const DexFile::TypeId& rhs = other_method->GetDexFile()->GetTypeId(
+        other_list->GetTypeItem(i).type_idx_);
+
+    uint32_t lhs_len, rhs_len;
+    const char* lhs_data = this_method->GetDexFile()->StringDataAndUtf16LengthByIdx(
+        lhs.descriptor_idx_, &lhs_len);
+    const char* rhs_data = other_method->GetDexFile()->StringDataAndUtf16LengthByIdx(
+        rhs.descriptor_idx_, &rhs_len);
+
+    int cmp = strcmp(lhs_data, rhs_data);
+    if (cmp != 0) {
+      return (cmp < 0) ? -1 : 1;
+    }
+  }
+
+  return 0;
+}
+
+static jobject Executable_getMethodNameInternal(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  return soa.AddLocalReference<jobject>(method->GetNameAsString(soa.Self()));
+}
+
+static jobject Executable_getMethodReturnTypeInternal(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  ObjPtr<mirror::Class> return_type(method->GetReturnType(true /* resolve */));
+  if (return_type.IsNull()) {
+    CHECK(soa.Self()->IsExceptionPending());
+    return nullptr;
+  }
+
+  return soa.AddLocalReference<jobject>(return_type);
+}
+
+// TODO: Move this to mirror::Class? Other mirror types that commonly appear
+// as arrays have a GetArrayClass() method. This is duplicated in
+// java_lang_Class.cc as well.
+static ObjPtr<mirror::Class> GetClassArrayClass(Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
+  return Runtime::Current()->GetClassLinker()->FindArrayClass(self, &class_class);
+}
+
+static jobjectArray Executable_getParameterTypesInternal(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+
+  const DexFile::TypeList* params = method->GetParameterTypeList();
+  if (params == nullptr) {
+    return nullptr;
+  }
+
+  const uint32_t num_params = params->Size();
+
+  StackHandleScope<3> hs(soa.Self());
+  Handle<mirror::Class> class_array_class = hs.NewHandle(GetClassArrayClass(soa.Self()));
+  Handle<mirror::ObjectArray<mirror::Class>> ptypes = hs.NewHandle(
+      mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class.Get(), num_params));
+  if (ptypes.IsNull()) {
+    DCHECK(soa.Self()->IsExceptionPending());
+    return nullptr;
+  }
+
+  MutableHandle<mirror::Class> param(hs.NewHandle<mirror::Class>(nullptr));
+  for (uint32_t i = 0; i < num_params; ++i) {
+    const dex::TypeIndex type_idx = params->GetTypeItem(i).type_idx_;
+    param.Assign(Runtime::Current()->GetClassLinker()->ResolveType(type_idx, method));
+    if (param.Get() == nullptr) {
+      DCHECK(soa.Self()->IsExceptionPending());
+      return nullptr;
+    }
+    ptypes->SetWithoutChecks<false>(i, param.Get());
+  }
+
+  return soa.AddLocalReference<jobjectArray>(ptypes.Get());
+}
+
+static jint Executable_getParameterCountInternal(JNIEnv* env, jobject javaMethod) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+
+  const DexFile::TypeList* params = method->GetParameterTypeList();
+  return (params == nullptr) ? 0 : params->Size();
+}
+
 static JNINativeMethod gMethods[] = {
+  FAST_NATIVE_METHOD(Executable, compareMethodParametersInternal,
+                     "(Ljava/lang/reflect/Method;)I"),
   FAST_NATIVE_METHOD(Executable, getAnnotationNative,
-                "(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
-  FAST_NATIVE_METHOD(Executable, getDeclaredAnnotationsNative, "()[Ljava/lang/annotation/Annotation;"),
+                     "(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Executable, getDeclaredAnnotationsNative,
+                     "()[Ljava/lang/annotation/Annotation;"),
   FAST_NATIVE_METHOD(Executable, getParameterAnnotationsNative,
-                "()[[Ljava/lang/annotation/Annotation;"),
+                     "()[[Ljava/lang/annotation/Annotation;"),
+  FAST_NATIVE_METHOD(Executable, getMethodNameInternal, "()Ljava/lang/String;"),
+  FAST_NATIVE_METHOD(Executable, getMethodReturnTypeInternal, "()Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Executable, getParameterTypesInternal, "()[Ljava/lang/Class;"),
+  FAST_NATIVE_METHOD(Executable, getParameterCountInternal, "()I"),
   FAST_NATIVE_METHOD(Executable, getParameters0, "()[Ljava/lang/reflect/Parameter;"),
   FAST_NATIVE_METHOD(Executable, getSignatureAnnotation, "()[Ljava/lang/String;"),
   FAST_NATIVE_METHOD(Executable, isAnnotationPresentNative, "(Ljava/lang/Class;)Z"),
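
compareMethodParametersInternal defines a three-way ordering: a missing parameter list sorts first, then the shorter list, then element-wise descriptor comparison clamped to -1/0/1. The same ordering against plain vectors of descriptor strings, sketched under the assumption that descriptors are NUL-terminated C strings (not the ART types):

#include <cstring>
#include <vector>

// Three-way compare mirroring the ordering above:
// no list < any list, shorter < longer, then lexicographic by descriptor.
int CompareParamLists(const std::vector<const char*>* lhs,
                      const std::vector<const char*>* rhs) {
  if (lhs == rhs) return 0;
  if (lhs == nullptr) return -1;
  if (rhs == nullptr) return 1;
  if (lhs->size() != rhs->size()) {
    return static_cast<int>(lhs->size()) - static_cast<int>(rhs->size());
  }
  for (size_t i = 0; i < lhs->size(); ++i) {
    int cmp = std::strcmp((*lhs)[i], (*rhs)[i]);
    if (cmp != 0) {
      return (cmp < 0) ? -1 : 1;  // clamp to -1/1
    }
  }
  return 0;
}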
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 9cf80a5..9198964 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -456,6 +456,13 @@
   return reinterpret_cast<jlong>(field);
 }
 
+static jobject Field_getNameInternal(JNIEnv* env, jobject javaField) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtField* field = soa.Decode<mirror::Field>(javaField)->GetArtField();
+  return soa.AddLocalReference<jobject>(
+      field->GetStringName(soa.Self(), true /* resolve */));
+}
+
 static jobjectArray Field_getDeclaredAnnotations(JNIEnv* env, jobject javaField) {
   ScopedFastNativeObjectAccess soa(env);
   ArtField* field = soa.Decode<mirror::Field>(javaField)->GetArtField();
@@ -506,6 +513,7 @@
   FAST_NATIVE_METHOD(Field, getFloat,   "(Ljava/lang/Object;)F"),
   FAST_NATIVE_METHOD(Field, getInt,     "(Ljava/lang/Object;)I"),
   FAST_NATIVE_METHOD(Field, getLong,    "(Ljava/lang/Object;)J"),
+  FAST_NATIVE_METHOD(Field, getNameInternal, "()Ljava/lang/String;"),
   FAST_NATIVE_METHOD(Field, getShort,   "(Ljava/lang/Object;)S"),
   FAST_NATIVE_METHOD(Field, isAnnotationPresentNative, "(Ljava/lang/Class;)Z"),
   FAST_NATIVE_METHOD(Field, set,        "(Ljava/lang/Object;Ljava/lang/Object;)V"),
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index 6e5e3d9..6f0130e 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -55,7 +55,8 @@
       ++i;
     }
     CHECK_NE(throws_index, -1);
-    mirror::ObjectArray<mirror::Class>* declared_exceptions = klass->GetThrows()->Get(throws_index);
+    mirror::ObjectArray<mirror::Class>* declared_exceptions =
+        klass->GetProxyThrows()->Get(throws_index);
     return soa.AddLocalReference<jobjectArray>(declared_exceptions->Clone(soa.Self()));
   } else {
     mirror::ObjectArray<mirror::Class>* result_array =
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index c58854b..d77cfa1 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -118,7 +118,7 @@
       for (int signal = 0; signal < _NSIG; ++signal) {
         android::NativeBridgeSignalHandlerFn fn = android::NativeBridgeGetSignalHandler(signal);
         if (fn != nullptr) {
-          SetSpecialSignalHandlerFn(signal, fn);
+          AddSpecialSignalHandlerFn(signal, fn);
         }
       }
 #endif
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index 7460d62..cbc5024 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -105,7 +105,7 @@
   if (pid == -1) {
     close(caller_to_addr2line[0]);
     close(caller_to_addr2line[1]);
-    close(addr2line_to_caller[1]);
+    close(addr2line_to_caller[0]);
     close(addr2line_to_caller[1]);
     return nullptr;
   }
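
The one-character change above fixes an error path that closed the write end of addr2line_to_caller twice and leaked the read end. The usual discipline when tearing down two pipes, sketched standalone (hypothetical function name):

#include <unistd.h>

// After a failed fork, every descriptor of both pipes must be closed
// exactly once: read end [0] and write end [1] of each pair.
void CloseBothPipes(int caller_to_child[2], int child_to_caller[2]) {
  close(caller_to_child[0]);
  close(caller_to_child[1]);
  close(child_to_caller[0]);  // the fd the original code leaked
  close(child_to_caller[1]);  // the fd the original code closed twice
}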
diff --git a/runtime/non_debuggable_classes.cc b/runtime/non_debuggable_classes.cc
index db121a9..829ea65 100644
--- a/runtime/non_debuggable_classes.cc
+++ b/runtime/non_debuggable_classes.cc
@@ -27,16 +27,16 @@
 
 std::vector<jclass>  NonDebuggableClasses::non_debuggable_classes;
 
-void NonDebuggableClasses::AddNonDebuggableClass(ObjPtr<mirror::Class> klass) {
+void NonDebuggableClasses::AddNonDebuggableClass(jclass klass) {
   Thread* self = Thread::Current();
   JNIEnvExt* env = self->GetJniEnv();
+  ObjPtr<mirror::Class> mirror_klass(self->DecodeJObject(klass)->AsClass());
   for (jclass c : non_debuggable_classes) {
-    if (self->DecodeJObject(c)->AsClass() == klass.Ptr()) {
+    if (self->DecodeJObject(c)->AsClass() == mirror_klass.Ptr()) {
       return;
     }
   }
-  ScopedLocalRef<jclass> lr(env, env->AddLocalReference<jclass>(klass));
-  non_debuggable_classes.push_back(reinterpret_cast<jclass>(env->NewGlobalRef(lr.get())));
+  non_debuggable_classes.push_back(reinterpret_cast<jclass>(env->NewGlobalRef(klass)));
 }
 
 }  // namespace art
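
AddNonDebuggableClass can now call NewGlobalRef directly on the caller's jclass because JNI promotes any valid reference, local or global, without needing an intermediate local ref. A minimal compilable sketch of that promotion (hypothetical helper, builds against jni.h):

#include <jni.h>

// NewGlobalRef accepts any valid reference (local or global) and returns
// a new global reference to the same object, so a caller that already
// holds a jclass can promote it directly.
jclass PromoteToGlobal(JNIEnv* env, jclass already_held) {
  return reinterpret_cast<jclass>(env->NewGlobalRef(already_held));
}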
diff --git a/runtime/non_debuggable_classes.h b/runtime/non_debuggable_classes.h
index b72afd8..e1b5633 100644
--- a/runtime/non_debuggable_classes.h
+++ b/runtime/non_debuggable_classes.h
@@ -21,21 +21,17 @@
 
 #include "base/mutex.h"
 #include "jni.h"
-#include "obj_ptr.h"
 
 namespace art {
 
-namespace mirror {
-class Class;
-}  // namespace mirror
-
 struct NonDebuggableClasses {
  public:
   static const std::vector<jclass>& GetNonDebuggableClasses() {
     return non_debuggable_classes;
   }
 
-  static void AddNonDebuggableClass(ObjPtr<mirror::Class> klass) REQUIRES(Locks::mutator_lock_);
+  static void AddNonDebuggableClass(jclass klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   static std::vector<jclass> non_debuggable_classes;
diff --git a/runtime/nth_caller_visitor.h b/runtime/nth_caller_visitor.h
index f72a853..71c6a82 100644
--- a/runtime/nth_caller_visitor.h
+++ b/runtime/nth_caller_visitor.h
@@ -31,7 +31,8 @@
         n(n_in),
         include_runtime_and_upcalls_(include_runtime_and_upcalls),
         count(0),
-        caller(nullptr) {}
+        caller(nullptr),
+        caller_pc(0) {}
 
   bool VisitFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
     ArtMethod* m = GetMethod();
diff --git a/runtime/oat.h b/runtime/oat.h
index 1544121..faa0129 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '1', '1', '4', '\0' };  // hash-based DexCache types.
+  static constexpr uint8_t kOatVersion[] = { '1', '1', '7', '\0' };  // Read barriers on MIPS.
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index 5ae2fc5..db6f8ee 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -430,8 +430,7 @@
       // starts up.
       LOG(WARNING) << "Dex location " << dex_location_ << " does not seem to include dex file. "
         << "Allow oat file use. This is potentially dangerous.";
-    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum()
-        != GetCombinedImageChecksum()) {
+    } else if (file.GetOatHeader().GetImageFileLocationOatChecksum() != image_info->oat_checksum) {
       VLOG(oat) << "Oat image checksum does not match image checksum.";
       return kOatBootImageOutOfDate;
     }
@@ -641,15 +640,8 @@
   std::string dir = location.substr(0, pos+1);
   dir += "oat/" + std::string(GetInstructionSetString(isa));
 
-  // Find the file portion of the dex location.
-  std::string file;
-  if (pos == std::string::npos) {
-    file = location;
-  } else {
-    file = location.substr(pos+1);
-  }
-
   // Get the base part of the file without the extension.
+  std::string file = location.substr(pos+1);
   pos = file.rfind('.');
   if (pos == std::string::npos) {
     *error_msg = "Dex location " + location + " has no extension.";
@@ -726,68 +718,36 @@
   return required_dex_checksums_found_ ? &cached_required_dex_checksums_ : nullptr;
 }
 
+std::unique_ptr<OatFileAssistant::ImageInfo>
+OatFileAssistant::ImageInfo::GetRuntimeImageInfo(InstructionSet isa, std::string* error_msg) {
+  CHECK(error_msg != nullptr);
+
+  Runtime* runtime = Runtime::Current();
+  std::unique_ptr<ImageInfo> info(new ImageInfo());
+  info->location = runtime->GetImageLocation();
+
+  std::unique_ptr<ImageHeader> image_header(
+      gc::space::ImageSpace::ReadImageHeader(info->location.c_str(), isa, error_msg));
+  if (image_header == nullptr) {
+    return nullptr;
+  }
+
+  info->oat_checksum = image_header->GetOatChecksum();
+  info->oat_data_begin = reinterpret_cast<uintptr_t>(image_header->GetOatDataBegin());
+  info->patch_delta = image_header->GetPatchDelta();
+  return info;
+}
+
 const OatFileAssistant::ImageInfo* OatFileAssistant::GetImageInfo() {
   if (!image_info_load_attempted_) {
     image_info_load_attempted_ = true;
-
-    Runtime* runtime = Runtime::Current();
-    std::vector<gc::space::ImageSpace*> image_spaces = runtime->GetHeap()->GetBootImageSpaces();
-    if (!image_spaces.empty()) {
-      cached_image_info_.location = image_spaces[0]->GetImageLocation();
-
-      if (isa_ == kRuntimeISA) {
-        const ImageHeader& image_header = image_spaces[0]->GetImageHeader();
-        cached_image_info_.oat_checksum = image_header.GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
-            image_header.GetOatDataBegin());
-        cached_image_info_.patch_delta = image_header.GetPatchDelta();
-      } else {
-        std::string error_msg;
-        std::unique_ptr<ImageHeader> image_header(
-            gc::space::ImageSpace::ReadImageHeader(cached_image_info_.location.c_str(),
-                                                   isa_,
-                                                   &error_msg));
-        CHECK(image_header != nullptr) << error_msg;
-        cached_image_info_.oat_checksum = image_header->GetOatChecksum();
-        cached_image_info_.oat_data_begin = reinterpret_cast<uintptr_t>(
-            image_header->GetOatDataBegin());
-        cached_image_info_.patch_delta = image_header->GetPatchDelta();
-      }
-    }
-    image_info_load_succeeded_ = (!image_spaces.empty());
-
-    combined_image_checksum_ = CalculateCombinedImageChecksum(isa_);
-  }
-  return image_info_load_succeeded_ ? &cached_image_info_ : nullptr;
-}
-
-// TODO: Use something better than xor.
-uint32_t OatFileAssistant::CalculateCombinedImageChecksum(InstructionSet isa) {
-  uint32_t checksum = 0;
-  std::vector<gc::space::ImageSpace*> image_spaces =
-      Runtime::Current()->GetHeap()->GetBootImageSpaces();
-  if (isa == kRuntimeISA) {
-    for (gc::space::ImageSpace* image_space : image_spaces) {
-      checksum ^= image_space->GetImageHeader().GetOatChecksum();
-    }
-  } else {
-    for (gc::space::ImageSpace* image_space : image_spaces) {
-      std::string location = image_space->GetImageLocation();
-      std::string error_msg;
-      std::unique_ptr<ImageHeader> image_header(
-          gc::space::ImageSpace::ReadImageHeader(location.c_str(), isa, &error_msg));
-      CHECK(image_header != nullptr) << error_msg;
-      checksum ^= image_header->GetOatChecksum();
+    std::string error_msg;
+    cached_image_info_ = ImageInfo::GetRuntimeImageInfo(isa_, &error_msg);
+    if (cached_image_info_ == nullptr) {
+      LOG(WARNING) << "Unable to get runtime image info: " << error_msg;
     }
   }
-  return checksum;
-}
-
-uint32_t OatFileAssistant::GetCombinedImageChecksum() {
-  if (!image_info_load_attempted_) {
-    GetImageInfo();
-  }
-  return combined_image_checksum_;
+  return cached_image_info_.get();
 }
 
 OatFileAssistant::OatFileInfo& OatFileAssistant::GetBestInfo() {
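
GetImageInfo is reduced to a cache around ImageInfo::GetRuntimeImageInfo, which reports failure through a null return plus an error string rather than CHECK-aborting. The general factory-with-error-out shape, sketched with hypothetical types:

#include <memory>
#include <string>

struct ImageHeaderLike { int oat_checksum = 42; };

struct InfoLike {
  int oat_checksum = 0;
  // Null return + *error_msg on failure, instead of CHECK-aborting.
  static std::unique_ptr<InfoLike> Create(const ImageHeaderLike* header,
                                          std::string* error_msg) {
    if (header == nullptr) {
      *error_msg = "could not read image header";
      return nullptr;
    }
    std::unique_ptr<InfoLike> info(new InfoLike());
    info->oat_checksum = header->oat_checksum;
    return info;
  }
};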
diff --git a/runtime/oat_file_assistant.h b/runtime/oat_file_assistant.h
index 3ede29f..b84e711 100644
--- a/runtime/oat_file_assistant.h
+++ b/runtime/oat_file_assistant.h
@@ -276,14 +276,15 @@
                                        std::string* oat_filename,
                                        std::string* error_msg);
 
-  static uint32_t CalculateCombinedImageChecksum(InstructionSet isa = kRuntimeISA);
-
  private:
   struct ImageInfo {
     uint32_t oat_checksum = 0;
     uintptr_t oat_data_begin = 0;
     int32_t patch_delta = 0;
     std::string location;
+
+    static std::unique_ptr<ImageInfo> GetRuntimeImageInfo(InstructionSet isa,
+                                                          std::string* error_msg);
   };
 
   class OatFileInfo {
@@ -368,7 +369,7 @@
     std::unique_ptr<OatFile> file_;
 
     bool status_attempted_ = false;
-    OatStatus status_;
+    OatStatus status_ = OatStatus::kOatCannotOpen;
 
     // For debugging only.
     // If this flag is set, the file has been released to the user and the
@@ -414,8 +415,6 @@
   // The caller shouldn't clean up or free the returned pointer.
   const ImageInfo* GetImageInfo();
 
-  uint32_t GetCombinedImageChecksum();
-
   // To implement Lock(), we lock a dummy file where the oat file would go
   // (adding ".flock" to the target file name) and retain the lock for the
   // remaining lifetime of the OatFileAssistant object.
@@ -445,9 +444,7 @@
   // TODO: The image info should probably be moved out of the oat file
   // assistant to an image file manager.
   bool image_info_load_attempted_ = false;
-  bool image_info_load_succeeded_ = false;
-  ImageInfo cached_image_info_;
-  uint32_t combined_image_checksum_ = 0;
+  std::unique_ptr<ImageInfo> cached_image_info_;
 
   DISALLOW_COPY_AND_ASSIGN(OatFileAssistant);
 };
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 7079614..d04dbbe 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -23,6 +23,7 @@
 #include "android-base/stringprintf.h"
 
 #include "art_field-inl.h"
+#include "base/bit_vector-inl.h"
 #include "base/logging.h"
 #include "base/stl_util.h"
 #include "base/systrace.h"
@@ -145,13 +146,52 @@
   return oat_files;
 }
 
+class TypeIndexInfo {
+ public:
+  explicit TypeIndexInfo(const DexFile* dex_file)
+      : type_indexes_(GenerateTypeIndexes(dex_file)),
+        iter_(type_indexes_.Indexes().begin()),
+        end_(type_indexes_.Indexes().end()) { }
+
+  BitVector& GetTypeIndexes() {
+    return type_indexes_;
+  }
+  BitVector::IndexIterator& GetIterator() {
+    return iter_;
+  }
+  BitVector::IndexIterator& GetIteratorEnd() {
+    return end_;
+  }
+  void AdvanceIterator() {
+    iter_++;
+  }
+
+ private:
+  static BitVector GenerateTypeIndexes(const DexFile* dex_file) {
+    BitVector type_indexes(/*start_bits*/0, /*expandable*/true, Allocator::GetMallocAllocator());
+    for (uint16_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+      const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+      uint16_t type_idx = class_def.class_idx_.index_;
+      type_indexes.SetBit(type_idx);
+    }
+    return type_indexes;
+  }
+
+  // BitVector with bits set for the type indexes of all classes in the input dex file.
+  BitVector type_indexes_;
+  BitVector::IndexIterator iter_;
+  BitVector::IndexIterator end_;
+};
+
 class DexFileAndClassPair : ValueObject {
  public:
-  DexFileAndClassPair(const DexFile* dex_file, size_t current_class_index, bool from_loaded_oat)
-     : cached_descriptor_(GetClassDescriptor(dex_file, current_class_index)),
+  DexFileAndClassPair(const DexFile* dex_file, TypeIndexInfo* type_info, bool from_loaded_oat)
+     : type_info_(type_info),
        dex_file_(dex_file),
-       current_class_index_(current_class_index),
-       from_loaded_oat_(from_loaded_oat) {}
+       cached_descriptor_(dex_file_->StringByTypeIdx(dex::TypeIndex(*type_info->GetIterator()))),
+       from_loaded_oat_(from_loaded_oat) {
+    type_info_->AdvanceIterator();
+  }
 
   DexFileAndClassPair(const DexFileAndClassPair& rhs) = default;
 
@@ -172,16 +212,12 @@
   }
 
   bool DexFileHasMoreClasses() const {
-    return current_class_index_ + 1 < dex_file_->NumClassDefs();
+    return type_info_->GetIterator() != type_info_->GetIteratorEnd();
   }
 
   void Next() {
-    ++current_class_index_;
-    cached_descriptor_ = GetClassDescriptor(dex_file_, current_class_index_);
-  }
-
-  size_t GetCurrentClassIndex() const {
-    return current_class_index_;
+    cached_descriptor_ = dex_file_->StringByTypeIdx(dex::TypeIndex(*type_info_->GetIterator()));
+    type_info_->AdvanceIterator();
   }
 
   bool FromLoadedOat() const {
@@ -193,42 +229,36 @@
   }
 
  private:
-  static const char* GetClassDescriptor(const DexFile* dex_file, size_t index) {
-    DCHECK(IsUint<16>(index));
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(static_cast<uint16_t>(index));
-    return dex_file->StringByTypeIdx(class_def.class_idx_);
-  }
-
-  const char* cached_descriptor_;
+  TypeIndexInfo* type_info_;
   const DexFile* dex_file_;
-  size_t current_class_index_;
+  const char* cached_descriptor_;
   bool from_loaded_oat_;  // We only need to compare mismatches between what we load now
                           // and what was loaded before. Any old duplicates must have been
                           // OK, and any new "internal" duplicates are as well (they must
                           // be from multidex, which resolves correctly).
 };
 
-static void AddDexFilesFromOat(const OatFile* oat_file,
-                               bool already_loaded,
-                               /*out*/std::priority_queue<DexFileAndClassPair>* heap,
-                               std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
+static void AddDexFilesFromOat(
+    const OatFile* oat_file,
+    /*out*/std::vector<const DexFile*>* dex_files,
+    std::vector<std::unique_ptr<const DexFile>>* opened_dex_files) {
   for (const OatDexFile* oat_dex_file : oat_file->GetOatDexFiles()) {
     std::string error;
     std::unique_ptr<const DexFile> dex_file = oat_dex_file->OpenDexFile(&error);
     if (dex_file == nullptr) {
       LOG(WARNING) << "Could not create dex file from oat file: " << error;
     } else if (dex_file->NumClassDefs() > 0U) {
-      heap->emplace(dex_file.get(), /*current_class_index*/0U, already_loaded);
+      dex_files->push_back(dex_file.get());
       opened_dex_files->push_back(std::move(dex_file));
     }
   }
 }
 
-static void AddNext(/*inout*/DexFileAndClassPair* original,
-                    /*inout*/std::priority_queue<DexFileAndClassPair>* heap) {
-  if (original->DexFileHasMoreClasses()) {
-    original->Next();
-    heap->push(std::move(*original));
+static void AddNext(/*inout*/DexFileAndClassPair& original,
+                    /*inout*/std::priority_queue<DexFileAndClassPair>& heap) {
+  if (original.DexFileHasMoreClasses()) {
+    original.Next();
+    heap.push(std::move(original));
   }
 }
 
@@ -297,7 +327,8 @@
 static bool GetDexFilesFromClassLoader(
     ScopedObjectAccessAlreadyRunnable& soa,
     mirror::ClassLoader* class_loader,
-    std::priority_queue<DexFileAndClassPair>* queue) REQUIRES_SHARED(Locks::mutator_lock_) {
+    std::vector<const DexFile*>* dex_files)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (ClassLinker::IsBootClassLoader(soa, class_loader)) {
     // The boot class loader. We don't load any of these files, as we know we compiled against
     // them correctly.
@@ -312,7 +343,7 @@
     return false;
   }
 
-  bool recursive_result = GetDexFilesFromClassLoader(soa, class_loader->GetParent(), queue);
+  bool recursive_result = GetDexFilesFromClassLoader(soa, class_loader->GetParent(), dex_files);
   if (!recursive_result) {
     // Something wrong up the chain.
     return false;
@@ -322,7 +353,7 @@
   auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
             REQUIRES_SHARED(Locks::mutator_lock_) {
     if (cp_dex_file->NumClassDefs() > 0) {
-      queue->emplace(cp_dex_file, 0U, true);
+      dex_files->push_back(cp_dex_file);
     }
     return true;  // Continue looking.
   };
@@ -341,7 +372,8 @@
 static void GetDexFilesFromDexElementsArray(
     ScopedObjectAccessAlreadyRunnable& soa,
     Handle<mirror::ObjectArray<mirror::Object>> dex_elements,
-    std::priority_queue<DexFileAndClassPair>* queue) REQUIRES_SHARED(Locks::mutator_lock_) {
+    std::vector<const DexFile*>* dex_files)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   if (dex_elements == nullptr) {
     // Nothing to do.
     return;
@@ -360,7 +392,7 @@
   auto GetDexFilesFn = [&] (const DexFile* cp_dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (cp_dex_file != nullptr && cp_dex_file->NumClassDefs() > 0) {
-      queue->emplace(cp_dex_file, 0U, true);
+      dex_files->push_back(cp_dex_file);
     }
     return true;  // Continue looking.
   };
@@ -389,43 +421,95 @@
 }
 
 static bool AreSharedLibrariesOk(const std::string& shared_libraries,
-                                 std::priority_queue<DexFileAndClassPair>& queue) {
+                                 std::vector<const DexFile*>& dex_files) {
+  // If no shared libraries, we expect no dex files.
   if (shared_libraries.empty()) {
-    if (queue.empty()) {
-      // No shared libraries or oat files, as expected.
-      return true;
-    }
-  } else {
-    if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
-      // If we find the special shared library, skip the shared libraries check.
-      return true;
-    }
-    // Shared libraries is a series of dex file paths and their checksums, each separated by '*'.
-    std::vector<std::string> shared_libraries_split;
-    Split(shared_libraries, '*', &shared_libraries_split);
-
-    size_t index = 0;
-    std::priority_queue<DexFileAndClassPair> temp = queue;
-    while (!temp.empty() && index < shared_libraries_split.size() - 1) {
-      DexFileAndClassPair pair(temp.top());
-      const DexFile* dex_file = pair.GetDexFile();
-      const std::string& dex_filename = dex_file->GetLocation();
-      if (dex_filename != shared_libraries_split[index]) {
-        break;
-      }
-      char* end;
-      size_t shared_lib_checksum = strtoul(shared_libraries_split[index + 1].c_str(), &end, 10);
-      uint32_t dex_checksum = dex_file->GetLocationChecksum();
-      if (*end != '\0' || dex_checksum != shared_lib_checksum) {
-        break;
-      }
-      temp.pop();
-      index += 2;
-    }
-
-    // Check is successful if it made it through the queue and all the shared libraries.
-    return temp.empty() && index == shared_libraries_split.size();
+    return dex_files.empty();
   }
+  // If we find the special shared library, skip the shared libraries check.
+  if (shared_libraries.compare(OatFile::kSpecialSharedLibrary) == 0) {
+    return true;
+  }
+  // Shared libraries is a series of dex file paths and their checksums, each separated by '*'.
+  std::vector<std::string> shared_libraries_split;
+  Split(shared_libraries, '*', &shared_libraries_split);
+
+  // Sanity check the sizes of the dex files and the split shared libraries. There should be 2x
+  // as many entries in the split shared libraries, since it contains pairs of filename/checksum.
+  if (dex_files.size() * 2 != shared_libraries_split.size()) {
+    return false;
+  }
+
+  for (size_t i = 0; i < dex_files.size(); ++i) {
+    if (dex_files[i]->GetLocation() != shared_libraries_split[i * 2]) {
+      return false;
+    }
+    char* end;
+    size_t shared_lib_checksum = strtoul(shared_libraries_split[i * 2 + 1].c_str(), &end, 10);
+    uint32_t dex_checksum = dex_files[i]->GetLocationChecksum();
+    if (*end != '\0' || dex_checksum != shared_lib_checksum) {
+      return false;
+    }
+  }
+
+  return true;
+}
+
+static bool CollisionCheck(std::vector<const DexFile*>& dex_files_loaded,
+                           std::vector<const DexFile*>& dex_files_unloaded,
+                           std::string* error_msg /*out*/) {
+  // Generate type index information for each dex file.
+  std::vector<TypeIndexInfo> loaded_types;
+  for (const DexFile* dex_file : dex_files_loaded) {
+    loaded_types.push_back(TypeIndexInfo(dex_file));
+  }
+  std::vector<TypeIndexInfo> unloaded_types;
+  for (const DexFile* dex_file : dex_files_unloaded) {
+    unloaded_types.push_back(TypeIndexInfo(dex_file));
+  }
+
+  // Populate the queue of dex file and class pairs with the loaded and unloaded dex files.
+  std::priority_queue<DexFileAndClassPair> queue;
+  for (size_t i = 0; i < dex_files_loaded.size(); ++i) {
+    if (loaded_types[i].GetIterator() != loaded_types[i].GetIteratorEnd()) {
+      queue.emplace(dex_files_loaded[i], &loaded_types[i], /*from_loaded_oat*/true);
+    }
+  }
+  for (size_t i = 0; i < dex_files_unloaded.size(); ++i) {
+    if (unloaded_types[i].GetIterator() != unloaded_types[i].GetIteratorEnd()) {
+      queue.emplace(dex_files_unloaded[i], &unloaded_types[i], /*from_loaded_oat*/false);
+    }
+  }
+
+  // Now drain the queue.
+  while (!queue.empty()) {
+    // Modifying the top element is only safe if we pop right after.
+    DexFileAndClassPair compare_pop(queue.top());
+    queue.pop();
+
+    // Compare against the following elements.
+    while (!queue.empty()) {
+      DexFileAndClassPair top(queue.top());
+      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
+        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
+        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
+          *error_msg =
+              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
+                           compare_pop.GetCachedDescriptor(),
+                           compare_pop.GetDexFile()->GetLocation().c_str(),
+                           top.GetDexFile()->GetLocation().c_str());
+          return true;
+        }
+        queue.pop();
+        AddNext(top, queue);
+      } else {
+        // Something else. Done here.
+        break;
+      }
+    }
+    AddNext(compare_pop, queue);
+  }
+
   return false;
 }
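
The rewritten CollisionCheck keeps the original detection scheme: each dex file contributes a cursor over its sorted class descriptors, a priority queue merges the cursors so equal descriptors surface back to back, and a collision is reported only when two equal descriptors come from opposite sides (already-loaded vs. about-to-be-loaded oat files). A minimal standalone sketch of that merge follows; the names (Source, Cursor, FindCrossSourceDuplicate) are illustrative and not ART API:

    // Sketch of k-way-merge duplicate detection over sorted descriptor lists.
    #include <iostream>
    #include <queue>
    #include <string>
    #include <vector>

    struct Source {
      std::vector<std::string> descriptors;  // sorted, like a dex file's type list
      bool from_loaded;                      // which side this source belongs to
    };

    struct Cursor {
      const Source* source;
      size_t index;
      const std::string& Current() const { return source->descriptors[index]; }
      // priority_queue is a max-heap, so invert the comparison to pop the
      // lexicographically smallest descriptor first.
      bool operator<(const Cursor& other) const { return Current() > other.Current(); }
    };

    bool FindCrossSourceDuplicate(const std::vector<Source>& sources, std::string* out) {
      std::priority_queue<Cursor> queue;
      for (const Source& s : sources) {
        if (!s.descriptors.empty()) queue.push(Cursor{&s, 0});
      }
      while (!queue.empty()) {
        Cursor top = queue.top();
        queue.pop();
        // Equal descriptors surface adjacently in heap order; compare against them.
        while (!queue.empty() && queue.top().Current() == top.Current()) {
          Cursor next = queue.top();
          if (next.source->from_loaded != top.source->from_loaded) {
            *out = top.Current();  // duplicate crossing loaded/unloaded sides
            return true;
          }
          queue.pop();
          if (++next.index < next.source->descriptors.size()) queue.push(next);
        }
        if (++top.index < top.source->descriptors.size()) queue.push(top);
      }
      return false;
    }

    int main() {
      std::vector<Source> sources = {
          {{"LA;", "LB;"}, /*from_loaded=*/true},
          {{"LB;", "LC;"}, /*from_loaded=*/false},
      };
      std::string dup;
      if (FindCrossSourceDuplicate(sources, &dup)) std::cout << dup << "\n";  // LB;
    }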
 
@@ -450,7 +534,7 @@
   DCHECK(oat_file != nullptr);
   DCHECK(error_msg != nullptr);
 
-  std::priority_queue<DexFileAndClassPair> queue;
+  std::vector<const DexFile*> dex_files_loaded;
 
   // Try to get dex files from the given class loader. If the class loader is null, or we do
   // not support one of the class loaders in the chain, conservatively compare against all
@@ -464,12 +548,12 @@
     Handle<mirror::ObjectArray<mirror::Object>> h_dex_elements =
         hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Object>>(dex_elements));
     if (h_class_loader != nullptr &&
-        GetDexFilesFromClassLoader(soa, h_class_loader.Get(), &queue)) {
+        GetDexFilesFromClassLoader(soa, h_class_loader.Get(), &dex_files_loaded)) {
       class_loader_ok = true;
 
       // In this case, also take into account the dex_elements array, if given. We don't need to
       // read it otherwise, as we'll compare against all open oat files anyways.
-      GetDexFilesFromDexElementsArray(soa, h_dex_elements, &queue);
+      GetDexFilesFromDexElementsArray(soa, h_dex_elements, &dex_files_loaded);
     } else if (h_class_loader != nullptr) {
       VLOG(class_linker) << "Something unsupported with "
                          << mirror::Class::PrettyClass(h_class_loader->GetClass());
@@ -486,10 +570,8 @@
   if (!class_loader_ok) {
     // Add dex files from already loaded oat files, but skip boot.
 
-    // Clean up the queue.
-    while (!queue.empty()) {
-      queue.pop();
-    }
+    // Clean up the dex files.
+    dex_files_loaded.clear();
 
     std::vector<const OatFile*> boot_oat_files = GetBootOatFiles();
     // The same OatFile can be loaded multiple times at different addresses. In this case, we don't
@@ -503,10 +585,7 @@
           boot_oat_files.end() && location != oat_file->GetLocation() &&
           unique_locations.find(location) == unique_locations.end()) {
         unique_locations.insert(location);
-        AddDexFilesFromOat(loaded_oat_file.get(),
-                           /*already_loaded*/true,
-                           &queue,
-                           /*out*/&opened_dex_files);
+        AddDexFilesFromOat(loaded_oat_file.get(), &dex_files_loaded, &opened_dex_files);
       }
     }
   }
@@ -514,46 +593,15 @@
   // Exit if shared libraries are ok. Do a full duplicate classes check otherwise.
   const std::string
       shared_libraries(oat_file->GetOatHeader().GetStoreValueByKey(OatHeader::kClassPathKey));
-  if (AreSharedLibrariesOk(shared_libraries, queue)) {
+  if (AreSharedLibrariesOk(shared_libraries, dex_files_loaded)) {
     return false;
   }
 
   ScopedTrace st("Collision check");
-
   // Add dex files from the oat file to check.
-  AddDexFilesFromOat(oat_file, /*already_loaded*/false, &queue, &opened_dex_files);
-
-  // Now drain the queue.
-  while (!queue.empty()) {
-    // Modifying the top element is only safe if we pop right after.
-    DexFileAndClassPair compare_pop(queue.top());
-    queue.pop();
-
-    // Compare against the following elements.
-    while (!queue.empty()) {
-      DexFileAndClassPair top(queue.top());
-
-      if (strcmp(compare_pop.GetCachedDescriptor(), top.GetCachedDescriptor()) == 0) {
-        // Same descriptor. Check whether it's crossing old-oat-files to new-oat-files.
-        if (compare_pop.FromLoadedOat() != top.FromLoadedOat()) {
-          *error_msg =
-              StringPrintf("Found duplicated class when checking oat files: '%s' in %s and %s",
-                           compare_pop.GetCachedDescriptor(),
-                           compare_pop.GetDexFile()->GetLocation().c_str(),
-                           top.GetDexFile()->GetLocation().c_str());
-          return true;
-        }
-        queue.pop();
-        AddNext(&top, &queue);
-      } else {
-        // Something else. Done here.
-        break;
-      }
-    }
-    AddNext(&compare_pop, &queue);
-  }
-
-  return false;
+  std::vector<const DexFile*> dex_files_unloaded;
+  AddDexFilesFromOat(oat_file, &dex_files_unloaded, &opened_dex_files);
+  return CollisionCheck(dex_files_loaded, dex_files_unloaded, error_msg);
 }
 
 std::vector<std::unique_ptr<const DexFile>> OatFileManager::OpenDexFilesFromOat(
@@ -729,9 +777,6 @@
     }
   }
 
-  // TODO(calin): Consider optimizing this knowing that is useless to record the
-  // use of fully compiled apks.
-  Runtime::Current()->NotifyDexLoaded(dex_location);
   return dex_files;
 }
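
For reference, the OatHeader class-path key that AreSharedLibrariesOk consumes is a flat '*'-separated alternation of dex locations and decimal checksums, e.g. /a.dex*123*/b.dex*456. A small sketch of an encoder for that layout (EncodeSharedLibraries is a hypothetical helper, not part of the patch):

    #include <cstdint>
    #include <sstream>
    #include <string>
    #include <utility>
    #include <vector>

    std::string EncodeSharedLibraries(
        const std::vector<std::pair<std::string, uint32_t>>& dex_files) {
      std::ostringstream oss;
      for (size_t i = 0; i < dex_files.size(); ++i) {
        if (i != 0) {
          oss << '*';
        }
        // Location first, then its checksum, matching the pairwise layout that
        // AreSharedLibrariesOk splits and re-validates.
        oss << dex_files[i].first << '*' << dex_files[i].second;
      }
      return oss.str();
    }
    // EncodeSharedLibraries({{"/data/app/a.dex", 305419896u}})
    //   == "/data/app/a.dex*305419896"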
 
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index b4e4285..8eef586 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -22,13 +22,14 @@
 
 namespace art {
 
-OatQuickMethodHeader::OatQuickMethodHeader(
-    uint32_t vmap_table_offset,
-    uint32_t frame_size_in_bytes,
-    uint32_t core_spill_mask,
-    uint32_t fp_spill_mask,
-    uint32_t code_size)
+OatQuickMethodHeader::OatQuickMethodHeader(uint32_t vmap_table_offset,
+                                           uint32_t method_info_offset,
+                                           uint32_t frame_size_in_bytes,
+                                           uint32_t core_spill_mask,
+                                           uint32_t fp_spill_mask,
+                                           uint32_t code_size)
     : vmap_table_offset_(vmap_table_offset),
+      method_info_offset_(method_info_offset),
       frame_info_(frame_size_in_bytes, core_spill_mask, fp_spill_mask),
       code_size_(code_size) {}
 
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 3cdde5a..f2a2af2 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -20,6 +20,7 @@
 #include "arch/instruction_set.h"
 #include "base/macros.h"
 #include "quick/quick_method_frame_info.h"
+#include "method_info.h"
 #include "stack_map.h"
 #include "utils.h"
 
@@ -30,11 +31,13 @@
 // OatQuickMethodHeader precedes the raw code chunk generated by the compiler.
 class PACKED(4) OatQuickMethodHeader {
  public:
-  explicit OatQuickMethodHeader(uint32_t vmap_table_offset = 0U,
-                                uint32_t frame_size_in_bytes = 0U,
-                                uint32_t core_spill_mask = 0U,
-                                uint32_t fp_spill_mask = 0U,
-                                uint32_t code_size = 0U);
+  OatQuickMethodHeader() = default;
+  explicit OatQuickMethodHeader(uint32_t vmap_table_offset,
+                                uint32_t method_info_offset,
+                                uint32_t frame_size_in_bytes,
+                                uint32_t core_spill_mask,
+                                uint32_t fp_spill_mask,
+                                uint32_t code_size);
 
   ~OatQuickMethodHeader();
 
@@ -63,8 +66,7 @@
 
   const void* GetOptimizedCodeInfoPtr() const {
     DCHECK(IsOptimized());
-    const void* data = reinterpret_cast<const void*>(code_ - vmap_table_offset_);
-    return data;
+    return reinterpret_cast<const void*>(code_ - vmap_table_offset_);
   }
 
   uint8_t* GetOptimizedCodeInfoPtr() {
@@ -76,6 +78,20 @@
     return CodeInfo(GetOptimizedCodeInfoPtr());
   }
 
+  const void* GetOptimizedMethodInfoPtr() const {
+    DCHECK(IsOptimized());
+    return reinterpret_cast<const void*>(code_ - method_info_offset_);
+  }
+
+  uint8_t* GetOptimizedMethodInfoPtr() {
+    DCHECK(IsOptimized());
+    return code_ - method_info_offset_;
+  }
+
+  MethodInfo GetOptimizedMethodInfo() const {
+    return MethodInfo(reinterpret_cast<const uint8_t*>(GetOptimizedMethodInfoPtr()));
+  }
+
   const uint8_t* GetCode() const {
     return code_;
   }
@@ -100,6 +116,18 @@
     return &vmap_table_offset_;
   }
 
+  uint32_t GetMethodInfoOffset() const {
+    return method_info_offset_;
+  }
+
+  void SetMethodInfoOffset(uint32_t offset) {
+    method_info_offset_ = offset;
+  }
+
+  const uint32_t* GetMethodInfoOffsetAddr() const {
+    return &method_info_offset_;
+  }
+
   const uint8_t* GetVmapTable() const {
     CHECK(!IsOptimized()) << "Unimplemented vmap table for optimizing compiler";
     return (vmap_table_offset_ == 0) ? nullptr : code_ - vmap_table_offset_;
@@ -160,12 +188,17 @@
   static constexpr uint32_t kCodeSizeMask = ~kShouldDeoptimizeMask;
 
   // The offset in bytes from the start of the vmap table to the end of the header.
-  uint32_t vmap_table_offset_;
+  uint32_t vmap_table_offset_ = 0u;
+  // The offset in bytes from the start of the method info to the end of the header.
+  // The method info offset lives in this header rather than in the CodeInfo because CodeInfo
+  // dedupes well and embedding the offset there would defeat that. The method info region holds
+  // method indices, which are hard to dedupe.
+  uint32_t method_info_offset_ = 0u;
   // The stack frame information.
   QuickMethodFrameInfo frame_info_;
   // The code size in bytes. The highest bit is used to signify if the compiled
   // code with the method header has should_deoptimize flag.
-  uint32_t code_size_;
+  uint32_t code_size_ = 0u;
   // The actual code.
   uint8_t code_[0];
 };
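
Both vmap_table_offset_ and method_info_offset_ follow the convention visible in the accessors above: they count bytes backwards from code_, so the CodeInfo and MethodInfo blobs precede the method header in memory. A minimal sketch of that pointer arithmetic (MethodHeaderSketch is illustrative only):

    #include <cstdint>

    // code points at the first instruction byte; both offsets are subtracted
    // from it, so the tables sit in memory before the header and code.
    struct MethodHeaderSketch {
      uint32_t vmap_table_offset;   // distance back from code to the CodeInfo
      uint32_t method_info_offset;  // distance back from code to the MethodInfo
      const uint8_t* code;

      const uint8_t* CodeInfoPtr() const { return code - vmap_table_offset; }
      const uint8_t* MethodInfoPtr() const { return code - method_info_offset; }
    };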
diff --git a/runtime/openjdkjvmti/Android.bp b/runtime/openjdkjvmti/Android.bp
index c01e3f4..e38f265 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/runtime/openjdkjvmti/Android.bp
@@ -13,11 +13,18 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+cc_library_headers {
+    name: "libopenjdkjvmti_headers",
+    host_supported: true,
+    export_include_dirs: ["include"],
+}
+
 cc_defaults {
     name: "libopenjdkjvmti_defaults",
     defaults: ["art_defaults"],
     host_supported: true,
     srcs: ["events.cc",
+           "fixed_up_dex_file.cc",
            "object_tagging.cc",
            "OpenjdkJvmTi.cc",
            "ti_class.cc",
@@ -40,6 +47,7 @@
            "ti_timers.cc",
            "transform.cc"],
     include_dirs: ["art/runtime"],
+    header_libs: ["libopenjdkjvmti_headers"],
     shared_libs: [
         "libbase",
         "libnativehelper",
@@ -49,7 +57,10 @@
 art_cc_library {
     name: "libopenjdkjvmti",
     defaults: ["libopenjdkjvmti_defaults"],
-    shared_libs: ["libart"],
+    shared_libs: [
+        "libart",
+        "libart-compiler",
+    ],
 }
 
 art_cc_library {
@@ -58,5 +69,8 @@
         "art_debug_defaults",
         "libopenjdkjvmti_defaults",
     ],
-    shared_libs: ["libartd"],
+    shared_libs: [
+        "libartd",
+        "libartd-compiler",
+    ],
 }
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index 450b6b6..39e603e 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -35,7 +35,7 @@
 
 #include <jni.h>
 
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 #include "art_jvmti.h"
 #include "base/logging.h"
@@ -66,10 +66,6 @@
 #include "ti_timers.h"
 #include "transform.h"
 
-// TODO Remove this at some point by annotating all the methods. It was put in to make the skeleton
-// easier to create.
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-
 namespace openjdkjvmti {
 
 EventHandler gEventHandler;
@@ -83,20 +79,26 @@
 
 class JvmtiFunctions {
  private:
-  static bool IsValidEnv(jvmtiEnv* env) {
-    return env != nullptr;
+  static jvmtiError getEnvironmentError(jvmtiEnv* env) {
+    if (env == nullptr) {
+      return ERR(INVALID_ENVIRONMENT);
+    } else if (art::Thread::Current() == nullptr) {
+      return ERR(UNATTACHED_THREAD);
+    } else {
+      return OK;
+    }
   }
 
-#define ENSURE_VALID_ENV(env)          \
-  do {                                 \
-    if (!IsValidEnv(env)) {            \
-      return ERR(INVALID_ENVIRONMENT); \
-    }                                  \
+#define ENSURE_VALID_ENV(env)                                            \
+  do {                                                                   \
+    jvmtiError ensure_valid_env_ ## __LINE__ = getEnvironmentError(env); \
+    if (ensure_valid_env_ ## __LINE__ != OK) {                           \
+      return ensure_valid_env_ ## __LINE__ ;                             \
+    }                                                                    \
   } while (false)
 
 #define ENSURE_HAS_CAP(env, cap) \
   do { \
-    ENSURE_VALID_ENV(env); \
     if (ArtJvmTiEnv::AsArtJvmTiEnv(env)->capabilities.cap != 1) { \
       return ERR(MUST_POSSESS_CAPABILITY); \
     } \
@@ -125,76 +127,92 @@
   }
 
   static jvmtiError GetThreadState(jvmtiEnv* env, jthread thread, jint* thread_state_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::GetThreadState(env, thread, thread_state_ptr);
   }
 
   static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::GetCurrentThread(env, thread_ptr);
   }
 
   static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::GetAllThreads(env, threads_count_ptr, threads_ptr);
   }
 
-  static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread) {
+  static jvmtiError SuspendThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SuspendThreadList(jvmtiEnv* env,
-                                      jint request_count,
-                                      const jthread* request_list,
-                                      jvmtiError* results) {
+                                      jint request_count ATTRIBUTE_UNUSED,
+                                      const jthread* request_list ATTRIBUTE_UNUSED,
+                                      jvmtiError* results ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ResumeThread(jvmtiEnv* env, jthread thread) {
+  static jvmtiError ResumeThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError ResumeThreadList(jvmtiEnv* env,
-                                     jint request_count,
-                                     const jthread* request_list,
-                                     jvmtiError* results) {
+                                     jint request_count ATTRIBUTE_UNUSED,
+                                     const jthread* request_list ATTRIBUTE_UNUSED,
+                                     jvmtiError* results ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_suspend);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError StopThread(jvmtiEnv* env, jthread thread, jobject exception) {
+  static jvmtiError StopThread(jvmtiEnv* env,
+                               jthread thread ATTRIBUTE_UNUSED,
+                               jobject exception ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_signal_thread);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread) {
+  static jvmtiError InterruptThread(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_signal_thread);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetThreadInfo(jvmtiEnv* env, jthread thread, jvmtiThreadInfo* info_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::GetThreadInfo(env, thread, info_ptr);
   }
 
   static jvmtiError GetOwnedMonitorInfo(jvmtiEnv* env,
-                                        jthread thread,
-                                        jint* owned_monitor_count_ptr,
-                                        jobject** owned_monitors_ptr) {
+                                        jthread thread ATTRIBUTE_UNUSED,
+                                        jint* owned_monitor_count_ptr ATTRIBUTE_UNUSED,
+                                        jobject** owned_monitors_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_owned_monitor_info);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetOwnedMonitorStackDepthInfo(jvmtiEnv* env,
-                                                  jthread thread,
-                                                  jint* monitor_info_count_ptr,
-                                                  jvmtiMonitorStackDepthInfo** monitor_info_ptr) {
+  static jvmtiError GetOwnedMonitorStackDepthInfo(
+      jvmtiEnv* env,
+      jthread thread ATTRIBUTE_UNUSED,
+      jint* monitor_info_count_ptr ATTRIBUTE_UNUSED,
+      jvmtiMonitorStackDepthInfo** monitor_info_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_owned_monitor_stack_depth_info);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetCurrentContendedMonitor(jvmtiEnv* env,
-                                               jthread thread,
-                                               jobject* monitor_ptr) {
+                                               jthread thread ATTRIBUTE_UNUSED,
+                                               jobject* monitor_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_current_contended_monitor);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -204,26 +222,31 @@
                                    jvmtiStartFunction proc,
                                    const void* arg,
                                    jint priority) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::RunAgentThread(env, thread, proc, arg, priority);
   }
 
   static jvmtiError SetThreadLocalStorage(jvmtiEnv* env, jthread thread, const void* data) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::SetThreadLocalStorage(env, thread, data);
   }
 
   static jvmtiError GetThreadLocalStorage(jvmtiEnv* env, jthread thread, void** data_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadUtil::GetThreadLocalStorage(env, thread, data_ptr);
   }
 
   static jvmtiError GetTopThreadGroups(jvmtiEnv* env,
                                        jint* group_count_ptr,
                                        jthreadGroup** groups_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadGroupUtil::GetTopThreadGroups(env, group_count_ptr, groups_ptr);
   }
 
   static jvmtiError GetThreadGroupInfo(jvmtiEnv* env,
                                        jthreadGroup group,
                                        jvmtiThreadGroupInfo* info_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadGroupUtil::GetThreadGroupInfo(env, group, info_ptr);
   }
 
@@ -233,6 +256,7 @@
                                            jthread** threads_ptr,
                                            jint* group_count_ptr,
                                            jthreadGroup** groups_ptr) {
+    ENSURE_VALID_ENV(env);
     return ThreadGroupUtil::GetThreadGroupChildren(env,
                                                    group,
                                                    thread_count_ptr,
@@ -247,6 +271,7 @@
                                   jint max_frame_count,
                                   jvmtiFrameInfo* frame_buffer,
                                   jint* count_ptr) {
+    ENSURE_VALID_ENV(env);
     return StackUtil::GetStackTrace(env,
                                     thread,
                                     start_depth,
@@ -259,6 +284,7 @@
                                       jint max_frame_count,
                                       jvmtiStackInfo** stack_info_ptr,
                                       jint* thread_count_ptr) {
+    ENSURE_VALID_ENV(env);
     return StackUtil::GetAllStackTraces(env, max_frame_count, stack_info_ptr, thread_count_ptr);
   }
 
@@ -267,6 +293,7 @@
                                              const jthread* thread_list,
                                              jint max_frame_count,
                                              jvmtiStackInfo** stack_info_ptr) {
+    ENSURE_VALID_ENV(env);
     return StackUtil::GetThreadListStackTraces(env,
                                                thread_count,
                                                thread_list,
@@ -275,10 +302,12 @@
   }
 
   static jvmtiError GetFrameCount(jvmtiEnv* env, jthread thread, jint* count_ptr) {
+    ENSURE_VALID_ENV(env);
     return StackUtil::GetFrameCount(env, thread, count_ptr);
   }
 
-  static jvmtiError PopFrame(jvmtiEnv* env, jthread thread) {
+  static jvmtiError PopFrame(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_pop_frame);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -288,40 +317,60 @@
                                      jint depth,
                                      jmethodID* method_ptr,
                                      jlocation* location_ptr) {
+    ENSURE_VALID_ENV(env);
     return StackUtil::GetFrameLocation(env, thread, depth, method_ptr, location_ptr);
   }
 
-  static jvmtiError NotifyFramePop(jvmtiEnv* env, jthread thread, jint depth) {
+  static jvmtiError NotifyFramePop(jvmtiEnv* env,
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_frame_pop_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnObject(jvmtiEnv* env, jthread thread, jobject value) {
+  static jvmtiError ForceEarlyReturnObject(jvmtiEnv* env,
+                                           jthread thread ATTRIBUTE_UNUSED,
+                                           jobject value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnInt(jvmtiEnv* env, jthread thread, jint value) {
+  static jvmtiError ForceEarlyReturnInt(jvmtiEnv* env,
+                                        jthread thread ATTRIBUTE_UNUSED,
+                                        jint value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnLong(jvmtiEnv* env, jthread thread, jlong value) {
+  static jvmtiError ForceEarlyReturnLong(jvmtiEnv* env,
+                                         jthread thread ATTRIBUTE_UNUSED,
+                                         jlong value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnFloat(jvmtiEnv* env, jthread thread, jfloat value) {
+  static jvmtiError ForceEarlyReturnFloat(jvmtiEnv* env,
+                                          jthread thread ATTRIBUTE_UNUSED,
+                                          jfloat value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnDouble(jvmtiEnv* env, jthread thread, jdouble value) {
+  static jvmtiError ForceEarlyReturnDouble(jvmtiEnv* env,
+                                           jthread thread ATTRIBUTE_UNUSED,
+                                           jdouble value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ForceEarlyReturnVoid(jvmtiEnv* env, jthread thread) {
+  static jvmtiError ForceEarlyReturnVoid(jvmtiEnv* env, jthread thread ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_force_early_return);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -332,6 +381,7 @@
                                      jobject initial_object,
                                      const jvmtiHeapCallbacks* callbacks,
                                      const void* user_data) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
     HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.FollowReferences(env,
@@ -347,12 +397,14 @@
                                        jclass klass,
                                        const jvmtiHeapCallbacks* callbacks,
                                        const void* user_data) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
     HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.IterateThroughHeap(env, heap_filter, klass, callbacks, user_data);
   }
 
   static jvmtiError GetTag(jvmtiEnv* env, jobject object, jlong* tag_ptr) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
 
     JNIEnv* jni_env = GetJniEnv(env);
@@ -370,6 +422,7 @@
   }
 
   static jvmtiError SetTag(jvmtiEnv* env, jobject object, jlong tag) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
 
     if (object == nullptr) {
@@ -394,6 +447,7 @@
                                        jint* count_ptr,
                                        jobject** object_result_ptr,
                                        jlong** tag_result_ptr) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
 
     JNIEnv* jni_env = GetJniEnv(env);
@@ -411,173 +465,210 @@
   }
 
   static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
+    ENSURE_VALID_ENV(env);
     return HeapUtil::ForceGarbageCollection(env);
   }
 
   static jvmtiError IterateOverObjectsReachableFromObject(
       jvmtiEnv* env,
-      jobject object,
-      jvmtiObjectReferenceCallback object_reference_callback,
-      const void* user_data) {
+      jobject object ATTRIBUTE_UNUSED,
+      jvmtiObjectReferenceCallback object_reference_callback ATTRIBUTE_UNUSED,
+      const void* user_data ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError IterateOverReachableObjects(jvmtiEnv* env,
-                                                jvmtiHeapRootCallback heap_root_callback,
-                                                jvmtiStackReferenceCallback stack_ref_callback,
-                                                jvmtiObjectReferenceCallback object_ref_callback,
-                                                const void* user_data) {
+  static jvmtiError IterateOverReachableObjects(
+      jvmtiEnv* env,
+      jvmtiHeapRootCallback heap_root_callback ATTRIBUTE_UNUSED,
+      jvmtiStackReferenceCallback stack_ref_callback ATTRIBUTE_UNUSED,
+      jvmtiObjectReferenceCallback object_ref_callback ATTRIBUTE_UNUSED,
+      const void* user_data ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError IterateOverHeap(jvmtiEnv* env,
-                                    jvmtiHeapObjectFilter object_filter,
-                                    jvmtiHeapObjectCallback heap_object_callback,
-                                    const void* user_data) {
+                                    jvmtiHeapObjectFilter object_filter ATTRIBUTE_UNUSED,
+                                    jvmtiHeapObjectCallback heap_object_callback ATTRIBUTE_UNUSED,
+                                    const void* user_data ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError IterateOverInstancesOfClass(jvmtiEnv* env,
-                                                jclass klass,
-                                                jvmtiHeapObjectFilter object_filter,
-                                                jvmtiHeapObjectCallback heap_object_callback,
-                                                const void* user_data) {
+  static jvmtiError IterateOverInstancesOfClass(
+      jvmtiEnv* env,
+      jclass klass ATTRIBUTE_UNUSED,
+      jvmtiHeapObjectFilter object_filter ATTRIBUTE_UNUSED,
+      jvmtiHeapObjectCallback heap_object_callback ATTRIBUTE_UNUSED,
+      const void* user_data ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_tag_objects);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalObject(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jobject* value_ptr) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jobject* value_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalInstance(jvmtiEnv* env,
-                                     jthread thread,
-                                     jint depth,
-                                     jobject* value_ptr) {
+                                     jthread thread ATTRIBUTE_UNUSED,
+                                     jint depth ATTRIBUTE_UNUSED,
+                                     jobject* value_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalInt(jvmtiEnv* env,
-                                jthread thread,
-                                jint depth,
-                                jint slot,
-                                jint* value_ptr) {
+                                jthread thread ATTRIBUTE_UNUSED,
+                                jint depth ATTRIBUTE_UNUSED,
+                                jint slot ATTRIBUTE_UNUSED,
+                                jint* value_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalLong(jvmtiEnv* env,
-                                 jthread thread,
-                                 jint depth,
-                                 jint slot,
-                                 jlong* value_ptr) {
+                                 jthread thread ATTRIBUTE_UNUSED,
+                                 jint depth ATTRIBUTE_UNUSED,
+                                 jint slot ATTRIBUTE_UNUSED,
+                                 jlong* value_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalFloat(jvmtiEnv* env,
-                                  jthread thread,
-                                  jint depth,
-                                  jint slot,
-                                  jfloat* value_ptr) {
+                                  jthread thread ATTRIBUTE_UNUSED,
+                                  jint depth ATTRIBUTE_UNUSED,
+                                  jint slot ATTRIBUTE_UNUSED,
+                                  jfloat* value_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLocalDouble(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jdouble* value_ptr) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jdouble* value_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalObject(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jobject value) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jobject value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalInt(jvmtiEnv* env,
-                                jthread thread,
-                                jint depth,
-                                jint slot,
-                                jint value) {
+                                jthread thread ATTRIBUTE_UNUSED,
+                                jint depth ATTRIBUTE_UNUSED,
+                                jint slot ATTRIBUTE_UNUSED,
+                                jint value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalLong(jvmtiEnv* env,
-                                 jthread thread,
-                                 jint depth,
-                                 jint slot,
-                                 jlong value) {
+                                 jthread thread ATTRIBUTE_UNUSED,
+                                 jint depth ATTRIBUTE_UNUSED,
+                                 jint slot ATTRIBUTE_UNUSED,
+                                 jlong value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalFloat(jvmtiEnv* env,
-                                  jthread thread,
-                                  jint depth,
-                                  jint slot,
-                                  jfloat value) {
+                                  jthread thread ATTRIBUTE_UNUSED,
+                                  jint depth ATTRIBUTE_UNUSED,
+                                  jint slot ATTRIBUTE_UNUSED,
+                                  jfloat value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError SetLocalDouble(jvmtiEnv* env,
-                                   jthread thread,
-                                   jint depth,
-                                   jint slot,
-                                   jdouble value) {
+                                   jthread thread ATTRIBUTE_UNUSED,
+                                   jint depth ATTRIBUTE_UNUSED,
+                                   jint slot ATTRIBUTE_UNUSED,
+                                   jdouble value ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+  static jvmtiError SetBreakpoint(jvmtiEnv* env,
+                                  jmethodID method ATTRIBUTE_UNUSED,
+                                  jlocation location ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_breakpoint_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ClearBreakpoint(jvmtiEnv* env, jmethodID method, jlocation location) {
+  static jvmtiError ClearBreakpoint(jvmtiEnv* env,
+                                    jmethodID method ATTRIBUTE_UNUSED,
+                                    jlocation location ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_breakpoint_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError SetFieldAccessWatch(jvmtiEnv* env,
+                                        jclass klass ATTRIBUTE_UNUSED,
+                                        jfieldID field ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_field_access_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ClearFieldAccessWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError ClearFieldAccessWatch(jvmtiEnv* env,
+                                          jclass klass ATTRIBUTE_UNUSED,
+                                          jfieldID field ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_field_access_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError SetFieldModificationWatch(jvmtiEnv* env,
+                                              jclass klass ATTRIBUTE_UNUSED,
+                                              jfieldID field ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_field_modification_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError ClearFieldModificationWatch(jvmtiEnv* env, jclass klass, jfieldID field) {
+  static jvmtiError ClearFieldModificationWatch(jvmtiEnv* env,
+                                                jclass klass ATTRIBUTE_UNUSED,
+                                                jfieldID field ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_generate_field_modification_events);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr) {
+    ENSURE_VALID_ENV(env);
     HeapUtil heap_util(ArtJvmTiEnv::AsArtJvmTiEnv(env)->object_tag_table.get());
     return heap_util.GetLoadedClasses(env, class_count_ptr, classes_ptr);
   }
@@ -586,6 +677,7 @@
                                           jobject initiating_loader,
                                           jint* class_count_ptr,
                                           jclass** classes_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassLoaderClasses(env, initiating_loader, class_count_ptr, classes_ptr);
   }
 
@@ -593,19 +685,25 @@
                                       jclass klass,
                                       char** signature_ptr,
                                       char** generic_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassSignature(env, klass, signature_ptr, generic_ptr);
   }
 
   static jvmtiError GetClassStatus(jvmtiEnv* env, jclass klass, jint* status_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassStatus(env, klass, status_ptr);
   }
 
-  static jvmtiError GetSourceFileName(jvmtiEnv* env, jclass klass, char** source_name_ptr) {
+  static jvmtiError GetSourceFileName(jvmtiEnv* env,
+                                      jclass klass ATTRIBUTE_UNUSED,
+                                      char** source_name_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_source_file_name);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetClassModifiers(jvmtiEnv* env, jclass klass, jint* modifiers_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassModifiers(env, klass, modifiers_ptr);
   }
 
@@ -613,6 +711,7 @@
                                     jclass klass,
                                     jint* method_count_ptr,
                                     jmethodID** methods_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassMethods(env, klass, method_count_ptr, methods_ptr);
   }
 
@@ -620,6 +719,7 @@
                                    jclass klass,
                                    jint* field_count_ptr,
                                    jfieldID** fields_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassFields(env, klass, field_count_ptr, fields_ptr);
   }
 
@@ -627,6 +727,7 @@
                                              jclass klass,
                                              jint* interface_count_ptr,
                                              jclass** interfaces_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetImplementedInterfaces(env, klass, interface_count_ptr, interfaces_ptr);
   }
 
@@ -634,46 +735,54 @@
                                            jclass klass,
                                            jint* minor_version_ptr,
                                            jint* major_version_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassVersionNumbers(env, klass, minor_version_ptr, major_version_ptr);
   }
 
   static jvmtiError GetConstantPool(jvmtiEnv* env,
-                                    jclass klass,
-                                    jint* constant_pool_count_ptr,
-                                    jint* constant_pool_byte_count_ptr,
-                                    unsigned char** constant_pool_bytes_ptr) {
+                                    jclass klass ATTRIBUTE_UNUSED,
+                                    jint* constant_pool_count_ptr ATTRIBUTE_UNUSED,
+                                    jint* constant_pool_byte_count_ptr ATTRIBUTE_UNUSED,
+                                    unsigned char** constant_pool_bytes_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_constant_pool);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError IsInterface(jvmtiEnv* env, jclass klass, jboolean* is_interface_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::IsInterface(env, klass, is_interface_ptr);
   }
 
   static jvmtiError IsArrayClass(jvmtiEnv* env,
                                  jclass klass,
                                  jboolean* is_array_class_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::IsArrayClass(env, klass, is_array_class_ptr);
   }
 
   static jvmtiError IsModifiableClass(jvmtiEnv* env,
                                       jclass klass,
                                       jboolean* is_modifiable_class_ptr) {
+    ENSURE_VALID_ENV(env);
     return Redefiner::IsModifiableClass(env, klass, is_modifiable_class_ptr);
   }
 
   static jvmtiError GetClassLoader(jvmtiEnv* env, jclass klass, jobject* classloader_ptr) {
+    ENSURE_VALID_ENV(env);
     return ClassUtil::GetClassLoader(env, klass, classloader_ptr);
   }
 
   static jvmtiError GetSourceDebugExtension(jvmtiEnv* env,
-                                            jclass klass,
-                                            char** source_debug_extension_ptr) {
+                                            jclass klass ATTRIBUTE_UNUSED,
+                                            char** source_debug_extension_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_source_debug_extension);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError RetransformClasses(jvmtiEnv* env, jint class_count, const jclass* classes) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_retransform_classes);
     std::string error_msg;
     jvmtiError res = Transformer::RetransformClasses(ArtJvmTiEnv::AsArtJvmTiEnv(env),
@@ -692,6 +801,7 @@
   static jvmtiError RedefineClasses(jvmtiEnv* env,
                                     jint class_count,
                                     const jvmtiClassDefinition* class_definitions) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_redefine_classes);
     std::string error_msg;
     jvmtiError res = Redefiner::RedefineClasses(ArtJvmTiEnv::AsArtJvmTiEnv(env),
@@ -708,16 +818,19 @@
   }
 
   static jvmtiError GetObjectSize(jvmtiEnv* env, jobject object, jlong* size_ptr) {
+    ENSURE_VALID_ENV(env);
     return ObjectUtil::GetObjectSize(env, object, size_ptr);
   }
 
   static jvmtiError GetObjectHashCode(jvmtiEnv* env, jobject object, jint* hash_code_ptr) {
+    ENSURE_VALID_ENV(env);
     return ObjectUtil::GetObjectHashCode(env, object, hash_code_ptr);
   }
 
   static jvmtiError GetObjectMonitorUsage(jvmtiEnv* env,
-                                          jobject object,
-                                          jvmtiMonitorUsage* info_ptr) {
+                                          jobject object ATTRIBUTE_UNUSED,
+                                          jvmtiMonitorUsage* info_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_monitor_info);
     return ERR(NOT_IMPLEMENTED);
   }
@@ -728,6 +841,7 @@
                                  char** name_ptr,
                                  char** signature_ptr,
                                  char** generic_ptr) {
+    ENSURE_VALID_ENV(env);
     return FieldUtil::GetFieldName(env, klass, field, name_ptr, signature_ptr, generic_ptr);
   }
 
@@ -735,6 +849,7 @@
                                            jclass klass,
                                            jfieldID field,
                                            jclass* declaring_class_ptr) {
+    ENSURE_VALID_ENV(env);
     return FieldUtil::GetFieldDeclaringClass(env, klass, field, declaring_class_ptr);
   }
 
@@ -742,6 +857,7 @@
                                       jclass klass,
                                       jfieldID field,
                                       jint* modifiers_ptr) {
+    ENSURE_VALID_ENV(env);
     return FieldUtil::GetFieldModifiers(env, klass, field, modifiers_ptr);
   }
 
@@ -749,6 +865,7 @@
                                      jclass klass,
                                      jfieldID field,
                                      jboolean* is_synthetic_ptr) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_synthetic_attribute);
     return FieldUtil::IsFieldSynthetic(env, klass, field, is_synthetic_ptr);
   }
@@ -758,30 +875,35 @@
                                   char** name_ptr,
                                   char** signature_ptr,
                                   char** generic_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::GetMethodName(env, method, name_ptr, signature_ptr, generic_ptr);
   }
 
   static jvmtiError GetMethodDeclaringClass(jvmtiEnv* env,
                                             jmethodID method,
                                             jclass* declaring_class_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::GetMethodDeclaringClass(env, method, declaring_class_ptr);
   }
 
   static jvmtiError GetMethodModifiers(jvmtiEnv* env,
                                        jmethodID method,
                                        jint* modifiers_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::GetMethodModifiers(env, method, modifiers_ptr);
   }
 
   static jvmtiError GetMaxLocals(jvmtiEnv* env,
                                  jmethodID method,
                                  jint* max_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::GetMaxLocals(env, method, max_ptr);
   }
 
   static jvmtiError GetArgumentsSize(jvmtiEnv* env,
                                      jmethodID method,
                                      jint* size_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::GetArgumentsSize(env, method, size_ptr);
   }
 
@@ -789,6 +911,7 @@
                                        jmethodID method,
                                        jint* entry_count_ptr,
                                        jvmtiLineNumberEntry** table_ptr) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_line_numbers);
     return MethodUtil::GetLineNumberTable(env, method, entry_count_ptr, table_ptr);
   }
@@ -797,81 +920,100 @@
                                       jmethodID method,
                                       jlocation* start_location_ptr,
                                       jlocation* end_location_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::GetMethodLocation(env, method, start_location_ptr, end_location_ptr);
   }
 
   static jvmtiError GetLocalVariableTable(jvmtiEnv* env,
-                                          jmethodID method,
-                                          jint* entry_count_ptr,
-                                          jvmtiLocalVariableEntry** table_ptr) {
+                                          jmethodID method ATTRIBUTE_UNUSED,
+                                          jint* entry_count_ptr ATTRIBUTE_UNUSED,
+                                          jvmtiLocalVariableEntry** table_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_access_local_variables);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetBytecodes(jvmtiEnv* env,
-                                 jmethodID method,
-                                 jint* bytecode_count_ptr,
-                                 unsigned char** bytecodes_ptr) {
+                                 jmethodID method ATTRIBUTE_UNUSED,
+                                 jint* bytecode_count_ptr ATTRIBUTE_UNUSED,
+                                 unsigned char** bytecodes_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_bytecodes);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError IsMethodNative(jvmtiEnv* env, jmethodID method, jboolean* is_native_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::IsMethodNative(env, method, is_native_ptr);
   }
 
   static jvmtiError IsMethodSynthetic(jvmtiEnv* env, jmethodID method, jboolean* is_synthetic_ptr) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_synthetic_attribute);
     return MethodUtil::IsMethodSynthetic(env, method, is_synthetic_ptr);
   }
 
   static jvmtiError IsMethodObsolete(jvmtiEnv* env, jmethodID method, jboolean* is_obsolete_ptr) {
+    ENSURE_VALID_ENV(env);
     return MethodUtil::IsMethodObsolete(env, method, is_obsolete_ptr);
   }
 
-  static jvmtiError SetNativeMethodPrefix(jvmtiEnv* env, const char* prefix) {
+  static jvmtiError SetNativeMethodPrefix(jvmtiEnv* env, const char* prefix ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_set_native_method_prefix);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError SetNativeMethodPrefixes(jvmtiEnv* env, jint prefix_count, char** prefixes) {
+  static jvmtiError SetNativeMethodPrefixes(jvmtiEnv* env,
+                                            jint prefix_count ATTRIBUTE_UNUSED,
+                                            char** prefixes ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_set_native_method_prefix);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError CreateRawMonitor(jvmtiEnv* env, const char* name, jrawMonitorID* monitor_ptr) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::CreateRawMonitor(env, name, monitor_ptr);
   }
 
   static jvmtiError DestroyRawMonitor(jvmtiEnv* env, jrawMonitorID monitor) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::DestroyRawMonitor(env, monitor);
   }
 
   static jvmtiError RawMonitorEnter(jvmtiEnv* env, jrawMonitorID monitor) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::RawMonitorEnter(env, monitor);
   }
 
   static jvmtiError RawMonitorExit(jvmtiEnv* env, jrawMonitorID monitor) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::RawMonitorExit(env, monitor);
   }
 
   static jvmtiError RawMonitorWait(jvmtiEnv* env, jrawMonitorID monitor, jlong millis) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::RawMonitorWait(env, monitor, millis);
   }
 
   static jvmtiError RawMonitorNotify(jvmtiEnv* env, jrawMonitorID monitor) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::RawMonitorNotify(env, monitor);
   }
 
   static jvmtiError RawMonitorNotifyAll(jvmtiEnv* env, jrawMonitorID monitor) {
+    ENSURE_VALID_ENV(env);
     return MonitorUtil::RawMonitorNotifyAll(env, monitor);
   }
 
   static jvmtiError SetJNIFunctionTable(jvmtiEnv* env, const jniNativeInterface* function_table) {
+    ENSURE_VALID_ENV(env);
     return JNIUtil::SetJNIFunctionTable(env, function_table);
   }
 
   static jvmtiError GetJNIFunctionTable(jvmtiEnv* env, jniNativeInterface** function_table) {
+    ENSURE_VALID_ENV(env);
     return JNIUtil::GetJNIFunctionTable(env, function_table);
   }
 
@@ -926,13 +1068,16 @@
     return gEventHandler.SetEvent(art_env, art_thread, GetArtJvmtiEvent(art_env, event_type), mode);
   }
 
-  static jvmtiError GenerateEvents(jvmtiEnv* env, jvmtiEvent event_type) {
-    return ERR(NOT_IMPLEMENTED);
+  static jvmtiError GenerateEvents(jvmtiEnv* env,
+                                   jvmtiEvent event_type ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
+    return OK;
   }
 
   static jvmtiError GetExtensionFunctions(jvmtiEnv* env,
                                           jint* extension_count_ptr,
                                           jvmtiExtensionFunctionInfo** extensions) {
+    ENSURE_VALID_ENV(env);
     // We do not have any extension functions.
     *extension_count_ptr = 0;
     *extensions = nullptr;
@@ -943,6 +1088,7 @@
   static jvmtiError GetExtensionEvents(jvmtiEnv* env,
                                        jint* extension_count_ptr,
                                        jvmtiExtensionEventInfo** extensions) {
+    ENSURE_VALID_ENV(env);
     // We do not have any extension events.
     *extension_count_ptr = 0;
     *extensions = nullptr;
@@ -951,8 +1097,9 @@
   }
 
   static jvmtiError SetExtensionEventCallback(jvmtiEnv* env,
-                                              jint extension_event_index,
-                                              jvmtiExtensionEvent callback) {
+                                              jint extension_event_index ATTRIBUTE_UNUSED,
+                                              jvmtiExtensionEvent callback ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     // We do not have any extension events, so any call is illegal.
     return ERR(ILLEGAL_ARGUMENT);
   }
@@ -969,11 +1116,16 @@
     ENSURE_NON_NULL(capabilities_ptr);
     ArtJvmTiEnv* art_env = static_cast<ArtJvmTiEnv*>(env);
     jvmtiError ret = OK;
-    jvmtiCapabilities changed;
+    jvmtiCapabilities changed = {};
+    jvmtiCapabilities potential_capabilities = {};
+    ret = env->GetPotentialCapabilities(&potential_capabilities);
+    if (ret != OK) {
+      return ret;
+    }
 #define ADD_CAPABILITY(e) \
     do { \
       if (capabilities_ptr->e == 1) { \
-        if (kPotentialCapabilities.e == 1) { \
+        if (potential_capabilities.e == 1) { \
           if (art_env->capabilities.e != 1) { \
             art_env->capabilities.e = 1; \
             changed.e = 1; \
@@ -1037,7 +1189,7 @@
     ENSURE_VALID_ENV(env);
     ENSURE_NON_NULL(capabilities_ptr);
     ArtJvmTiEnv* art_env = reinterpret_cast<ArtJvmTiEnv*>(env);
-    jvmtiCapabilities changed;
+    jvmtiCapabilities changed = {};
 #define DEL_CAPABILITY(e) \
     do { \
       if (capabilities_ptr->e == 1) { \
@@ -1104,59 +1256,76 @@
     return OK;
   }
 
-  static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+  static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env,
+                                                 jvmtiTimerInfo* info_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_current_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetCurrentThreadCpuTime(jvmtiEnv* env, jlong* nanos_ptr) {
+  static jvmtiError GetCurrentThreadCpuTime(jvmtiEnv* env, jlong* nanos_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_current_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+  static jvmtiError GetThreadCpuTimerInfo(jvmtiEnv* env,
+                                          jvmtiTimerInfo* info_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
-  static jvmtiError GetThreadCpuTime(jvmtiEnv* env, jthread thread, jlong* nanos_ptr) {
+  static jvmtiError GetThreadCpuTime(jvmtiEnv* env,
+                                     jthread thread ATTRIBUTE_UNUSED,
+                                     jlong* nanos_ptr ATTRIBUTE_UNUSED) {
+    ENSURE_VALID_ENV(env);
     ENSURE_HAS_CAP(env, can_get_thread_cpu_time);
     return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError GetTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
+    ENSURE_VALID_ENV(env);
     return TimerUtil::GetTimerInfo(env, info_ptr);
   }
 
   static jvmtiError GetTime(jvmtiEnv* env, jlong* nanos_ptr) {
+    ENSURE_VALID_ENV(env);
     return TimerUtil::GetTime(env, nanos_ptr);
   }
 
   static jvmtiError GetAvailableProcessors(jvmtiEnv* env, jint* processor_count_ptr) {
+    ENSURE_VALID_ENV(env);
     return TimerUtil::GetAvailableProcessors(env, processor_count_ptr);
   }
 
   static jvmtiError AddToBootstrapClassLoaderSearch(jvmtiEnv* env, const char* segment) {
+    ENSURE_VALID_ENV(env);
     return SearchUtil::AddToBootstrapClassLoaderSearch(env, segment);
   }
 
   static jvmtiError AddToSystemClassLoaderSearch(jvmtiEnv* env, const char* segment) {
+    ENSURE_VALID_ENV(env);
     return SearchUtil::AddToSystemClassLoaderSearch(env, segment);
   }
 
   static jvmtiError GetSystemProperties(jvmtiEnv* env, jint* count_ptr, char*** property_ptr) {
+    ENSURE_VALID_ENV(env);
     return PropertiesUtil::GetSystemProperties(env, count_ptr, property_ptr);
   }
 
   static jvmtiError GetSystemProperty(jvmtiEnv* env, const char* property, char** value_ptr) {
+    ENSURE_VALID_ENV(env);
     return PropertiesUtil::GetSystemProperty(env, property, value_ptr);
   }
 
   static jvmtiError SetSystemProperty(jvmtiEnv* env, const char* property, const char* value) {
+    ENSURE_VALID_ENV(env);
     return PropertiesUtil::SetSystemProperty(env, property, value);
   }
 
   static jvmtiError GetPhase(jvmtiEnv* env, jvmtiPhase* phase_ptr) {
+    ENSURE_VALID_ENV(env);
     return PhaseUtil::GetPhase(env, phase_ptr);
   }
 
@@ -1264,7 +1433,10 @@
     }
   }
 
-  static jvmtiError SetVerboseFlag(jvmtiEnv* env, jvmtiVerboseFlag flag, jboolean value) {
+  static jvmtiError SetVerboseFlag(jvmtiEnv* env,
+                                   jvmtiVerboseFlag flag,
+                                   jboolean value) {
+    ENSURE_VALID_ENV(env);
     if (flag == jvmtiVerboseFlag::JVMTI_VERBOSE_OTHER) {
       // OTHER is special, as it's 0, so we can't do a bit check.
       bool val = (value == JNI_TRUE) ? true : false;
@@ -1319,6 +1491,7 @@
   }
 
   static jvmtiError GetJLocationFormat(jvmtiEnv* env, jvmtiJlocationFormat* format_ptr) {
+    ENSURE_VALID_ENV(env);
     // Report BCI as jlocation format. We report dex bytecode indices.
     if (format_ptr == nullptr) {
       return ERR(NULL_POINTER);
@@ -1340,8 +1513,8 @@
 ArtJvmTiEnv::ArtJvmTiEnv(art::JavaVMExt* runtime, EventHandler* event_handler)
     : art_vm(runtime),
       local_data(nullptr),
-      capabilities(),
-      object_tag_table(new ObjectTagTable(event_handler)) {
+      capabilities() {
+  object_tag_table = std::unique_ptr<ObjectTagTable>(new ObjectTagTable(event_handler, this));
   functions = &gJvmtiInterface;
 }
 
@@ -1384,6 +1557,7 @@
   ClassUtil::Register(&gEventHandler);
   DumpUtil::Register(&gEventHandler);
   SearchUtil::Register();
+  HeapUtil::Register();
 
   runtime->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
 
@@ -1396,6 +1570,7 @@
   ClassUtil::Unregister();
   DumpUtil::Unregister();
   SearchUtil::Unregister();
+  HeapUtil::Unregister();
 
   return true;
 }
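
The ENSURE_VALID_ENV additions above make every exported jvmti entry point fail fast on a bad
environment instead of dereferencing it, and the "= {}" value-initialization of the local
jvmtiCapabilities structs guarantees every bit-field starts at zero before ADD_CAPABILITY and
DEL_CAPABILITY flip individual bits. A minimal sketch of such a guard macro (hypothetical: the
real macro in the ART tree may perform additional checks; ERR() is the error-wrapping macro
already used throughout this file):

    // Sketch only: reject a null environment up front.
    #define ENSURE_VALID_ENV(env)            \
      do {                                   \
        if ((env) == nullptr) {              \
          return ERR(INVALID_ENVIRONMENT);   \
        }                                    \
      } while (false)
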
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
index 4f5eb0c..1ddbb86 100644
--- a/runtime/openjdkjvmti/events-inl.h
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -126,6 +126,7 @@
                                                          unsigned char** new_class_data) const {
   static_assert(kEvent == ArtJvmtiEvent::kClassFileLoadHookRetransformable ||
                 kEvent == ArtJvmtiEvent::kClassFileLoadHookNonRetransformable, "Unsupported event");
+  DCHECK(*new_class_data == nullptr);
   jint current_len = class_data_len;
   unsigned char* current_class_data = const_cast<unsigned char*>(class_data);
   ArtJvmTiEnv* last_env = nullptr;
@@ -168,15 +169,19 @@
 // exactly the argument types of the corresponding Jvmti kEvent function pointer.
 
 template <ArtJvmtiEvent kEvent, typename ...Args>
-inline void EventHandler::DispatchEvent(art::Thread* thread,
-                                        Args... args) const {
-  using FnType = void(jvmtiEnv*, Args...);
+inline void EventHandler::DispatchEvent(art::Thread* thread, Args... args) const {
   for (ArtJvmTiEnv* env : envs) {
-    if (ShouldDispatch<kEvent>(env, thread)) {
-      FnType* callback = impl::GetCallback<kEvent>(env);
-      if (callback != nullptr) {
-        (*callback)(env, args...);
-      }
+    DispatchEvent<kEvent, Args...>(env, thread, args...);
+  }
+}
+
+template <ArtJvmtiEvent kEvent, typename ...Args>
+inline void EventHandler::DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const {
+  using FnType = void(jvmtiEnv*, Args...);
+  if (ShouldDispatch<kEvent>(env, thread)) {
+    FnType* callback = impl::GetCallback<kEvent>(env);
+    if (callback != nullptr) {
+      (*callback)(env, args...);
     }
   }
 }
diff --git a/runtime/openjdkjvmti/events.h b/runtime/openjdkjvmti/events.h
index 4e20d17..ae8bf0f 100644
--- a/runtime/openjdkjvmti/events.h
+++ b/runtime/openjdkjvmti/events.h
@@ -156,9 +156,14 @@
                       ArtJvmtiEvent event,
                       jvmtiEventMode mode);
 
+  // Dispatch event to all registered environments.
   template <ArtJvmtiEvent kEvent, typename ...Args>
   ALWAYS_INLINE
   inline void DispatchEvent(art::Thread* thread, Args... args) const;
+  // Dispatch the event to the given environment only.
+  template <ArtJvmtiEvent kEvent, typename ...Args>
+  ALWAYS_INLINE
+  inline void DispatchEvent(ArtJvmTiEnv* env, art::Thread* thread, Args... args) const;
 
   // Tell the event handler that capabilities were added/lost so it can adjust the sent events. If
   // caps_added is true then caps is all the newly set capabilities of the jvmtiEnv. If it is false
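
The dispatch refactoring above splits DispatchEvent in two: the all-environments overload now
just loops and delegates to a new single-environment overload, which later in this change lets
ObjectTagTable deliver ObjectFree events only to the environment that owns the tag table. A
reduced, self-contained model of that shape (Env and the callback field are illustrative
stand-ins, not the ART types):

    #include <vector>

    struct Env {
      void (*on_event)(Env*, int);  // stand-in for the per-env jvmti callback
    };

    // Single-env dispatch: invoke the callback if one is registered.
    inline void DispatchEvent(Env* env, int arg) {
      if (env->on_event != nullptr) {
        env->on_event(env, arg);
      }
    }

    // All-envs dispatch: forward each environment to the overload above.
    inline void DispatchEvent(const std::vector<Env*>& envs, int arg) {
      for (Env* env : envs) {
        DispatchEvent(env, arg);
      }
    }
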
diff --git a/runtime/openjdkjvmti/fixed_up_dex_file.cc b/runtime/openjdkjvmti/fixed_up_dex_file.cc
new file mode 100644
index 0000000..3338358
--- /dev/null
+++ b/runtime/openjdkjvmti/fixed_up_dex_file.cc
@@ -0,0 +1,145 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "fixed_up_dex_file.h"
+#include "dex_file-inl.h"
+
+// Compiler includes.
+#include "dex/dex_to_dex_decompiler.h"
+
+// Runtime includes.
+#include "oat_file.h"
+#include "vdex_file.h"
+
+namespace openjdkjvmti {
+
+static void RecomputeDexChecksum(art::DexFile* dex_file)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  reinterpret_cast<art::DexFile::Header*>(const_cast<uint8_t*>(dex_file->Begin()))->checksum_ =
+      dex_file->CalculateChecksum();
+}
+
+// TODO: This is more complicated than it seems like it should be.
+// Because we do not keep around the data of where in the flat binary log of dex-quickening
+// changes each dex file starts, we need to search for it. Since JVMTI is the exception, though,
+// we are not going to put in the effort to optimize for it.
+static void DoDexUnquicken(const art::DexFile& new_dex_file,
+                           const art::DexFile& original_dex_file)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  const art::OatDexFile* oat_dex = original_dex_file.GetOatDexFile();
+  if (oat_dex == nullptr) {
+    return;
+  }
+  const art::OatFile* oat_file = oat_dex->GetOatFile();
+  if (oat_file == nullptr) {
+    return;
+  }
+  const art::VdexFile* vdex = oat_file->GetVdexFile();
+  if (vdex == nullptr || vdex->GetQuickeningInfo().size() == 0) {
+    return;
+  }
+  const art::ArrayRef<const uint8_t> quickening_info(vdex->GetQuickeningInfo());
+  const uint8_t* quickening_info_ptr = quickening_info.data();
+  for (const art::OatDexFile* cur_oat_dex : oat_file->GetOatDexFiles()) {
+    std::string error;
+    std::unique_ptr<const art::DexFile> cur_dex_file(cur_oat_dex->OpenDexFile(&error));
+    DCHECK(cur_dex_file.get() != nullptr);
+    // Is this the dex file we are looking for?
+    if (UNLIKELY(cur_dex_file->Begin() == original_dex_file.Begin())) {
+      // Simple sanity check.
+      CHECK_EQ(new_dex_file.NumClassDefs(), original_dex_file.NumClassDefs());
+      for (uint32_t i = 0; i < new_dex_file.NumClassDefs(); ++i) {
+        const art::DexFile::ClassDef& class_def = new_dex_file.GetClassDef(i);
+        const uint8_t* class_data = new_dex_file.GetClassData(class_def);
+        if (class_data == nullptr) {
+          continue;
+        }
+        for (art::ClassDataItemIterator it(new_dex_file, class_data); it.HasNext(); it.Next()) {
+          if (it.IsAtMethod() && it.GetMethodCodeItem() != nullptr) {
+            uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
+            quickening_info_ptr += sizeof(uint32_t);
+            art::optimizer::ArtDecompileDEX(
+                *it.GetMethodCodeItem(),
+                art::ArrayRef<const uint8_t>(quickening_info_ptr, quickening_size),
+                /*decompile_return_instruction*/true);
+            quickening_info_ptr += quickening_size;
+          }
+        }
+      }
+      // We don't need to bother looking through the rest of the dex-files.
+      break;
+    } else {
+      // Not the dex file we want. Skip over all the quickening info for all its classes.
+      for (uint32_t i = 0; i < cur_dex_file->NumClassDefs(); ++i) {
+        const art::DexFile::ClassDef& class_def = cur_dex_file->GetClassDef(i);
+        const uint8_t* class_data = cur_dex_file->GetClassData(class_def);
+        if (class_data == nullptr) {
+          continue;
+        }
+        for (art::ClassDataItemIterator it(*cur_dex_file, class_data); it.HasNext(); it.Next()) {
+          if (it.IsAtMethod() && it.GetMethodCodeItem() != nullptr) {
+            uint32_t quickening_size = *reinterpret_cast<const uint32_t*>(quickening_info_ptr);
+            quickening_info_ptr += sizeof(uint32_t);
+            quickening_info_ptr += quickening_size;
+          }
+        }
+      }
+    }
+  }
+}
+
+std::unique_ptr<FixedUpDexFile> FixedUpDexFile::Create(const art::DexFile& original) {
+  // Copy the data into mutable memory.
+  std::vector<unsigned char> data;
+  data.resize(original.Size());
+  memcpy(data.data(), original.Begin(), original.Size());
+  std::string error;
+  std::unique_ptr<const art::DexFile> new_dex_file(art::DexFile::Open(
+      data.data(),
+      data.size(),
+      /*location*/"Unquickening_dexfile.dex",
+      /*location_checksum*/0,
+      /*oat_dex_file*/nullptr,
+      /*verify*/false,
+      /*verify_checksum*/false,
+      &error));
+  if (new_dex_file.get() == nullptr) {
+    LOG(ERROR) << "Unable to open dex file from memory for unquickening! error: " << error;
+    return nullptr;
+  }
+
+  DoDexUnquicken(*new_dex_file, original);
+  RecomputeDexChecksum(const_cast<art::DexFile*>(new_dex_file.get()));
+  std::unique_ptr<FixedUpDexFile> ret(new FixedUpDexFile(std::move(new_dex_file), std::move(data)));
+  return ret;
+}
+
+}  // namespace openjdkjvmti
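
As the TODO in DoDexUnquicken notes, the vdex quickening info is a single flat stream with no
per-dex-file index: for each method with a code item there is a uint32_t byte count followed by
that many bytes of data. Locating one dex file therefore means decoding past every record of each
preceding file, which is exactly what both branches of the loop do. A minimal sketch of stepping
over one record in such a length-prefixed stream (the memcpy sidesteps any alignment assumption;
the in-tree code reads the length directly):

    #include <cstdint>
    #include <cstring>

    // Advance past one [uint32_t size][size bytes] record.
    inline const uint8_t* SkipQuickeningRecord(const uint8_t* p) {
      uint32_t size;
      std::memcpy(&size, p, sizeof(uint32_t));
      return p + sizeof(uint32_t) + size;
    }
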
diff --git a/runtime/openjdkjvmti/fixed_up_dex_file.h b/runtime/openjdkjvmti/fixed_up_dex_file.h
new file mode 100644
index 0000000..db12f48
--- /dev/null
+++ b/runtime/openjdkjvmti/fixed_up_dex_file.h
@@ -0,0 +1,82 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
+#define ART_RUNTIME_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
+
+#include <memory>
+#include <vector>
+
+#include "jni.h"
+#include "jvmti.h"
+#include "base/mutex.h"
+#include "dex_file.h"
+
+namespace openjdkjvmti {
+
+// A holder for a DexFile that has been 'fixed up' to ensure it is fully compliant with the
+// published standard (no internal/quick opcodes, all fields are the defined values, etc.). This
+// is used to ensure that agents get a consistent dex file regardless of which version of Android
+// they are running on.
+class FixedUpDexFile {
+ public:
+  static std::unique_ptr<FixedUpDexFile> Create(const art::DexFile& original)
+      REQUIRES_SHARED(art::Locks::mutator_lock_);
+
+  const art::DexFile& GetDexFile() {
+    return *dex_file_;
+  }
+
+  const unsigned char* Begin() {
+    return data_.data();
+  }
+
+  size_t Size() {
+    return data_.size();
+  }
+
+ private:
+  explicit FixedUpDexFile(std::unique_ptr<const art::DexFile> fixed_up_dex_file,
+                          std::vector<unsigned char> data)
+      : dex_file_(std::move(fixed_up_dex_file)),
+        data_(std::move(data)) {}
+
+  // The fixed-up DexFile.
+  std::unique_ptr<const art::DexFile> dex_file_;
+  // The backing data for dex_file_.
+  const std::vector<unsigned char> data_;
+
+  DISALLOW_COPY_AND_ASSIGN(FixedUpDexFile);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_FIXED_UP_DEX_FILE_H_
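
The holder keeps the mutable byte vector alive for exactly as long as the DexFile view over it. A
hedged usage sketch, assuming a live art::DexFile reference and the mutator lock held (per the
REQUIRES_SHARED annotation on Create); AgentCallback is a hypothetical consumer:

    std::unique_ptr<FixedUpDexFile> fixed = FixedUpDexFile::Create(dex);
    if (fixed != nullptr) {
      // Hand the agent the standard-compliant bytes, not the runtime's copy.
      AgentCallback(fixed->Begin(), fixed->Size());
    }
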
diff --git a/runtime/openjdkjvmti/jvmti.h b/runtime/openjdkjvmti/include/jvmti.h
similarity index 100%
rename from runtime/openjdkjvmti/jvmti.h
rename to runtime/openjdkjvmti/include/jvmti.h
diff --git a/runtime/openjdkjvmti/jvmti_weak_table-inl.h b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
new file mode 100644
index 0000000..f67fffc
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti_weak_table-inl.h
@@ -0,0 +1,389 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
+
+#include "jvmti_weak_table.h"
+
+#include <limits>
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "gc/allocation_listener.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti_allocator.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+
+namespace openjdkjvmti {
+
+template <typename T>
+void JvmtiWeakTable<T>::Lock() {
+  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
+}
+template <typename T>
+void JvmtiWeakTable<T>::Unlock() {
+  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
+}
+template <typename T>
+void JvmtiWeakTable<T>::AssertLocked() {
+  allow_disallow_lock_.AssertHeld(art::Thread::Current());
+}
+
+template <typename T>
+void JvmtiWeakTable<T>::UpdateTableWithReadBarrier() {
+  update_since_last_sweep_ = true;
+
+  auto WithReadBarrierUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root,
+                                    art::mirror::Object* original_obj ATTRIBUTE_UNUSED)
+     REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return original_root.Read<art::kWithReadBarrier>();
+  };
+
+  UpdateTableWith<decltype(WithReadBarrierUpdater), kIgnoreNull>(WithReadBarrierUpdater);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, T* result) {
+  // Under concurrent GC, there is a window between moving objects and sweeping of system
+  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+  // but still have from-space pointers in the table. Explicitly update the table once.
+  // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+  UpdateTableWithReadBarrier();
+  return GetTagLocked(self, obj, result);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::Remove(art::mirror::Object* obj, /* out */ T* tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
+template <typename T>
+bool JvmtiWeakTable<T>::RemoveLocked(art::mirror::Object* obj, T* tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::RemoveLocked(art::Thread* self, art::mirror::Object* obj, T* tag) {
+  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+  if (it != tagged_objects_.end()) {
+    if (tag != nullptr) {
+      *tag = it->second;
+    }
+    tagged_objects_.erase(it);
+    return true;
+  }
+
+  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
+    // Under concurrent GC, there is a window between moving objects and sweeping of system
+    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+    // but still have from-space pointers in the table. Explicitly update the table once.
+    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+
+    // Update the table.
+    UpdateTableWithReadBarrier();
+
+    // And try again.
+    return RemoveLocked(self, obj, tag);
+  }
+
+  // Not in here.
+  return false;
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::Set(art::mirror::Object* obj, T new_tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
+template <typename T>
+bool JvmtiWeakTable<T>::SetLocked(art::mirror::Object* obj, T new_tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
+
+template <typename T>
+bool JvmtiWeakTable<T>::SetLocked(art::Thread* self, art::mirror::Object* obj, T new_tag) {
+  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+  if (it != tagged_objects_.end()) {
+    it->second = new_tag;
+    return true;
+  }
+
+  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
+    // Under concurrent GC, there is a window between moving objects and sweeping of system
+    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+    // but still have from-space pointers in the table. Explicitly update the table once.
+    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+
+    // Update the table.
+    UpdateTableWithReadBarrier();
+
+    // And try again.
+    return SetLocked(self, obj, new_tag);
+  }
+
+  // New element.
+  auto insert_it = tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(obj), new_tag);
+  DCHECK(insert_it.second);
+  return false;
+}
+
+template <typename T>
+void JvmtiWeakTable<T>::Sweep(art::IsMarkedVisitor* visitor) {
+  if (DoesHandleNullOnSweep()) {
+    SweepImpl<true>(visitor);
+  } else {
+    SweepImpl<false>(visitor);
+  }
+
+  // Under concurrent GC, there is a window between moving objects and sweeping of system
+  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+  // but still have from-space pointers in the table. We explicitly update the table then
+  // to ensure we compare against to-space pointers. But we want to do this only once. Once
+  // sweeping is done, we know all objects are to-space pointers until the next GC cycle,
+  // so we re-enable the explicit update for the next marking.
+  update_since_last_sweep_ = false;
+}
+
+template <typename T>
+template <bool kHandleNull>
+void JvmtiWeakTable<T>::SweepImpl(art::IsMarkedVisitor* visitor) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+
+  auto IsMarkedUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root ATTRIBUTE_UNUSED,
+                             art::mirror::Object* original_obj) {
+    return visitor->IsMarked(original_obj);
+  };
+
+  UpdateTableWith<decltype(IsMarkedUpdater),
+                  kHandleNull ? kCallHandleNull : kRemoveNull>(IsMarkedUpdater);
+}
+
+template <typename T>
+template <typename Updater, typename JvmtiWeakTable<T>::TableUpdateNullTarget kTargetNull>
+ALWAYS_INLINE inline void JvmtiWeakTable<T>::UpdateTableWith(Updater& updater) {
+  // We optimistically hope that elements will still be well-distributed when re-inserting them.
+  // So play with the map mechanics, and postpone rehashing. This avoids the need for a side
+  // vector and two passes.
+  float original_max_load_factor = tagged_objects_.max_load_factor();
+  tagged_objects_.max_load_factor(std::numeric_limits<float>::max());
+  // For checking that a max load-factor actually does what we expect.
+  size_t original_bucket_count = tagged_objects_.bucket_count();
+
+  for (auto it = tagged_objects_.begin(); it != tagged_objects_.end();) {
+    DCHECK(!it->first.IsNull());
+    art::mirror::Object* original_obj = it->first.template Read<art::kWithoutReadBarrier>();
+    art::mirror::Object* target_obj = updater(it->first, original_obj);
+    if (original_obj != target_obj) {
+      if (kTargetNull == kIgnoreNull && target_obj == nullptr) {
+        // Ignore null target, don't do anything.
+      } else {
+        T tag = it->second;
+        it = tagged_objects_.erase(it);
+        if (target_obj != nullptr) {
+          tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(target_obj), tag);
+          DCHECK_EQ(original_bucket_count, tagged_objects_.bucket_count());
+        } else if (kTargetNull == kCallHandleNull) {
+          HandleNullSweep(tag);
+        }
+        continue;  // Iterator was implicitly updated by erase.
+      }
+    }
+    it++;
+  }
+
+  tagged_objects_.max_load_factor(original_max_load_factor);
+  // TODO: consider rehash here.
+}
+
+template <typename T>
+template <typename Storage, class Allocator>
+struct JvmtiWeakTable<T>::ReleasableContainer {
+  using allocator_type = Allocator;
+
+  explicit ReleasableContainer(const allocator_type& alloc, size_t reserve = 10)
+      : allocator(alloc),
+        data(reserve > 0 ? allocator.allocate(reserve) : nullptr),
+        size(0),
+        capacity(reserve) {
+  }
+
+  ~ReleasableContainer() {
+    if (data != nullptr) {
+      allocator.deallocate(data, capacity);
+      capacity = 0;
+      size = 0;
+    }
+  }
+
+  Storage* Release() {
+    Storage* tmp = data;
+
+    data = nullptr;
+    size = 0;
+    capacity = 0;
+
+    return tmp;
+  }
+
+  void Resize(size_t new_capacity) {
+    CHECK_GT(new_capacity, capacity);
+
+    Storage* tmp = allocator.allocate(new_capacity);
+    DCHECK(tmp != nullptr);
+    if (data != nullptr) {
+      memcpy(tmp, data, sizeof(Storage) * size);
+    }
+    Storage* old = data;
+    data = tmp;
+    allocator.deallocate(old, capacity);
+    capacity = new_capacity;
+  }
+
+  void Pushback(const Storage& elem) {
+    if (size == capacity) {
+      size_t new_capacity = 2 * capacity + 1;
+      Resize(new_capacity);
+    }
+    data[size++] = elem;
+  }
+
+  Allocator allocator;
+  Storage* data;
+  size_t size;
+  size_t capacity;
+};
+
+template <typename T>
+jvmtiError JvmtiWeakTable<T>::GetTaggedObjects(jvmtiEnv* jvmti_env,
+                                               jint tag_count,
+                                               const T* tags,
+                                               jint* count_ptr,
+                                               jobject** object_result_ptr,
+                                               T** tag_result_ptr) {
+  if (tag_count < 0) {
+    return ERR(ILLEGAL_ARGUMENT);
+  }
+  // Check tags for null before it is dereferenced in the loop below.
+  if (tags == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+  if (tag_count > 0) {
+    for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
+      if (tags[i] == 0) {
+        return ERR(ILLEGAL_ARGUMENT);
+      }
+    }
+  }
+  if (count_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  art::JNIEnvExt* jni_env = self->GetJniEnv();
+
+  constexpr size_t kDefaultSize = 10;
+  size_t initial_object_size;
+  size_t initial_tag_size;
+  if (tag_count == 0) {
+    initial_object_size = (object_result_ptr != nullptr) ? tagged_objects_.size() : 0;
+    initial_tag_size = (tag_result_ptr != nullptr) ? tagged_objects_.size() : 0;
+  } else {
+    initial_object_size = initial_tag_size = kDefaultSize;
+  }
+  JvmtiAllocator<void> allocator(jvmti_env);
+  ReleasableContainer<jobject, JvmtiAllocator<jobject>> selected_objects(allocator,
+                                                                         initial_object_size);
+  ReleasableContainer<T, JvmtiAllocator<T>> selected_tags(allocator, initial_tag_size);
+
+  size_t count = 0;
+  for (auto& pair : tagged_objects_) {
+    bool select;
+    if (tag_count > 0) {
+      select = false;
+      for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
+        if (tags[i] == pair.second) {
+          select = true;
+          break;
+        }
+      }
+    } else {
+      select = true;
+    }
+
+    if (select) {
+      art::mirror::Object* obj = pair.first.template Read<art::kWithReadBarrier>();
+      if (obj != nullptr) {
+        count++;
+        if (object_result_ptr != nullptr) {
+          selected_objects.Pushback(jni_env->AddLocalReference<jobject>(obj));
+        }
+        if (tag_result_ptr != nullptr) {
+          selected_tags.Pushback(pair.second);
+        }
+      }
+    }
+  }
+
+  if (object_result_ptr != nullptr) {
+    *object_result_ptr = selected_objects.Release();
+  }
+  if (tag_result_ptr != nullptr) {
+    *tag_result_ptr = selected_tags.Release();
+  }
+  *count_ptr = static_cast<jint>(count);
+  return ERR(NONE);
+}
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_INL_H_
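
The trick in UpdateTableWith deserves a spelled-out example: raising max_load_factor to the float
maximum pins the bucket count, so erase/emplace during iteration can never trigger a rehash,
iterators stay valid, and keys can be rewritten in a single pass with no side vector. A
self-contained illustration on a plain std::unordered_map, where flipping a negative key's sign
stands in for "the pointer moved" (and, as in the real table, the updated key is assumed not to
collide with an existing one):

    #include <limits>
    #include <unordered_map>

    void UpdateKeysInPlace(std::unordered_map<int, int>& m) {
      const float original = m.max_load_factor();
      m.max_load_factor(std::numeric_limits<float>::max());  // freeze the bucket count
      for (auto it = m.begin(); it != m.end();) {
        if (it->first < 0) {                 // stand-in for "key needs updating"
          const int value = it->second;
          const int new_key = -it->first;    // updated key no longer matches the test
          it = m.erase(it);                  // erase hands back the next iterator
          m.emplace(new_key, value);         // no rehash, so iteration stays valid
          continue;
        }
        ++it;
      }
      m.max_load_factor(original);           // restore; later inserts may rehash again
    }
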
diff --git a/runtime/openjdkjvmti/jvmti_weak_table.h b/runtime/openjdkjvmti/jvmti_weak_table.h
new file mode 100644
index 0000000..eeea75a
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti_weak_table.h
@@ -0,0 +1,215 @@
+/* Copyright (C) 2017 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
+
+#include <unordered_map>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "gc/system_weak.h"
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jvmti.h"
+#include "mirror/object.h"
+#include "thread-inl.h"
+
+namespace openjdkjvmti {
+
+class EventHandler;
+
+// A system-weak container mapping objects to elements of the template type. This corresponds
+// to a weak hash map. For historical reasons the stored value is called "tag."
+template <typename T>
+class JvmtiWeakTable : public art::gc::SystemWeakHolder {
+ public:
+  JvmtiWeakTable()
+      : art::gc::SystemWeakHolder(art::kTaggingLockLevel),
+        update_since_last_sweep_(false) {
+  }
+
+  // Remove the mapping for the given object, returning whether such a mapping existed (and the old
+  // value).
+  bool Remove(art::mirror::Object* obj, /* out */ T* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  bool RemoveLocked(art::mirror::Object* obj, /* out */ T* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Set the mapping for the given object. Returns true if this overwrites an already existing
+  // mapping.
+  virtual bool Set(art::mirror::Object* obj, T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  virtual bool SetLocked(art::mirror::Object* obj, T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Return the value associated with the given object. Returns true if the mapping exists, false
+  // otherwise.
+  bool GetTag(art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    art::MutexLock mu(self, allow_disallow_lock_);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+  bool GetTagLocked(art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    allow_disallow_lock_.AssertHeld(self);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+
+  // Sweep the container. DO NOT CALL MANUALLY.
+  void Sweep(art::IsMarkedVisitor* visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  // Return all objects whose tag is contained in the given tags array, or all tagged objects
+  // if tag_count is 0.
+  jvmtiError GetTaggedObjects(jvmtiEnv* jvmti_env,
+                              jint tag_count,
+                              const T* tags,
+                              /* out */ jint* count_ptr,
+                              /* out */ jobject** object_result_ptr,
+                              /* out */ T** tag_result_ptr)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  // Locking functions, to allow coarse-grained locking and amortization.
+  void Lock() ACQUIRE(allow_disallow_lock_);
+  void Unlock() RELEASE(allow_disallow_lock_);
+  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+
+ protected:
+  // Should HandleNullSweep be called when Sweep detects the release of an object?
+  virtual bool DoesHandleNullOnSweep() {
+    return false;
+  }
+  // If DoesHandleNullOnSweep returns true, this function will be called.
+  virtual void HandleNullSweep(T tag ATTRIBUTE_UNUSED) {}
+
+ private:
+  bool SetLocked(art::Thread* self, art::mirror::Object* obj, T tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool RemoveLocked(art::Thread* self, art::mirror::Object* obj, /* out */ T* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool GetTagLocked(art::Thread* self, art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+    if (it != tagged_objects_.end()) {
+      *result = it->second;
+      return true;
+    }
+
+    // Performance optimization: To avoid multiple table updates, ensure that during GC we
+    // only update once. See the comment on the implementation of GetTagSlowPath.
+    if (art::kUseReadBarrier &&
+        self != nullptr &&
+        self->GetIsGcMarking() &&
+        !update_since_last_sweep_) {
+      return GetTagSlowPath(self, obj, result);
+    }
+
+    return false;
+  }
+
+  // Slow-path for GetTag. We didn't find the object, but we might be storing from-pointers and
+  // are asked to retrieve with a to-pointer.
+  bool GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, /* out */ T* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Update the table by doing read barriers on each element, ensuring that to-space pointers
+  // are stored.
+  void UpdateTableWithReadBarrier()
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  template <bool kHandleNull>
+  void SweepImpl(art::IsMarkedVisitor* visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  enum TableUpdateNullTarget {
+    kIgnoreNull,
+    kRemoveNull,
+    kCallHandleNull
+  };
+
+  template <typename Updater, TableUpdateNullTarget kTargetNull>
+  void UpdateTableWith(Updater& updater)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  template <typename Storage, class Allocator = std::allocator<T>>
+  struct ReleasableContainer;
+
+  struct HashGcRoot {
+    size_t operator()(const art::GcRoot<art::mirror::Object>& r) const
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return reinterpret_cast<uintptr_t>(r.Read<art::kWithoutReadBarrier>());
+    }
+  };
+
+  struct EqGcRoot {
+    bool operator()(const art::GcRoot<art::mirror::Object>& r1,
+                    const art::GcRoot<art::mirror::Object>& r2) const
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return r1.Read<art::kWithoutReadBarrier>() == r2.Read<art::kWithoutReadBarrier>();
+    }
+  };
+
+  std::unordered_map<art::GcRoot<art::mirror::Object>,
+                     T,
+                     HashGcRoot,
+                     EqGcRoot> tagged_objects_
+      GUARDED_BY(allow_disallow_lock_)
+      GUARDED_BY(art::Locks::mutator_lock_);
+  // To avoid repeatedly scanning the whole table, remember if we did that since the last sweep.
+  bool update_since_last_sweep_;
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_WEAK_TABLE_H_
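
HashGcRoot and EqGcRoot implement identity semantics: the hash is the raw pointer value and
equality compares the pointers, with both reads done kWithoutReadBarrier so that a lookup itself
never triggers a read barrier. The same shape on a plain pointer key, for reference:

    #include <cstdint>
    #include <unordered_map>

    struct PtrHash {
      size_t operator()(const void* p) const {
        return static_cast<size_t>(reinterpret_cast<uintptr_t>(p));
      }
    };

    // Identity map from object address to a 64-bit tag; pointer equality is
    // the default std::equal_to, mirroring EqGcRoot's role above.
    using TagMap = std::unordered_map<const void*, long long, PtrHash>;
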
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/runtime/openjdkjvmti/object_tagging.cc
index b27c2a3..dcdd3ed 100644
--- a/runtime/openjdkjvmti/object_tagging.cc
+++ b/runtime/openjdkjvmti/object_tagging.cc
@@ -34,354 +34,34 @@
 #include <limits>
 
 #include "art_jvmti.h"
-#include "base/logging.h"
 #include "events-inl.h"
-#include "gc/allocation_listener.h"
-#include "instrumentation.h"
-#include "jni_env_ext-inl.h"
-#include "jvmti_allocator.h"
-#include "mirror/class.h"
-#include "mirror/object.h"
-#include "runtime.h"
-#include "ScopedLocalRef.h"
+#include "jvmti_weak_table-inl.h"
 
 namespace openjdkjvmti {
 
-void ObjectTagTable::Lock() {
-  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
-}
-void ObjectTagTable::Unlock() {
-  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
-}
-void ObjectTagTable::AssertLocked() {
-  allow_disallow_lock_.AssertHeld(art::Thread::Current());
-}
-
-void ObjectTagTable::UpdateTableWithReadBarrier() {
-  update_since_last_sweep_ = true;
-
-  auto WithReadBarrierUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root,
-                                    art::mirror::Object* original_obj ATTRIBUTE_UNUSED)
-     REQUIRES_SHARED(art::Locks::mutator_lock_) {
-    return original_root.Read<art::kWithReadBarrier>();
-  };
-
-  UpdateTableWith<decltype(WithReadBarrierUpdater), kIgnoreNull>(WithReadBarrierUpdater);
-}
-
-bool ObjectTagTable::GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, jlong* result) {
-  // Under concurrent GC, there is a window between moving objects and sweeping of system
-  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-  // but still have from-space pointers in the table. Explicitly update the table once.
-  // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
-  UpdateTableWithReadBarrier();
-  return GetTagLocked(self, obj, result);
-}
-
-void ObjectTagTable::Add(art::mirror::Object* obj, jlong tag) {
-  // Same as Set(), as we don't have duplicates in an unordered_map.
-  Set(obj, tag);
-}
-
-bool ObjectTagTable::Remove(art::mirror::Object* obj, jlong* tag) {
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-  Wait(self);
-
-  return RemoveLocked(self, obj, tag);
-}
-bool ObjectTagTable::RemoveLocked(art::mirror::Object* obj, jlong* tag) {
-  art::Thread* self = art::Thread::Current();
-  allow_disallow_lock_.AssertHeld(self);
-  Wait(self);
-
-  return RemoveLocked(self, obj, tag);
-}
-
-bool ObjectTagTable::RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag) {
-  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
-  if (it != tagged_objects_.end()) {
-    if (tag != nullptr) {
-      *tag = it->second;
-    }
-    tagged_objects_.erase(it);
-    return true;
-  }
-
-  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
-    // Under concurrent GC, there is a window between moving objects and sweeping of system
-    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-    // but still have from-space pointers in the table. Explicitly update the table once.
-    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
-
-    // Update the table.
-    UpdateTableWithReadBarrier();
-
-    // And try again.
-    return RemoveLocked(self, obj, tag);
-  }
-
-  // Not in here.
-  return false;
-}
+// Explicit instantiation for jlong, the JVMTI tag type.
+template class JvmtiWeakTable<jlong>;
 
 bool ObjectTagTable::Set(art::mirror::Object* obj, jlong new_tag) {
   if (new_tag == 0) {
     jlong tmp;
     return Remove(obj, &tmp);
   }
-
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-  Wait(self);
-
-  return SetLocked(self, obj, new_tag);
+  return JvmtiWeakTable<jlong>::Set(obj, new_tag);
 }
 bool ObjectTagTable::SetLocked(art::mirror::Object* obj, jlong new_tag) {
   if (new_tag == 0) {
     jlong tmp;
     return RemoveLocked(obj, &tmp);
   }
-
-  art::Thread* self = art::Thread::Current();
-  allow_disallow_lock_.AssertHeld(self);
-  Wait(self);
-
-  return SetLocked(self, obj, new_tag);
+  return JvmtiWeakTable<jlong>::SetLocked(obj, new_tag);
 }
 
-bool ObjectTagTable::SetLocked(art::Thread* self, art::mirror::Object* obj, jlong new_tag) {
-  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
-  if (it != tagged_objects_.end()) {
-    it->second = new_tag;
-    return true;
-  }
-
-  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
-    // Under concurrent GC, there is a window between moving objects and sweeping of system
-    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-    // but still have from-space pointers in the table. Explicitly update the table once.
-    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
-
-    // Update the table.
-    UpdateTableWithReadBarrier();
-
-    // And try again.
-    return SetLocked(self, obj, new_tag);
-  }
-
-  // New element.
-  auto insert_it = tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(obj), new_tag);
-  DCHECK(insert_it.second);
-  return false;
+bool ObjectTagTable::DoesHandleNullOnSweep() {
+  return event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kObjectFree);
 }
-
-void ObjectTagTable::Sweep(art::IsMarkedVisitor* visitor) {
-  if (event_handler_->IsEventEnabledAnywhere(ArtJvmtiEvent::kObjectFree)) {
-    SweepImpl<true>(visitor);
-  } else {
-    SweepImpl<false>(visitor);
-  }
-
-  // Under concurrent GC, there is a window between moving objects and sweeping of system
-  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
-  // but still have from-space pointers in the table. We explicitly update the table then
-  // to ensure we compare against to-space pointers. But we want to do this only once. Once
-  // sweeping is done, we know all objects are to-space pointers until the next GC cycle,
-  // so we re-enable the explicit update for the next marking.
-  update_since_last_sweep_ = false;
-}
-
-template <bool kHandleNull>
-void ObjectTagTable::SweepImpl(art::IsMarkedVisitor* visitor) {
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-
-  auto IsMarkedUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root ATTRIBUTE_UNUSED,
-                             art::mirror::Object* original_obj) {
-    return visitor->IsMarked(original_obj);
-  };
-
-  UpdateTableWith<decltype(IsMarkedUpdater),
-                  kHandleNull ? kCallHandleNull : kRemoveNull>(IsMarkedUpdater);
-}
-
 void ObjectTagTable::HandleNullSweep(jlong tag) {
-  event_handler_->DispatchEvent<ArtJvmtiEvent::kObjectFree>(nullptr, tag);
-}
-
-template <typename T, ObjectTagTable::TableUpdateNullTarget kTargetNull>
-ALWAYS_INLINE inline void ObjectTagTable::UpdateTableWith(T& updater) {
-  // We optimistically hope that elements will still be well-distributed when re-inserting them.
-  // So play with the map mechanics, and postpone rehashing. This avoids the need of a side
-  // vector and two passes.
-  float original_max_load_factor = tagged_objects_.max_load_factor();
-  tagged_objects_.max_load_factor(std::numeric_limits<float>::max());
-  // For checking that a max load-factor actually does what we expect.
-  size_t original_bucket_count = tagged_objects_.bucket_count();
-
-  for (auto it = tagged_objects_.begin(); it != tagged_objects_.end();) {
-    DCHECK(!it->first.IsNull());
-    art::mirror::Object* original_obj = it->first.Read<art::kWithoutReadBarrier>();
-    art::mirror::Object* target_obj = updater(it->first, original_obj);
-    if (original_obj != target_obj) {
-      if (kTargetNull == kIgnoreNull && target_obj == nullptr) {
-        // Ignore null target, don't do anything.
-      } else {
-        jlong tag = it->second;
-        it = tagged_objects_.erase(it);
-        if (target_obj != nullptr) {
-          tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(target_obj), tag);
-          DCHECK_EQ(original_bucket_count, tagged_objects_.bucket_count());
-        } else if (kTargetNull == kCallHandleNull) {
-          HandleNullSweep(tag);
-        }
-        continue;  // Iterator was implicitly updated by erase.
-      }
-    }
-    it++;
-  }
-
-  tagged_objects_.max_load_factor(original_max_load_factor);
-  // TODO: consider rehash here.
-}
-
-template <typename T, class Allocator = std::allocator<T>>
-struct ReleasableContainer {
-  using allocator_type = Allocator;
-
-  explicit ReleasableContainer(const allocator_type& alloc, size_t reserve = 10)
-      : allocator(alloc),
-        data(reserve > 0 ? allocator.allocate(reserve) : nullptr),
-        size(0),
-        capacity(reserve) {
-  }
-
-  ~ReleasableContainer() {
-    if (data != nullptr) {
-      allocator.deallocate(data, capacity);
-      capacity = 0;
-      size = 0;
-    }
-  }
-
-  T* Release() {
-    T* tmp = data;
-
-    data = nullptr;
-    size = 0;
-    capacity = 0;
-
-    return tmp;
-  }
-
-  void Resize(size_t new_capacity) {
-    CHECK_GT(new_capacity, capacity);
-
-    T* tmp = allocator.allocate(new_capacity);
-    DCHECK(tmp != nullptr);
-    if (data != nullptr) {
-      memcpy(tmp, data, sizeof(T) * size);
-    }
-    T* old = data;
-    data = tmp;
-    allocator.deallocate(old, capacity);
-    capacity = new_capacity;
-  }
-
-  void Pushback(const T& elem) {
-    if (size == capacity) {
-      size_t new_capacity = 2 * capacity + 1;
-      Resize(new_capacity);
-    }
-    data[size++] = elem;
-  }
-
-  Allocator allocator;
-  T* data;
-  size_t size;
-  size_t capacity;
-};
-
-jvmtiError ObjectTagTable::GetTaggedObjects(jvmtiEnv* jvmti_env,
-                                            jint tag_count,
-                                            const jlong* tags,
-                                            jint* count_ptr,
-                                            jobject** object_result_ptr,
-                                            jlong** tag_result_ptr) {
-  if (tag_count < 0) {
-    return ERR(ILLEGAL_ARGUMENT);
-  }
-  if (tag_count > 0) {
-    for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
-      if (tags[i] == 0) {
-        return ERR(ILLEGAL_ARGUMENT);
-      }
-    }
-  }
-  if (tags == nullptr) {
-    return ERR(NULL_POINTER);
-  }
-  if (count_ptr == nullptr) {
-    return ERR(NULL_POINTER);
-  }
-
-  art::Thread* self = art::Thread::Current();
-  art::MutexLock mu(self, allow_disallow_lock_);
-  Wait(self);
-
-  art::JNIEnvExt* jni_env = self->GetJniEnv();
-
-  constexpr size_t kDefaultSize = 10;
-  size_t initial_object_size;
-  size_t initial_tag_size;
-  if (tag_count == 0) {
-    initial_object_size = (object_result_ptr != nullptr) ? tagged_objects_.size() : 0;
-    initial_tag_size = (tag_result_ptr != nullptr) ? tagged_objects_.size() : 0;
-  } else {
-    initial_object_size = initial_tag_size = kDefaultSize;
-  }
-  JvmtiAllocator<void> allocator(jvmti_env);
-  ReleasableContainer<jobject, JvmtiAllocator<jobject>> selected_objects(allocator, initial_object_size);
-  ReleasableContainer<jlong, JvmtiAllocator<jlong>> selected_tags(allocator, initial_tag_size);
-
-  size_t count = 0;
-  for (auto& pair : tagged_objects_) {
-    bool select;
-    if (tag_count > 0) {
-      select = false;
-      for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
-        if (tags[i] == pair.second) {
-          select = true;
-          break;
-        }
-      }
-    } else {
-      select = true;
-    }
-
-    if (select) {
-      art::mirror::Object* obj = pair.first.Read<art::kWithReadBarrier>();
-      if (obj != nullptr) {
-        count++;
-        if (object_result_ptr != nullptr) {
-          selected_objects.Pushback(jni_env->AddLocalReference<jobject>(obj));
-        }
-        if (tag_result_ptr != nullptr) {
-          selected_tags.Pushback(pair.second);
-        }
-      }
-    }
-  }
-
-  if (object_result_ptr != nullptr) {
-    *object_result_ptr = selected_objects.Release();
-  }
-  if (tag_result_ptr != nullptr) {
-    *tag_result_ptr = selected_tags.Release();
-  }
-  *count_ptr = static_cast<jint>(count);
-  return ERR(NONE);
+  event_handler_->DispatchEvent<ArtJvmtiEvent::kObjectFree>(jvmti_env_, nullptr, tag);
 }
 
 }  // namespace openjdkjvmti
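
The "template class JvmtiWeakTable<jlong>;" line is the explicit-instantiation half of a common
C++ split: the template definitions live in jvmti_weak_table-inl.h, only this .cc includes them,
and the instantiation emits the jlong symbols once for every other translation unit to link
against. In miniature (illustrative names):

    // counter.h: declaration only; most users include just this.
    template <typename T>
    class Counter {
     public:
      T Next();
     private:
      T value_ = T();
    };

    // counter-inl.h: the out-of-line definition, included by few .cc files.
    template <typename T>
    T Counter<T>::Next() { return ++value_; }

    // counter.cc: pin down the instantiations the library exports.
    template class Counter<long>;  // emits Counter<long>::Next here
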
diff --git a/runtime/openjdkjvmti/object_tagging.h b/runtime/openjdkjvmti/object_tagging.h
index 0296f1a..ca84e44 100644
--- a/runtime/openjdkjvmti/object_tagging.h
+++ b/runtime/openjdkjvmti/object_tagging.h
@@ -1,17 +1,32 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
  *
- *      http://www.apache.org/licenses/LICENSE-2.0
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
  */
 
 #ifndef ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
@@ -20,62 +35,28 @@
 #include <unordered_map>
 
 #include "base/mutex.h"
-#include "gc/system_weak.h"
-#include "gc_root-inl.h"
 #include "globals.h"
 #include "jvmti.h"
+#include "jvmti_weak_table.h"
 #include "mirror/object.h"
-#include "thread-inl.h"
 
 namespace openjdkjvmti {
 
+struct ArtJvmTiEnv;
 class EventHandler;
 
-class ObjectTagTable : public art::gc::SystemWeakHolder {
+class ObjectTagTable FINAL : public JvmtiWeakTable<jlong> {
  public:
-  explicit ObjectTagTable(EventHandler* event_handler)
-      : art::gc::SystemWeakHolder(kTaggingLockLevel),
-        update_since_last_sweep_(false),
-        event_handler_(event_handler) {
-  }
+  ObjectTagTable(EventHandler* event_handler, ArtJvmTiEnv* env)
+      : event_handler_(event_handler), jvmti_env_(env) {}
 
-  void Add(art::mirror::Object* obj, jlong tag)
+  bool Set(art::mirror::Object* obj, jlong tag) OVERRIDE
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_);
-
-  bool Remove(art::mirror::Object* obj, jlong* tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-  bool RemoveLocked(art::mirror::Object* obj, jlong* tag)
+  bool SetLocked(art::mirror::Object* obj, jlong tag) OVERRIDE
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(allow_disallow_lock_);
 
-  bool Set(art::mirror::Object* obj, jlong tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-  bool SetLocked(art::mirror::Object* obj, jlong tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  bool GetTag(art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_) {
-    art::Thread* self = art::Thread::Current();
-    art::MutexLock mu(self, allow_disallow_lock_);
-    Wait(self);
-
-    return GetTagLocked(self, obj, result);
-  }
-  bool GetTagLocked(art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_) {
-    art::Thread* self = art::Thread::Current();
-    allow_disallow_lock_.AssertHeld(self);
-    Wait(self);
-
-    return GetTagLocked(self, obj, result);
-  }
-
   jlong GetTagOrZero(art::mirror::Object* obj)
       REQUIRES_SHARED(art::Locks::mutator_lock_)
       REQUIRES(!allow_disallow_lock_) {
@@ -91,109 +72,13 @@
     return tmp;
   }
 
-  void Sweep(art::IsMarkedVisitor* visitor)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-
-  jvmtiError GetTaggedObjects(jvmtiEnv* jvmti_env,
-                              jint tag_count,
-                              const jlong* tags,
-                              jint* count_ptr,
-                              jobject** object_result_ptr,
-                              jlong** tag_result_ptr)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-
-  void Lock() ACQUIRE(allow_disallow_lock_);
-  void Unlock() RELEASE(allow_disallow_lock_);
-  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+ protected:
+  bool DoesHandleNullOnSweep() OVERRIDE;
+  void HandleNullSweep(jlong tag) OVERRIDE;
 
  private:
-  bool SetLocked(art::Thread* self, art::mirror::Object* obj, jlong tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  bool RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  bool GetTagLocked(art::Thread* self, art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_) {
-    auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
-    if (it != tagged_objects_.end()) {
-      *result = it->second;
-      return true;
-    }
-
-    if (art::kUseReadBarrier &&
-        self != nullptr &&
-        self->GetIsGcMarking() &&
-        !update_since_last_sweep_) {
-      return GetTagSlowPath(self, obj, result);
-    }
-
-    return false;
-  }
-
-  // Slow-path for GetTag. We didn't find the object, but we might be storing from-pointers and
-  // are asked to retrieve with a to-pointer.
-  bool GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, jlong* result)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  // Update the table by doing read barriers on each element, ensuring that to-space pointers
-  // are stored.
-  void UpdateTableWithReadBarrier()
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  template <bool kHandleNull>
-  void SweepImpl(art::IsMarkedVisitor* visitor)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(!allow_disallow_lock_);
-  void HandleNullSweep(jlong tag);
-
-  enum TableUpdateNullTarget {
-    kIgnoreNull,
-    kRemoveNull,
-    kCallHandleNull
-  };
-
-  template <typename T, TableUpdateNullTarget kTargetNull>
-  void UpdateTableWith(T& updater)
-      REQUIRES_SHARED(art::Locks::mutator_lock_)
-      REQUIRES(allow_disallow_lock_);
-
-  struct HashGcRoot {
-    size_t operator()(const art::GcRoot<art::mirror::Object>& r) const
-        REQUIRES_SHARED(art::Locks::mutator_lock_) {
-      return reinterpret_cast<uintptr_t>(r.Read<art::kWithoutReadBarrier>());
-    }
-  };
-
-  struct EqGcRoot {
-    bool operator()(const art::GcRoot<art::mirror::Object>& r1,
-                    const art::GcRoot<art::mirror::Object>& r2) const
-        REQUIRES_SHARED(art::Locks::mutator_lock_) {
-      return r1.Read<art::kWithoutReadBarrier>() == r2.Read<art::kWithoutReadBarrier>();
-    }
-  };
-
-  // The tag table is used when visiting roots. So it needs to have a low lock level.
-  static constexpr art::LockLevel kTaggingLockLevel =
-      static_cast<art::LockLevel>(art::LockLevel::kAbortLock + 1);
-
-  std::unordered_map<art::GcRoot<art::mirror::Object>,
-                     jlong,
-                     HashGcRoot,
-                     EqGcRoot> tagged_objects_
-      GUARDED_BY(allow_disallow_lock_)
-      GUARDED_BY(art::Locks::mutator_lock_);
-  // To avoid repeatedly scanning the whole table, remember if we did that since the last sweep.
-  bool update_since_last_sweep_;
-
   EventHandler* event_handler_;
+  ArtJvmTiEnv* jvmti_env_;
 };
 
 }  // namespace openjdkjvmti
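
Note on the hunk above: the map, read-barrier handling, and sweeping that used to live in ObjectTagTable have been hoisted into the generic JvmtiWeakTable<T> base (jvmti_weak_table.h), leaving only the jlong instantiation plus the sweep hooks, which presumably dispatch ObjectFree events through the EventHandler. A minimal, self-contained sketch of the pattern follows; the names and the lock-free shape are assumptions for illustration, not the real jvmti_weak_table.h contents:

    #include <cstdint>
    #include <unordered_map>

    // Illustrative sketch only (not the real jvmti_weak_table.h): a table that
    // maps raw object pointers to a value T and lets the GC sweep dead entries.
    // "Object" stands in for art::mirror::Object; the locking and read-barrier
    // support the real table inherits from SystemWeakHolder is omitted.
    using Object = void;

    template <typename T>
    class WeakTableSketch {
     public:
      virtual ~WeakTableSketch() {}

      // Returns true if an existing entry was updated (the Set contract).
      virtual bool Set(Object* obj, T value) {
        auto result = table_.emplace(obj, value);
        if (!result.second) {
          result.first->second = value;
          return true;
        }
        return false;
      }

      bool GetTag(Object* obj, T* result) const {
        auto it = table_.find(obj);
        if (it == table_.end()) {
          return false;
        }
        *result = it->second;
        return true;
      }

      // Called during GC. Entries whose object died are removed; subclasses
      // that opt in via DoesHandleNullOnSweep() see each dropped value, which
      // is the hook ObjectTagTable overrides above.
      template <typename IsMarked>
      void Sweep(IsMarked is_marked) {
        for (auto it = table_.begin(); it != table_.end();) {
          if (!is_marked(it->first)) {
            if (DoesHandleNullOnSweep()) {
              HandleNullSweep(it->second);
            }
            it = table_.erase(it);
          } else {
            ++it;
          }
        }
      }

     protected:
      virtual bool DoesHandleNullOnSweep() { return false; }
      virtual void HandleNullSweep(T /*value*/) {}

     private:
      std::unordered_map<Object*, T> table_;
    };
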
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
index 4282e38..e94c4e6 100644
--- a/runtime/openjdkjvmti/ti_class.cc
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -43,6 +43,7 @@
 #include "common_throws.h"
 #include "dex_file_annotations.h"
 #include "events-inl.h"
+#include "fixed_up_dex_file.h"
 #include "gc/heap.h"
 #include "gc_root.h"
 #include "handle.h"
@@ -55,6 +56,8 @@
 #include "mirror/object_reference.h"
 #include "mirror/object-inl.h"
 #include "mirror/reference.h"
+#include "primitive.h"
+#include "reflection.h"
 #include "runtime.h"
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
@@ -62,6 +65,7 @@
 #include "thread-inl.h"
 #include "thread_list.h"
 #include "ti_class_loader.h"
+#include "ti_phase.h"
 #include "ti_redefine.h"
 #include "utils.h"
 
@@ -77,9 +81,9 @@
       REQUIRES_SHARED(art::Locks::mutator_lock_) {
   // Make the mmap
   std::string error_msg;
+  art::ArraySlice<const unsigned char> final_data(final_dex_data, final_len);
   std::unique_ptr<art::MemMap> map(Redefiner::MoveDataToMemMap(orig_location,
-                                                               final_len,
-                                                               final_dex_data,
+                                                               final_data,
                                                                &error_msg));
   if (map.get() == nullptr) {
     LOG(WARNING) << "Unable to allocate mmap for redefined dex file! Error was: " << error_msg;
@@ -142,12 +146,26 @@
       // It is a primitive or array. Just return
       return;
     }
+    jvmtiPhase phase = PhaseUtil::GetPhaseUnchecked();
+    if (UNLIKELY(phase != JVMTI_PHASE_START && phase != JVMTI_PHASE_LIVE)) {
+      // We want to wait until we are at least in the START phase so that all WellKnownClasses and
+      // mirror classes have been initialized and loaded. The runtime relies on these classes having
+      // specific fields and methods present. Since PreDefine hooks don't need to abide by this
+      // restriction we will simply not send the event for these classes.
+      LOG(WARNING) << "Ignoring load of class <" << descriptor << "> as it is being loaded during "
+                   << "runtime initialization.";
+      return;
+    }
+
+    // Strip the leading 'L' and trailing ';' from the descriptor.
     std::string name(std::string(descriptor).substr(1, strlen(descriptor) - 2));
 
     art::Thread* self = art::Thread::Current();
     art::JNIEnvExt* env = self->GetJniEnv();
     ScopedLocalRef<jobject> loader(
         env, class_loader.IsNull() ? nullptr : env->AddLocalReference<jobject>(class_loader.Get()));
+    std::unique_ptr<FixedUpDexFile> dex_file_copy(FixedUpDexFile::Create(initial_dex_file));
+
     // Go back to native.
     art::ScopedThreadSuspension sts(self, art::ThreadState::kNative);
     // Call all Non-retransformable agents.
@@ -161,14 +179,14 @@
         loader.get(),
         name.c_str(),
         static_cast<jobject>(nullptr),  // Android doesn't seem to have protection domains
-        static_cast<jint>(initial_dex_file.Size()),
-        static_cast<const unsigned char*>(initial_dex_file.Begin()),
+        static_cast<jint>(dex_file_copy->Size()),
+        static_cast<const unsigned char*>(dex_file_copy->Begin()),
         static_cast<jint*>(&post_no_redefine_len),
         static_cast<unsigned char**>(&post_no_redefine_dex_data));
     if (post_no_redefine_dex_data == nullptr) {
       DCHECK_EQ(post_no_redefine_len, 0);
-      post_no_redefine_dex_data = const_cast<unsigned char*>(initial_dex_file.Begin());
-      post_no_redefine_len = initial_dex_file.Size();
+      post_no_redefine_dex_data = const_cast<unsigned char*>(dex_file_copy->Begin());
+      post_no_redefine_len = dex_file_copy->Size();
     } else {
       post_no_redefine_unique_ptr = std::unique_ptr<const unsigned char>(post_no_redefine_dex_data);
       DCHECK_GT(post_no_redefine_len, 0);
@@ -197,7 +215,7 @@
       DCHECK_GT(final_len, 0);
     }
 
-    if (final_dex_data != initial_dex_file.Begin()) {
+    if (final_dex_data != dex_file_copy->Begin()) {
       LOG(WARNING) << "Changing class " << descriptor;
       art::ScopedObjectAccess soa(self);
       art::StackHandleScope<2> hs(self);
@@ -215,14 +233,22 @@
       }
 
       // Allocate the byte array to store the dex file bytes in.
-      art::Handle<art::mirror::ByteArray> arr(hs.NewHandle(
-          art::mirror::ByteArray::AllocateAndFill(
-              self,
-              reinterpret_cast<const signed char*>(post_no_redefine_dex_data),
-              post_no_redefine_len)));
+      art::MutableHandle<art::mirror::Object> arr(hs.NewHandle<art::mirror::Object>(nullptr));
+      if (post_no_redefine_dex_data == dex_file_copy->Begin() && name != "java/lang/Long") {
+        // We didn't have any non-retransformable agents. We can just cache a pointer to the
+        // initial_dex_file. It will be kept live by the class_loader.
+        jlong dex_ptr = reinterpret_cast<uintptr_t>(&initial_dex_file);
+        art::JValue val;
+        val.SetJ(dex_ptr);
+        arr.Assign(art::BoxPrimitive(art::Primitive::kPrimLong, val));
+      } else {
+        arr.Assign(art::mirror::ByteArray::AllocateAndFill(
+            self,
+            reinterpret_cast<const signed char*>(post_no_redefine_dex_data),
+            post_no_redefine_len));
+      }
       if (arr.IsNull()) {
-        LOG(WARNING) << "Unable to allocate byte array for initial dex-file bytes. Aborting "
-                     << "transformation";
+        LOG(WARNING) << "Unable to allocate memory for initial dex-file. Aborting transformation";
         self->AssertPendingOOMException();
         return;
       }
@@ -246,7 +272,7 @@
       }
 
       // Actually set the ClassExt's original bytes once we have actually succeeded.
-      ext->SetOriginalDexFileBytes(arr.Get());
+      ext->SetOriginalDexFile(arr.Get());
       // Set the return values
       *final_class_def = &dex_file->GetClassDef(0);
       *final_dex_file = dex_file.release();
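
Worth noting for the ti_class.cc hunks above: the ClassExt original-dex-file slot is now polymorphic. GetDexDataForRetransformation (in ti_class_definition.cc below) decodes three shapes: a byte[] holding explicit bytes, a DexCache whose DexFile is used directly, and a boxed java.lang.Long carrying a raw DexFile pointer. The pointer round-trip relies on jlong being at least pointer-sized; a standalone sketch of just that invariant, with DexFile as a stand-in type rather than the ART class:

    #include <cassert>
    #include <cstdint>

    // Sketch of the pointer <-> jlong round-trip used when the initial dex
    // file is cached as a boxed Long. DexFile here is a stand-in type.
    using jlong = int64_t;
    struct DexFile { uint32_t num_class_defs; };

    jlong EncodeDexFilePointer(const DexFile* dex_file) {
      // Same cast chain as the hunk above: pointer -> uintptr_t -> jlong.
      return static_cast<jlong>(reinterpret_cast<uintptr_t>(dex_file));
    }

    const DexFile* DecodeDexFilePointer(jlong value) {
      return reinterpret_cast<const DexFile*>(static_cast<uintptr_t>(value));
    }

    int main() {
      static_assert(sizeof(jlong) >= sizeof(void*), "jlong must hold a pointer");
      DexFile dex{1u};
      jlong boxed = EncodeDexFilePointer(&dex);
      assert(DecodeDexFilePointer(boxed) == &dex);
      return 0;
    }

The cached pointer stays valid because, as the comment in the hunk says, the DexFile is kept live by the class_loader.
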
diff --git a/runtime/openjdkjvmti/ti_class_definition.cc b/runtime/openjdkjvmti/ti_class_definition.cc
index 2c2a79b..153692b 100644
--- a/runtime/openjdkjvmti/ti_class_definition.cc
+++ b/runtime/openjdkjvmti/ti_class_definition.cc
@@ -31,25 +31,145 @@
 
 #include "ti_class_definition.h"
 
+#include "base/array_slice.h"
 #include "dex_file.h"
+#include "fixed_up_dex_file.h"
 #include "handle_scope-inl.h"
 #include "handle.h"
 #include "mirror/class-inl.h"
 #include "mirror/object-inl.h"
+#include "reflection.h"
 #include "thread.h"
 
 namespace openjdkjvmti {
 
-bool ArtClassDefinition::IsModified(art::Thread* self) const {
-  if (modified) {
+bool ArtClassDefinition::IsModified() const {
+  // RedefineClasses calls are always 'modified' since they need to change the original_dex_file
+  // of the class.
+  if (redefined_) {
     return true;
   }
   // Check if the dex file we want to set is the same as the current one.
+  // Unfortunately we need to do this check even if no modifications have been made, since agents
+  // could have been removed in the meantime, leaving us with a different dex file. The dex
+  // checksum sits near the start of the header, so the memcmp is likely to fail fast.
+  return static_cast<jint>(original_dex_file_.size()) != dex_len_ ||
+      memcmp(&original_dex_file_.At(0), dex_data_.get(), dex_len_) != 0;
+}
+
+jvmtiError ArtClassDefinition::InitCommon(ArtJvmTiEnv* env, jclass klass) {
+  JNIEnv* jni_env = GetJniEnv(env);
+  if (jni_env == nullptr) {
+    return ERR(INTERNAL);
+  }
+  art::ScopedObjectAccess soa(jni_env);
+  art::ObjPtr<art::mirror::Class> m_klass(soa.Decode<art::mirror::Class>(klass));
+  if (m_klass.IsNull()) {
+    return ERR(INVALID_CLASS);
+  }
+  klass_ = klass;
+  loader_ = soa.AddLocalReference<jobject>(m_klass->GetClassLoader());
+  std::string descriptor_store;
+  std::string descriptor(m_klass->GetDescriptor(&descriptor_store));
+  name_ = descriptor.substr(1, descriptor.size() - 2);
+  // Android doesn't really have protection domains.
+  protection_domain_ = nullptr;
+  return OK;
+}
+
+// Gets the dex data backing the given class, for use in retransformation.
+static jvmtiError GetDexDataForRetransformation(ArtJvmTiEnv* env,
+                                                art::Handle<art::mirror::Class> klass,
+                                                /*out*/jint* dex_data_len,
+                                                /*out*/unsigned char** dex_data)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  art::StackHandleScope<3> hs(art::Thread::Current());
+  art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->GetExtData()));
+  const art::DexFile* dex_file = nullptr;
+  if (!ext.IsNull()) {
+    art::Handle<art::mirror::Object> orig_dex(hs.NewHandle(ext->GetOriginalDexFile()));
+    if (!orig_dex.IsNull()) {
+      if (orig_dex->IsArrayInstance()) {
+        DCHECK(orig_dex->GetClass()->GetComponentType()->IsPrimitiveByte());
+        art::Handle<art::mirror::ByteArray> orig_dex_bytes(
+            hs.NewHandle(art::down_cast<art::mirror::ByteArray*>(orig_dex->AsArray())));
+        *dex_data_len = static_cast<jint>(orig_dex_bytes->GetLength());
+        return CopyDataIntoJvmtiBuffer(
+            env,
+            reinterpret_cast<const unsigned char*>(orig_dex_bytes->GetData()),
+            *dex_data_len,
+            /*out*/dex_data);
+      } else if (orig_dex->IsDexCache()) {
+        dex_file = orig_dex->AsDexCache()->GetDexFile();
+      } else {
+        DCHECK_EQ(orig_dex->GetClass()->GetPrimitiveType(), art::Primitive::kPrimLong);
+        art::ObjPtr<art::mirror::Class> prim_long_class(
+            art::Runtime::Current()->GetClassLinker()->GetClassRoot(
+                art::ClassLinker::kPrimitiveLong));
+        art::JValue val;
+        if (!art::UnboxPrimitiveForResult(orig_dex.Get(), prim_long_class, &val)) {
+          // This should never happen.
+          return ERR(INTERNAL);
+        }
+        dex_file = reinterpret_cast<const art::DexFile*>(static_cast<uintptr_t>(val.GetJ()));
+      }
+    }
+  }
+  if (dex_file == nullptr) {
+    dex_file = &klass->GetDexFile();
+  }
+  std::unique_ptr<FixedUpDexFile> fixed_dex_file(FixedUpDexFile::Create(*dex_file));
+  *dex_data_len = static_cast<jint>(fixed_dex_file->Size());
+  return CopyDataIntoJvmtiBuffer(env,
+                                 fixed_dex_file->Begin(),
+                                 fixed_dex_file->Size(),
+                                 /*out*/dex_data);
+}
+
+jvmtiError ArtClassDefinition::Init(ArtJvmTiEnv* env, jclass klass) {
+  jvmtiError res = InitCommon(env, klass);
+  if (res != OK) {
+    return res;
+  }
+  unsigned char* new_data = nullptr;
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);
   art::StackHandleScope<1> hs(self);
-  art::Handle<art::mirror::Class> h_klass(hs.NewHandle(self->DecodeJObject(klass)->AsClass()));
-  const art::DexFile& cur_dex_file = h_klass->GetDexFile();
-  return static_cast<jint>(cur_dex_file.Size()) != dex_len ||
-      memcmp(cur_dex_file.Begin(), dex_data.get(), dex_len) != 0;
+  art::Handle<art::mirror::Class> m_klass(hs.NewHandle(self->DecodeJObject(klass)->AsClass()));
+  res = GetDexDataForRetransformation(env, m_klass, &dex_len_, &new_data);
+  if (res != OK) {
+    return res;
+  }
+  dex_data_ = MakeJvmtiUniquePtr(env, new_data);
+  if (m_klass->GetExtData() == nullptr || m_klass->GetExtData()->GetOriginalDexFile() == nullptr) {
+    // We have never redefined this class yet. Keep track of what the (de-quickened) dex file
+    // looks like so we can tell if anything has changed. Really we would like to always take the
+    // 'else' branch, but the fact that we de-quickened the file means we must keep a copy.
+    unsigned char* original_data_memory = nullptr;
+    res = CopyDataIntoJvmtiBuffer(env, dex_data_.get(), dex_len_, &original_data_memory);
+    original_dex_file_memory_ = MakeJvmtiUniquePtr(env, original_data_memory);
+    original_dex_file_ = art::ArraySlice<const unsigned char>(original_data_memory, dex_len_);
+  } else {
+    // We know that we have been redefined at least once (there is an original_dex_file set in
+    // the class) so we can just use the current dex file directly.
+    const art::DexFile& dex_file = m_klass->GetDexFile();
+    original_dex_file_ = art::ArraySlice<const unsigned char>(dex_file.Begin(), dex_file.Size());
+  }
+  return res;
+}
+
+jvmtiError ArtClassDefinition::Init(ArtJvmTiEnv* env, const jvmtiClassDefinition& def) {
+  jvmtiError res = InitCommon(env, def.klass);
+  if (res != OK) {
+    return res;
+  }
+  unsigned char* new_data = nullptr;
+  original_dex_file_ = art::ArraySlice<const unsigned char>(def.class_bytes, def.class_byte_count);
+  redefined_ = true;
+  dex_len_ = def.class_byte_count;
+  res = CopyDataIntoJvmtiBuffer(env, def.class_bytes, def.class_byte_count, /*out*/ &new_data);
+  dex_data_ = MakeJvmtiUniquePtr(env, new_data);
+  return res;
 }
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_class_definition.h b/runtime/openjdkjvmti/ti_class_definition.h
index 3c251d4..43d0c3f 100644
--- a/runtime/openjdkjvmti/ti_class_definition.h
+++ b/runtime/openjdkjvmti/ti_class_definition.h
@@ -39,37 +39,89 @@
 // A struct that stores data needed for redefining/transforming classes. This structure should only
 // ever be accessed from a single thread and must not survive past the completion of the
 // redefinition/retransformation function that created it.
-struct ArtClassDefinition {
+class ArtClassDefinition {
  public:
-  jclass klass;
-  jobject loader;
-  std::string name;
-  jobject protection_domain;
-  jint dex_len;
-  JvmtiUniquePtr<unsigned char> dex_data;
-  art::ArraySlice<const unsigned char> original_dex_file;
+  ArtClassDefinition()
+      : klass_(nullptr),
+        loader_(nullptr),
+        name_(),
+        protection_domain_(nullptr),
+        dex_len_(0),
+        dex_data_(nullptr),
+        original_dex_file_memory_(nullptr),
+        original_dex_file_(),
+        redefined_(false) {}
 
-  ArtClassDefinition() = default;
+  jvmtiError Init(ArtJvmTiEnv* env, jclass klass);
+  jvmtiError Init(ArtJvmTiEnv* env, const jvmtiClassDefinition& def);
+
   ArtClassDefinition(ArtClassDefinition&& o) = default;
+  ArtClassDefinition& operator=(ArtClassDefinition&& o) = default;
 
   void SetNewDexData(ArtJvmTiEnv* env, jint new_dex_len, unsigned char* new_dex_data) {
+    DCHECK(IsInitialized());
     if (new_dex_data == nullptr) {
       return;
-    } else if (new_dex_data != dex_data.get() || new_dex_len != dex_len) {
-      SetModified();
-      dex_len = new_dex_len;
-      dex_data = MakeJvmtiUniquePtr(env, new_dex_data);
+    } else if (new_dex_data != dex_data_.get() || new_dex_len != dex_len_) {
+      dex_len_ = new_dex_len;
+      dex_data_ = MakeJvmtiUniquePtr(env, new_dex_data);
     }
   }
 
-  void SetModified() {
-    modified = true;
+  art::ArraySlice<const unsigned char> GetNewOriginalDexFile() const {
+    DCHECK(IsInitialized());
+    if (redefined_) {
+      return original_dex_file_;
+    } else {
+      return art::ArraySlice<const unsigned char>();
+    }
   }
 
-  bool IsModified(art::Thread* self) const REQUIRES_SHARED(art::Locks::mutator_lock_);
+  bool IsModified() const;
+
+  bool IsInitialized() const {
+    return klass_ != nullptr;
+  }
+
+  jclass GetClass() const {
+    DCHECK(IsInitialized());
+    return klass_;
+  }
+
+  jobject GetLoader() const {
+    DCHECK(IsInitialized());
+    return loader_;
+  }
+
+  const std::string& GetName() const {
+    DCHECK(IsInitialized());
+    return name_;
+  }
+
+  jobject GetProtectionDomain() const {
+    DCHECK(IsInitialized());
+    return protection_domain_;
+  }
+
+  art::ArraySlice<const unsigned char> GetDexData() const {
+    DCHECK(IsInitialized());
+    return art::ArraySlice<const unsigned char>(dex_data_.get(), dex_len_);
+  }
 
  private:
-  bool modified;
+  jvmtiError InitCommon(ArtJvmTiEnv* env, jclass klass);
+
+  jclass klass_;
+  jobject loader_;
+  std::string name_;
+  jobject protection_domain_;
+  jint dex_len_;
+  JvmtiUniquePtr<unsigned char> dex_data_;
+  JvmtiUniquePtr<unsigned char> original_dex_file_memory_;
+  art::ArraySlice<const unsigned char> original_dex_file_;
+  bool redefined_;
+
+  DISALLOW_COPY_AND_ASSIGN(ArtClassDefinition);
 };
 
 }  // namespace openjdkjvmti
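
With ArtClassDefinition now owning the comparison, the common no-op retransform case boils down to a length check plus a memcmp against the cached original bytes; because the dex header's checksum sits at offset 8, mismatching files typically diverge within the first few bytes. A standalone sketch of that fast path, with std::vector standing in for the ArraySlice/JvmtiUniquePtr pair the real class uses:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Sketch of the IsModified() fast path from ti_class_definition.cc above.
    // A redefinition is always 'modified'; otherwise the current bytes are
    // compared against the cached originals.
    bool IsModifiedSketch(bool redefined,
                          const std::vector<uint8_t>& original_dex_file,
                          const uint8_t* dex_data,
                          int32_t dex_len) {
      if (redefined) {
        return true;
      }
      return static_cast<int32_t>(original_dex_file.size()) != dex_len ||
             std::memcmp(original_dex_file.data(), dex_data,
                         static_cast<size_t>(dex_len)) != 0;
    }
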
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
index 976ce66..49d9aca 100644
--- a/runtime/openjdkjvmti/ti_heap.cc
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -25,6 +25,7 @@
 #include "gc_root-inl.h"
 #include "jni_env_ext.h"
 #include "jni_internal.h"
+#include "jvmti_weak_table-inl.h"
 #include "mirror/class.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
@@ -41,6 +42,21 @@
 
 namespace {
 
+struct IndexCache {
+  // The number of interface fields implemented by the class. This is a prefix to all assigned
+  // field indices.
+  size_t interface_fields;
+
+  // It would be nice to also cache the following, but it is complicated to wire up into the
+  // generic visit:
+  // The number of fields in interfaces and superclasses. This is the first index assigned to
+  // fields of the class.
+  // size_t superclass_fields;
+};
+using IndexCachingTable = JvmtiWeakTable<IndexCache>;
+
+static IndexCachingTable gIndexCachingTable;
+
 // Report the contents of a string, if a callback is set.
 jint ReportString(art::ObjPtr<art::mirror::Object> obj,
                   jvmtiEnv* env,
@@ -50,25 +66,28 @@
   if (UNLIKELY(cb->string_primitive_value_callback != nullptr) && obj->IsString()) {
     art::ObjPtr<art::mirror::String> str = obj->AsString();
     int32_t string_length = str->GetLength();
-    jvmtiError alloc_error;
-    JvmtiUniquePtr<uint16_t[]> data = AllocJvmtiUniquePtr<uint16_t[]>(env,
-                                                                      string_length,
-                                                                      &alloc_error);
-    if (data == nullptr) {
-      // TODO: Not really sure what to do here. Should we abort the iteration and go all the way
-      //       back? For now just warn.
-      LOG(WARNING) << "Unable to allocate buffer for string reporting! Silently dropping value.";
-      return 0;
-    }
+    JvmtiUniquePtr<uint16_t[]> data;
 
-    if (str->IsCompressed()) {
-      uint8_t* compressed_data = str->GetValueCompressed();
-      for (int32_t i = 0; i != string_length; ++i) {
-        data[i] = compressed_data[i];
+    if (string_length > 0) {
+      jvmtiError alloc_error;
+      data = AllocJvmtiUniquePtr<uint16_t[]>(env, string_length, &alloc_error);
+      if (data == nullptr) {
+        // TODO: Not really sure what to do here. Should we abort the iteration and go all the way
+        //       back? For now just warn.
+        LOG(WARNING) << "Unable to allocate buffer for string reporting! Silently dropping value."
+                     << " >" << str->ToModifiedUtf8() << "<";
+        return 0;
       }
-    } else {
-      // Can copy directly.
-      memcpy(data.get(), str->GetValue(), string_length * sizeof(uint16_t));
+
+      if (str->IsCompressed()) {
+        uint8_t* compressed_data = str->GetValueCompressed();
+        for (int32_t i = 0; i != string_length; ++i) {
+          data[i] = compressed_data[i];
+        }
+      } else {
+        // Can copy directly.
+        memcpy(data.get(), str->GetValue(), string_length * sizeof(uint16_t));
+      }
     }
 
     const jlong class_tag = tag_table->GetTagOrZero(obj->GetClass());
@@ -159,6 +178,433 @@
   return 0;
 }
 
+template <typename UserData>
+bool VisitorFalse(art::ObjPtr<art::mirror::Object> obj ATTRIBUTE_UNUSED,
+                  art::ObjPtr<art::mirror::Class> klass ATTRIBUTE_UNUSED,
+                  art::ArtField& field ATTRIBUTE_UNUSED,
+                  size_t field_index ATTRIBUTE_UNUSED,
+                  UserData* user_data ATTRIBUTE_UNUSED) {
+  return false;
+}
+
+template <typename UserData, bool kCallVisitorOnRecursion>
+class FieldVisitor {
+ public:
+  // Report the contents of the fields of the given object, if a callback is set.
+  template <typename StaticPrimitiveVisitor,
+            typename StaticReferenceVisitor,
+            typename InstancePrimitiveVisitor,
+            typename InstanceReferenceVisitor>
+  static bool ReportFields(art::ObjPtr<art::mirror::Object> obj,
+                           UserData* user_data,
+                           StaticPrimitiveVisitor& static_prim_visitor,
+                           StaticReferenceVisitor& static_ref_visitor,
+                           InstancePrimitiveVisitor& instance_prim_visitor,
+                           InstanceReferenceVisitor& instance_ref_visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    FieldVisitor fv(user_data);
+
+    if (obj->IsClass()) {
+      // When visiting a class, we only visit the static fields of the given class. No field of
+      // superclasses is visited.
+      art::ObjPtr<art::mirror::Class> klass = obj->AsClass();
+      // Only report fields on resolved classes. We need valid field data.
+      if (!klass->IsResolved()) {
+        return false;
+      }
+      return fv.ReportFieldsImpl(nullptr,
+                                 obj->AsClass(),
+                                 obj->AsClass()->IsInterface(),
+                                 static_prim_visitor,
+                                 static_ref_visitor,
+                                 instance_prim_visitor,
+                                 instance_ref_visitor);
+    } else {
+      // See comment above. Just double-checking here, but an instance *should* mean the class was
+      // resolved.
+      DCHECK(obj->GetClass()->IsResolved() || obj->GetClass()->IsErroneousResolved());
+      return fv.ReportFieldsImpl(obj,
+                                 obj->GetClass(),
+                                 false,
+                                 static_prim_visitor,
+                                 static_ref_visitor,
+                                 instance_prim_visitor,
+                                 instance_ref_visitor);
+    }
+  }
+
+ private:
+  explicit FieldVisitor(UserData* user_data) : user_data_(user_data) {}
+
+  // Report the contents of fields of the given object. If obj is null, report the static fields,
+  // otherwise the instance fields.
+  template <typename StaticPrimitiveVisitor,
+            typename StaticReferenceVisitor,
+            typename InstancePrimitiveVisitor,
+            typename InstanceReferenceVisitor>
+  bool ReportFieldsImpl(art::ObjPtr<art::mirror::Object> obj,
+                        art::ObjPtr<art::mirror::Class> klass,
+                        bool skip_java_lang_object,
+                        StaticPrimitiveVisitor& static_prim_visitor,
+                        StaticReferenceVisitor& static_ref_visitor,
+                        InstancePrimitiveVisitor& instance_prim_visitor,
+                        InstanceReferenceVisitor& instance_ref_visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    // Compute the offset of field indices.
+    size_t interface_field_count = CountInterfaceFields(klass);
+
+    size_t tmp;
+    bool aborted = ReportFieldsRecursive(obj,
+                                         klass,
+                                         interface_field_count,
+                                         skip_java_lang_object,
+                                         static_prim_visitor,
+                                         static_ref_visitor,
+                                         instance_prim_visitor,
+                                         instance_ref_visitor,
+                                         &tmp);
+    return aborted;
+  }
+
+  // Recursively visit the fields of a class and its superclasses. Return true if aborted.
+  template <typename StaticPrimitiveVisitor,
+            typename StaticReferenceVisitor,
+            typename InstancePrimitiveVisitor,
+            typename InstanceReferenceVisitor>
+  bool ReportFieldsRecursive(art::ObjPtr<art::mirror::Object> obj,
+                             art::ObjPtr<art::mirror::Class> klass,
+                             size_t interface_fields,
+                             bool skip_java_lang_object,
+                             StaticPrimitiveVisitor& static_prim_visitor,
+                             StaticReferenceVisitor& static_ref_visitor,
+                             InstancePrimitiveVisitor& instance_prim_visitor,
+                             InstanceReferenceVisitor& instance_ref_visitor,
+                             size_t* field_index_out)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    DCHECK(klass != nullptr);
+    size_t field_index;
+    if (klass->GetSuperClass() == nullptr) {
+      // j.l.Object. Start with the fields from interfaces.
+      field_index = interface_fields;
+      if (skip_java_lang_object) {
+        *field_index_out = field_index;
+        return false;
+      }
+    } else {
+      // Report superclass fields.
+      if (kCallVisitorOnRecursion) {
+        if (ReportFieldsRecursive(obj,
+                                  klass->GetSuperClass(),
+                                  interface_fields,
+                                  skip_java_lang_object,
+                                  static_prim_visitor,
+                                  static_ref_visitor,
+                                  instance_prim_visitor,
+                                  instance_ref_visitor,
+                                  &field_index)) {
+          return true;
+        }
+      } else {
+        // Still call, but with empty visitors. This is required for correct counting.
+        ReportFieldsRecursive(obj,
+                              klass->GetSuperClass(),
+                              interface_fields,
+                              skip_java_lang_object,
+                              VisitorFalse<UserData>,
+                              VisitorFalse<UserData>,
+                              VisitorFalse<UserData>,
+                              VisitorFalse<UserData>,
+                              &field_index);
+      }
+    }
+
+    // Now visit fields for the current klass.
+
+    for (auto& static_field : klass->GetSFields()) {
+      if (static_field.IsPrimitiveType()) {
+        if (static_prim_visitor(obj,
+                                klass,
+                                static_field,
+                                field_index,
+                                user_data_)) {
+          return true;
+        }
+      } else {
+        if (static_ref_visitor(obj,
+                               klass,
+                               static_field,
+                               field_index,
+                               user_data_)) {
+          return true;
+        }
+      }
+      field_index++;
+    }
+
+    for (auto& instance_field : klass->GetIFields()) {
+      if (instance_field.IsPrimitiveType()) {
+        if (instance_prim_visitor(obj,
+                                  klass,
+                                  instance_field,
+                                  field_index,
+                                  user_data_)) {
+          return true;
+        }
+      } else {
+        if (instance_ref_visitor(obj,
+                                 klass,
+                                 instance_field,
+                                 field_index,
+                                 user_data_)) {
+          return true;
+        }
+      }
+      field_index++;
+    }
+
+    *field_index_out = field_index;
+    return false;
+  }
+
+  // Implements a visit of the implemented interfaces of a given class.
+  template <typename T>
+  struct RecursiveInterfaceVisit {
+    static void VisitStatic(art::Thread* self, art::ObjPtr<art::mirror::Class> klass, T& visitor)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      RecursiveInterfaceVisit rv;
+      rv.Visit(self, klass, visitor);
+    }
+
+    void Visit(art::Thread* self, art::ObjPtr<art::mirror::Class> klass, T& visitor)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      // First visit the parent, to get the order right.
+      // (We do this in preparation for actual visiting of interface fields.)
+      if (klass->GetSuperClass() != nullptr) {
+        Visit(self, klass->GetSuperClass(), visitor);
+      }
+      for (uint32_t i = 0; i != klass->NumDirectInterfaces(); ++i) {
+        art::ObjPtr<art::mirror::Class> inf_klass =
+            art::mirror::Class::GetDirectInterface(self, klass, i);
+        DCHECK(inf_klass != nullptr);
+        VisitInterface(self, inf_klass, visitor);
+      }
+    }
+
+    void VisitInterface(art::Thread* self, art::ObjPtr<art::mirror::Class> inf_klass, T& visitor)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      auto it = visited_interfaces.find(inf_klass.Ptr());
+      if (it != visited_interfaces.end()) {
+        return;
+      }
+      visited_interfaces.insert(inf_klass.Ptr());
+
+      // Let the visitor know about this one. Note that this order is acceptable, as the ordering
+      // of these fields never matters for known visitors.
+      visitor(inf_klass);
+
+      // Now visit the superinterfaces.
+      for (uint32_t i = 0; i != inf_klass->NumDirectInterfaces(); ++i) {
+        art::ObjPtr<art::mirror::Class> super_inf_klass =
+            art::mirror::Class::GetDirectInterface(self, inf_klass, i);
+        DCHECK(super_inf_klass != nullptr);
+        VisitInterface(self, super_inf_klass, visitor);
+      }
+    }
+
+    std::unordered_set<art::mirror::Class*> visited_interfaces;
+  };
+
+  // Count the interface fields. Note that we cannot use the interface table, as that only contains
+  // "non-marker" interfaces (= interfaces with methods).
+  static size_t CountInterfaceFields(art::ObjPtr<art::mirror::Class> klass)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    // Do we have a cached value?
+    IndexCache tmp;
+    if (gIndexCachingTable.GetTag(klass.Ptr(), &tmp)) {
+      return tmp.interface_fields;
+    }
+
+    size_t count = 0;
+    auto visitor = [&count](art::ObjPtr<art::mirror::Class> inf_klass)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      DCHECK(inf_klass->IsInterface());
+      DCHECK_EQ(0u, inf_klass->NumInstanceFields());
+      count += inf_klass->NumStaticFields();
+    };
+    RecursiveInterfaceVisit<decltype(visitor)>::VisitStatic(art::Thread::Current(), klass, visitor);
+
+    // Store this into the cache.
+    tmp.interface_fields = count;
+    gIndexCachingTable.Set(klass.Ptr(), tmp);
+
+    return count;
+  }
+
+  UserData* user_data_;
+};
+
+// Debug helper. Prints the structure of an object.
+template <bool kStatic, bool kRef>
+struct DumpVisitor {
+  static bool Callback(art::ObjPtr<art::mirror::Object> obj ATTRIBUTE_UNUSED,
+                       art::ObjPtr<art::mirror::Class> klass ATTRIBUTE_UNUSED,
+                       art::ArtField& field,
+                       size_t field_index,
+                       void* user_data ATTRIBUTE_UNUSED)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    LOG(ERROR) << (kStatic ? "static " : "instance ")
+               << (kRef ? "ref " : "primitive ")
+               << field.PrettyField()
+               << " @ "
+               << field_index;
+    return false;
+  }
+};
+ATTRIBUTE_UNUSED
+void DumpObjectFields(art::ObjPtr<art::mirror::Object> obj)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  if (obj->IsClass()) {
+    FieldVisitor<void, false>::ReportFields(obj,
+                                            nullptr,
+                                            DumpVisitor<true, false>::Callback,
+                                            DumpVisitor<true, true>::Callback,
+                                            DumpVisitor<false, false>::Callback,
+                                            DumpVisitor<false, true>::Callback);
+  } else {
+    FieldVisitor<void, true>::ReportFields(obj,
+                                           nullptr,
+                                           DumpVisitor<true, false>::Callback,
+                                           DumpVisitor<true, true>::Callback,
+                                           DumpVisitor<false, false>::Callback,
+                                           DumpVisitor<false, true>::Callback);
+  }
+}
+
+class ReportPrimitiveField {
+ public:
+  static bool Report(art::ObjPtr<art::mirror::Object> obj,
+                     ObjectTagTable* tag_table,
+                     const jvmtiHeapCallbacks* cb,
+                     const void* user_data)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (UNLIKELY(cb->primitive_field_callback != nullptr)) {
+      jlong class_tag = tag_table->GetTagOrZero(obj->GetClass());
+      ReportPrimitiveField rpf(tag_table, class_tag, cb, user_data);
+      if (obj->IsClass()) {
+        return FieldVisitor<ReportPrimitiveField, false>::ReportFields(
+            obj,
+            &rpf,
+            ReportPrimitiveFieldCallback<true>,
+            VisitorFalse<ReportPrimitiveField>,
+            VisitorFalse<ReportPrimitiveField>,
+            VisitorFalse<ReportPrimitiveField>);
+      } else {
+        return FieldVisitor<ReportPrimitiveField, true>::ReportFields(
+            obj,
+            &rpf,
+            VisitorFalse<ReportPrimitiveField>,
+            VisitorFalse<ReportPrimitiveField>,
+            ReportPrimitiveFieldCallback<false>,
+            VisitorFalse<ReportPrimitiveField>);
+      }
+    }
+    return false;
+  }
+
+
+ private:
+  ReportPrimitiveField(ObjectTagTable* tag_table,
+                       jlong class_tag,
+                       const jvmtiHeapCallbacks* cb,
+                       const void* user_data)
+      : tag_table_(tag_table), class_tag_(class_tag), cb_(cb), user_data_(user_data) {}
+
+  template <bool kReportStatic>
+  static bool ReportPrimitiveFieldCallback(art::ObjPtr<art::mirror::Object> obj,
+                                           art::ObjPtr<art::mirror::Class> klass,
+                                           art::ArtField& field,
+                                           size_t field_index,
+                                           ReportPrimitiveField* user_data)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    art::Primitive::Type art_prim_type = field.GetTypeAsPrimitiveType();
+    jvmtiPrimitiveType prim_type =
+        static_cast<jvmtiPrimitiveType>(art::Primitive::Descriptor(art_prim_type)[0]);
+    DCHECK(prim_type == JVMTI_PRIMITIVE_TYPE_BOOLEAN ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_BYTE ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_CHAR ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_SHORT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_INT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_LONG ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_FLOAT ||
+           prim_type == JVMTI_PRIMITIVE_TYPE_DOUBLE);
+    jvmtiHeapReferenceInfo info;
+    info.field.index = field_index;
+
+    jvalue value;
+    memset(&value, 0, sizeof(jvalue));
+    art::ObjPtr<art::mirror::Object> src = kReportStatic ? klass : obj;
+    switch (art_prim_type) {
+      case art::Primitive::Type::kPrimBoolean:
+        value.z = field.GetBoolean(src) == 0 ? JNI_FALSE : JNI_TRUE;
+        break;
+      case art::Primitive::Type::kPrimByte:
+        value.b = field.GetByte(src);
+        break;
+      case art::Primitive::Type::kPrimChar:
+        value.c = field.GetChar(src);
+        break;
+      case art::Primitive::Type::kPrimShort:
+        value.s = field.GetShort(src);
+        break;
+      case art::Primitive::Type::kPrimInt:
+        value.i = field.GetInt(src);
+        break;
+      case art::Primitive::Type::kPrimLong:
+        value.j = field.GetLong(src);
+        break;
+      case art::Primitive::Type::kPrimFloat:
+        value.f = field.GetFloat(src);
+        break;
+      case art::Primitive::Type::kPrimDouble:
+        value.d = field.GetDouble(src);
+        break;
+      case art::Primitive::Type::kPrimVoid:
+      case art::Primitive::Type::kPrimNot: {
+        LOG(FATAL) << "Should not reach here";
+        UNREACHABLE();
+      }
+    }
+
+    jlong obj_tag = user_data->tag_table_->GetTagOrZero(src.Ptr());
+    const jlong saved_obj_tag = obj_tag;
+
+    jint ret = user_data->cb_->primitive_field_callback(kReportStatic
+                                                            ? JVMTI_HEAP_REFERENCE_STATIC_FIELD
+                                                            : JVMTI_HEAP_REFERENCE_FIELD,
+                                                        &info,
+                                                        user_data->class_tag_,
+                                                        &obj_tag,
+                                                        value,
+                                                        prim_type,
+                                                        const_cast<void*>(user_data->user_data_));
+
+    if (saved_obj_tag != obj_tag) {
+      user_data->tag_table_->Set(src.Ptr(), obj_tag);
+    }
+
+    if ((ret & JVMTI_VISIT_ABORT) != 0) {
+      return true;
+    }
+
+    return false;
+  }
+
+  ObjectTagTable* tag_table_;
+  jlong class_tag_;
+  const jvmtiHeapCallbacks* cb_;
+  const void* user_data_;
+};
+
 struct HeapFilter {
   explicit HeapFilter(jint heap_filter)
       : filter_out_tagged((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0),
@@ -197,6 +643,14 @@
 
 }  // namespace
 
+void HeapUtil::Register() {
+  art::Runtime::Current()->AddSystemWeakHolder(&gIndexCachingTable);
+}
+
+void HeapUtil::Unregister() {
+  art::Runtime::Current()->RemoveSystemWeakHolder(&gIndexCachingTable);
+}
+
 struct IterateThroughHeapData {
   IterateThroughHeapData(HeapUtil* _heap_util,
                          jvmtiEnv* _env,
@@ -289,7 +743,12 @@
     ithd->stop_reports = (array_ret & JVMTI_VISIT_ABORT) != 0;
   }
 
-  // TODO Implement primitive field callback.
+  if (!ithd->stop_reports) {
+    ithd->stop_reports = ReportPrimitiveField::Report(obj,
+                                                      ithd->heap_util->GetTags(),
+                                                      ithd->callbacks,
+                                                      ithd->user_data);
+  }
 }
 
 jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env,
@@ -423,12 +882,17 @@
     void AddRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
         REQUIRES_SHARED(art::Locks::mutator_lock_)
         REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      if (stop_reports_) {
+        return;
+      }
+      bool add_to_worklist = ReportRoot(root_obj, info);
       // We use visited_ to mark roots already so we do not need another set.
       if (visited_->find(root_obj) == visited_->end()) {
         visited_->insert(root_obj);
-        worklist_->push_back(root_obj);
+        if (add_to_worklist) {
+          worklist_->push_back(root_obj);
+        }
       }
-      ReportRoot(root_obj, info);
     }
 
     // Remove NO_THREAD_SAFETY_ANALYSIS once ASSERT_CAPABILITY works correctly.
@@ -534,7 +998,7 @@
       UNREACHABLE();
     }
 
-    void ReportRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
+    bool ReportRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
         REQUIRES_SHARED(art::Locks::mutator_lock_)
         REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
       jvmtiHeapReferenceInfo ref_info;
@@ -543,6 +1007,7 @@
       if ((result & JVMTI_VISIT_ABORT) != 0) {
         stop_reports_ = true;
       }
+      return (result & JVMTI_VISIT_OBJECTS) != 0;
     }
 
    private:
@@ -565,64 +1030,49 @@
       return;
     }
 
-    // TODO: We'll probably have to rewrite this completely with our own visiting logic, if we
-    //       want to have a chance of getting the field indices computed halfway efficiently. For
-    //       now, ignore them altogether.
-
-    struct InstanceReferenceVisitor {
-      explicit InstanceReferenceVisitor(FollowReferencesHelper* helper_)
-          : helper(helper_), stop_reports(false) {}
-
-      void operator()(art::mirror::Object* src,
-                      art::MemberOffset field_offset,
-                      bool is_static ATTRIBUTE_UNUSED) const
-          REQUIRES_SHARED(art::Locks::mutator_lock_)
-          REQUIRES(!*helper->tag_table_->GetAllowDisallowLock()) {
-        if (stop_reports) {
-          return;
-        }
-
-        art::mirror::Object* trg = src->GetFieldObjectReferenceAddr(field_offset)->AsMirrorPtr();
+    // All instance fields.
+    auto report_instance_field = [&](art::ObjPtr<art::mirror::Object> src,
+                                     art::ObjPtr<art::mirror::Class> obj_klass ATTRIBUTE_UNUSED,
+                                     art::ArtField& field,
+                                     size_t field_index,
+                                     void* user_data ATTRIBUTE_UNUSED)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      art::ObjPtr<art::mirror::Object> field_value = field.GetObject(src);
+      if (field_value != nullptr) {
         jvmtiHeapReferenceInfo reference_info;
         memset(&reference_info, 0, sizeof(reference_info));
 
-        // TODO: Implement spec-compliant numbering.
-        reference_info.field.index = field_offset.Int32Value();
+        reference_info.field.index = field_index;
 
         jvmtiHeapReferenceKind kind =
-            field_offset.Int32Value() == art::mirror::Object::ClassOffset().Int32Value()
+            field.GetOffset().Int32Value() == art::mirror::Object::ClassOffset().Int32Value()
                 ? JVMTI_HEAP_REFERENCE_CLASS
                 : JVMTI_HEAP_REFERENCE_FIELD;
         const jvmtiHeapReferenceInfo* reference_info_ptr =
             kind == JVMTI_HEAP_REFERENCE_CLASS ? nullptr : &reference_info;
 
-        stop_reports = !helper->ReportReferenceMaybeEnqueue(kind, reference_info_ptr, src, trg);
+        return !ReportReferenceMaybeEnqueue(kind, reference_info_ptr, src.Ptr(), field_value.Ptr());
       }
-
-      void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED)
-          const {
-        LOG(FATAL) << "Unreachable";
-      }
-      void VisitRootIfNonNull(
-          art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED) const {
-        LOG(FATAL) << "Unreachable";
-      }
-
-      // "mutable" required by the visitor API.
-      mutable FollowReferencesHelper* helper;
-      mutable bool stop_reports;
+      return false;
     };
-
-    InstanceReferenceVisitor visitor(this);
-    // Visit references, not native roots.
-    obj->VisitReferences<false>(visitor, art::VoidFunctor());
-
-    stop_reports_ = visitor.stop_reports;
-
-    if (!stop_reports_) {
-      jint string_ret = ReportString(obj, env, tag_table_, callbacks_, user_data_);
-      stop_reports_ = (string_ret & JVMTI_VISIT_ABORT) != 0;
+    stop_reports_ = FieldVisitor<void, true>::ReportFields(obj,
+                                                           nullptr,
+                                                           VisitorFalse<void>,
+                                                           VisitorFalse<void>,
+                                                           VisitorFalse<void>,
+                                                           report_instance_field);
+    if (stop_reports_) {
+      return;
     }
+
+    jint string_ret = ReportString(obj, env, tag_table_, callbacks_, user_data_);
+    stop_reports_ = (string_ret & JVMTI_VISIT_ABORT) != 0;
+    if (stop_reports_) {
+      return;
+    }
+
+    stop_reports_ = ReportPrimitiveField::Report(obj, tag_table_, callbacks_, user_data_);
   }
 
   void VisitArray(art::mirror::Object* array)
@@ -716,26 +1166,38 @@
     DCHECK_EQ(h_klass.Get(), klass);
 
     // Declared static fields.
-    for (auto& field : klass->GetSFields()) {
-      if (!field.IsPrimitiveType()) {
-        art::ObjPtr<art::mirror::Object> field_value = field.GetObject(klass);
-        if (field_value != nullptr) {
-          jvmtiHeapReferenceInfo reference_info;
-          memset(&reference_info, 0, sizeof(reference_info));
+    auto report_static_field = [&](art::ObjPtr<art::mirror::Object> obj ATTRIBUTE_UNUSED,
+                                   art::ObjPtr<art::mirror::Class> obj_klass,
+                                   art::ArtField& field,
+                                   size_t field_index,
+                                   void* user_data ATTRIBUTE_UNUSED)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      art::ObjPtr<art::mirror::Object> field_value = field.GetObject(obj_klass);
+      if (field_value != nullptr) {
+        jvmtiHeapReferenceInfo reference_info;
+        memset(&reference_info, 0, sizeof(reference_info));
 
-          // TODO: Implement spec-compliant numbering.
-          reference_info.field.index = field.GetOffset().Int32Value();
+        reference_info.field.index = static_cast<jint>(field_index);
 
-          stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_STATIC_FIELD,
-                                                       &reference_info,
-                                                       klass,
-                                                       field_value.Ptr());
-          if (stop_reports_) {
-            return;
-          }
-        }
+        return !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_STATIC_FIELD,
+                                            &reference_info,
+                                            obj_klass.Ptr(),
+                                            field_value.Ptr());
       }
+      return false;
+    };
+    stop_reports_ = FieldVisitor<void, false>::ReportFields(klass,
+                                                            nullptr,
+                                                            VisitorFalse<void>,
+                                                            report_static_field,
+                                                            VisitorFalse<void>,
+                                                            VisitorFalse<void>);
+    if (stop_reports_) {
+      return;
     }
+
+    stop_reports_ = ReportPrimitiveField::Report(klass, tag_table_, callbacks_, user_data_);
   }
 
   void MaybeEnqueue(art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
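
The FieldVisitor machinery in the ti_heap.cc hunks above exists so jvmtiHeapReferenceInfo.field.index carries stable indices rather than raw offsets: all interface static fields form a prefix, then superclass fields (root first), then the class's own static and instance fields, with IndexCache memoizing the interface prefix per class. A toy, self-contained model of that numbering; the class shapes and counts are invented for illustration:

    #include <cstdio>

    // Toy model of the index scheme ReportFieldsRecursive computes: interface
    // static fields first, then superclass fields, then the class's own.
    struct ToyClass {
      const ToyClass* super;           // nullptr for the root (j.l.Object)
      size_t interface_static_fields;  // prefix contributed by interfaces
      size_t static_fields;
      size_t instance_fields;
    };

    // First index assigned to klass's own fields, mirroring the field_index
    // bookkeeping in the recursion above.
    size_t FirstOwnFieldIndex(const ToyClass& klass) {
      if (klass.super == nullptr) {
        return klass.interface_static_fields;  // root starts after the prefix
      }
      const ToyClass& s = *klass.super;
      return FirstOwnFieldIndex(s) + s.static_fields + s.instance_fields;
    }

    int main() {
      ToyClass object{nullptr, 2, 0, 0};
      ToyClass base{&object, 2, 1, 2};   // 1 static + 2 instance fields
      ToyClass derived{&base, 2, 0, 1};
      // Interfaces take indices 0-1, Object adds none, Base owns 2-4, so
      // Derived's single instance field gets index 5.
      std::printf("%zu\n", FirstOwnFieldIndex(derived));  // prints 5
      return 0;
    }
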
diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h
index 72ee097..dccecb4 100644
--- a/runtime/openjdkjvmti/ti_heap.h
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -49,6 +49,9 @@
     return tags_;
   }
 
+  static void Register();
+  static void Unregister();
+
  private:
   ObjectTagTable* tags_;
 };
diff --git a/runtime/openjdkjvmti/ti_phase.cc b/runtime/openjdkjvmti/ti_phase.cc
index e494cb6..941cf7b 100644
--- a/runtime/openjdkjvmti/ti_phase.cc
+++ b/runtime/openjdkjvmti/ti_phase.cc
@@ -40,6 +40,7 @@
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
+#include "ti_thread.h"
 
 namespace openjdkjvmti {
 
@@ -69,6 +70,7 @@
         break;
       case RuntimePhase::kInit:
         {
+          ThreadUtil::CacheData();
           ScopedLocalRef<jthread> thread(GetJniEnv(), GetCurrentJThread());
           art::ScopedThreadSuspension sts(art::Thread::Current(), art::ThreadState::kNative);
           event_handler->DispatchEvent<ArtJvmtiEvent::kVmInit>(nullptr, GetJniEnv(), thread.get());
@@ -105,6 +107,16 @@
   return ERR(NONE);
 }
 
+bool PhaseUtil::IsLivePhase() {
+  jvmtiPhase now = PhaseUtil::current_phase_;
+  DCHECK(now == JVMTI_PHASE_ONLOAD ||
+         now == JVMTI_PHASE_PRIMORDIAL ||
+         now == JVMTI_PHASE_START ||
+         now == JVMTI_PHASE_LIVE ||
+         now == JVMTI_PHASE_DEAD);
+  return now == JVMTI_PHASE_LIVE;
+}
+
 void PhaseUtil::SetToOnLoad() {
   DCHECK_EQ(0u, static_cast<size_t>(PhaseUtil::current_phase_));
   PhaseUtil::current_phase_ = JVMTI_PHASE_ONLOAD;
@@ -117,6 +129,7 @@
 
 void PhaseUtil::SetToLive() {
   DCHECK_EQ(static_cast<size_t>(0), static_cast<size_t>(PhaseUtil::current_phase_));
+  ThreadUtil::CacheData();
   PhaseUtil::current_phase_ = JVMTI_PHASE_LIVE;
 }
 
diff --git a/runtime/openjdkjvmti/ti_phase.h b/runtime/openjdkjvmti/ti_phase.h
index 851fc27..a2c0d11 100644
--- a/runtime/openjdkjvmti/ti_phase.h
+++ b/runtime/openjdkjvmti/ti_phase.h
@@ -42,6 +42,7 @@
 class PhaseUtil {
  public:
   static jvmtiError GetPhase(jvmtiEnv* env, jvmtiPhase* phase_ptr);
+  static bool IsLivePhase();
 
   static void Register(EventHandler* event_handler);
   static void Unregister();
diff --git a/runtime/openjdkjvmti/ti_redefine.cc b/runtime/openjdkjvmti/ti_redefine.cc
index c4d20c0..7d95de8 100644
--- a/runtime/openjdkjvmti/ti_redefine.cc
+++ b/runtime/openjdkjvmti/ti_redefine.cc
@@ -178,7 +178,7 @@
         art::ClassLinker* cl = runtime->GetClassLinker();
         auto ptr_size = cl->GetImagePointerSize();
         const size_t method_size = art::ArtMethod::Size(ptr_size);
-        auto* method_storage = allocator_->Alloc(GetThread(), method_size);
+        auto* method_storage = allocator_->Alloc(art::Thread::Current(), method_size);
         CHECK(method_storage != nullptr) << "Unable to allocate storage for obsolete version of '"
                                          << old_method->PrettyMethod() << "'";
         new_obsolete_method = new (method_storage) art::ArtMethod();
@@ -186,6 +186,7 @@
         DCHECK_EQ(new_obsolete_method->GetDeclaringClass(), old_method->GetDeclaringClass());
         new_obsolete_method->SetIsObsolete();
         new_obsolete_method->SetDontCompile();
+        cl->SetEntryPointsForObsoleteMethod(new_obsolete_method);
         obsolete_maps_->RecordObsolete(old_method, new_obsolete_method);
         // Update JIT Data structures to point to the new method.
         art::jit::Jit* jit = art::Runtime::Current()->GetJit();
@@ -261,13 +262,12 @@
 
 // Moves dex data to an anonymous, read-only mmap'd region.
 std::unique_ptr<art::MemMap> Redefiner::MoveDataToMemMap(const std::string& original_location,
-                                                         jint data_len,
-                                                         const unsigned char* dex_data,
+                                                         art::ArraySlice<const unsigned char> data,
                                                          std::string* error_msg) {
   std::unique_ptr<art::MemMap> map(art::MemMap::MapAnonymous(
       StringPrintf("%s-transformed", original_location.c_str()).c_str(),
       nullptr,
-      data_len,
+      data.size(),
       PROT_READ|PROT_WRITE,
       /*low_4gb*/false,
       /*reuse*/false,
@@ -275,7 +275,7 @@
   if (map == nullptr) {
     return map;
   }
-  memcpy(map->Begin(), dex_data, data_len);
+  memcpy(map->Begin(), &data.At(0), data.size());
   // Make the dex files mmap read only. This matches how other DexFiles are mmaped and prevents
   // programs from corrupting it.
   map->Protect(PROT_READ);
@@ -325,25 +325,26 @@
   std::vector<ArtClassDefinition> def_vector;
   def_vector.reserve(class_count);
   for (jint i = 0; i < class_count; i++) {
+    jboolean is_modifiable = JNI_FALSE;
+    jvmtiError res = env->IsModifiableClass(definitions[i].klass, &is_modifiable);
+    if (res != OK) {
+      return res;
+    } else if (!is_modifiable) {
+      return ERR(UNMODIFIABLE_CLASS);
+    }
     // We make a copy of the class_bytes to pass into the retransformation.
     // This makes cleanup easier (since we unambiguously own the bytes) and also is useful since we
     // will need to keep the original bytes around unaltered for subsequent RetransformClasses calls
     // to get the passed in bytes.
     unsigned char* class_bytes_copy = nullptr;
-    jvmtiError res = env->Allocate(definitions[i].class_byte_count, &class_bytes_copy);
+    res = env->Allocate(definitions[i].class_byte_count, &class_bytes_copy);
     if (res != OK) {
       return res;
     }
     memcpy(class_bytes_copy, definitions[i].class_bytes, definitions[i].class_byte_count);
 
     ArtClassDefinition def;
-    def.dex_len = definitions[i].class_byte_count;
-    def.dex_data = MakeJvmtiUniquePtr(env, class_bytes_copy);
-    // We are definitely modified.
-    def.SetModified();
-    def.original_dex_file = art::ArraySlice<const unsigned char>(definitions[i].class_bytes,
-                                                                 definitions[i].class_byte_count);
-    res = Transformer::FillInTransformationData(env, definitions[i].klass, &def);
+    res = def.Init(env, definitions[i]);
     if (res != OK) {
       return res;
     }
@@ -379,7 +380,7 @@
   Redefiner r(runtime, self, error_msg);
   for (const ArtClassDefinition& def : definitions) {
     // Only try to transform classes that have been modified.
-    if (def.IsModified(self)) {
+    if (def.IsModified()) {
       jvmtiError res = r.AddRedefinition(env, def);
       if (res != OK) {
         return res;
@@ -392,25 +393,24 @@
 jvmtiError Redefiner::AddRedefinition(ArtJvmTiEnv* env, const ArtClassDefinition& def) {
   std::string original_dex_location;
   jvmtiError ret = OK;
-  if ((ret = GetClassLocation(env, def.klass, &original_dex_location))) {
+  if ((ret = GetClassLocation(env, def.GetClass(), &original_dex_location))) {
     *error_msg_ = "Unable to get original dex file location!";
     return ret;
   }
   char* generic_ptr_unused = nullptr;
   char* signature_ptr = nullptr;
-  if ((ret = env->GetClassSignature(def.klass, &signature_ptr, &generic_ptr_unused)) != OK) {
+  if ((ret = env->GetClassSignature(def.GetClass(), &signature_ptr, &generic_ptr_unused)) != OK) {
     *error_msg_ = "Unable to get class signature!";
     return ret;
   }
   JvmtiUniquePtr<char> generic_unique_ptr(MakeJvmtiUniquePtr(env, generic_ptr_unused));
   JvmtiUniquePtr<char> signature_unique_ptr(MakeJvmtiUniquePtr(env, signature_ptr));
   std::unique_ptr<art::MemMap> map(MoveDataToMemMap(original_dex_location,
-                                                    def.dex_len,
-                                                    def.dex_data.get(),
+                                                    def.GetDexData(),
                                                     error_msg_));
   std::ostringstream os;
   if (map.get() == nullptr) {
-    os << "Failed to create anonymous mmap for modified dex file of class " << def.name
+    os << "Failed to create anonymous mmap for modified dex file of class " << def.GetName()
        << "in dex file " << original_dex_location << " because: " << *error_msg_;
     *error_msg_ = os.str();
     return ERR(OUT_OF_MEMORY);
@@ -427,16 +427,16 @@
                                                                   /*verify_checksum*/true,
                                                                   error_msg_));
   if (dex_file.get() == nullptr) {
-    os << "Unable to load modified dex file for " << def.name << ": " << *error_msg_;
+    os << "Unable to load modified dex file for " << def.GetName() << ": " << *error_msg_;
     *error_msg_ = os.str();
     return ERR(INVALID_CLASS_FORMAT);
   }
   redefinitions_.push_back(
       Redefiner::ClassRedefinition(this,
-                                   def.klass,
+                                   def.GetClass(),
                                    dex_file.release(),
                                    signature_ptr,
-                                   def.original_dex_file));
+                                   def.GetNewOriginalDexFile()));
   return OK;
 }
 
@@ -462,7 +462,7 @@
   result_ = result;
 }
 
-art::mirror::ByteArray* Redefiner::ClassRedefinition::AllocateOrGetOriginalDexFileBytes() {
+art::mirror::Object* Redefiner::ClassRedefinition::AllocateOrGetOriginalDexFile() {
   // If we have been specifically given a new set of bytes use that
   if (original_dex_file_.size() != 0) {
     return art::mirror::ByteArray::AllocateAndFill(
@@ -474,24 +474,21 @@
   // See if we already have one set.
   art::ObjPtr<art::mirror::ClassExt> ext(GetMirrorClass()->GetExtData());
   if (!ext.IsNull()) {
-    art::ObjPtr<art::mirror::ByteArray> old_original_bytes(ext->GetOriginalDexFileBytes());
-    if (!old_original_bytes.IsNull()) {
+    art::ObjPtr<art::mirror::Object> old_original_dex_file(ext->GetOriginalDexFile());
+    if (!old_original_dex_file.IsNull()) {
       // We do. Use it.
-      return old_original_bytes.Ptr();
+      return old_original_dex_file.Ptr();
     }
   }
 
-  // Copy the current dex_file
-  const art::DexFile& current_dex_file = GetMirrorClass()->GetDexFile();
+  // Return the current dex_cache, which has the dex file in it.
+  art::ObjPtr<art::mirror::DexCache> current_dex_cache(GetMirrorClass()->GetDexCache());
   // TODO Handle this or make it so it cannot happen.
-  if (current_dex_file.NumClassDefs() != 1) {
+  if (current_dex_cache->GetDexFile()->NumClassDefs() != 1) {
     LOG(WARNING) << "Current dex file has more than one class in it. Calling RetransformClasses "
                  << "on this class might fail if no transformations are applied to it!";
   }
-  return art::mirror::ByteArray::AllocateAndFill(
-      driver_->self_,
-      reinterpret_cast<const signed char*>(current_dex_file.Begin()),
-      current_dex_file.Size());
+  return current_dex_cache.Ptr();
 }
 
 struct CallbackCtx {
@@ -779,6 +776,8 @@
       CheckSameMethods();
 }
 
+class RedefinitionDataIter;
+
 // A wrapper that lets us hold onto the arbitrary sized data needed for redefinitions in a
 // reasonably sane way. This adds no fields to the normal ObjectArray. By doing this we can avoid
 // having to deal with the fact that we need to hold an arbitrary number of references live.
@@ -802,13 +801,15 @@
   RedefinitionDataHolder(art::StackHandleScope<1>* hs,
                          art::Runtime* runtime,
                          art::Thread* self,
-                         int32_t num_redefinitions) REQUIRES_SHARED(art::Locks::mutator_lock_) :
+                         std::vector<Redefiner::ClassRedefinition>* redefinitions)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) :
     arr_(
       hs->NewHandle(
         art::mirror::ObjectArray<art::mirror::Object>::Alloc(
             self,
             runtime->GetClassLinker()->GetClassRoot(art::ClassLinker::kObjectArrayClass),
-            num_redefinitions * kNumSlots))) {}
+            redefinitions->size() * kNumSlots))),
+    redefinitions_(redefinitions) {}
 
   bool IsNull() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
     return arr_.IsNull();
@@ -836,9 +837,9 @@
     return art::down_cast<art::mirror::Class*>(GetSlot(klass_index, kSlotMirrorClass));
   }
 
-  art::mirror::ByteArray* GetOriginalDexFileBytes(jint klass_index) const
+  art::mirror::Object* GetOriginalDexFile(jint klass_index) const
       REQUIRES_SHARED(art::Locks::mutator_lock_) {
-    return art::down_cast<art::mirror::ByteArray*>(GetSlot(klass_index, kSlotOrigDexFile));
+    return art::down_cast<art::mirror::Object*>(GetSlot(klass_index, kSlotOrigDexFile));
   }
 
   void SetSourceClassLoader(jint klass_index, art::mirror::ClassLoader* loader)
@@ -861,7 +862,7 @@
       REQUIRES_SHARED(art::Locks::mutator_lock_) {
     SetSlot(klass_index, kSlotMirrorClass, klass);
   }
-  void SetOriginalDexFileBytes(jint klass_index, art::mirror::ByteArray* bytes)
+  void SetOriginalDexFile(jint klass_index, art::mirror::Object* bytes)
       REQUIRES_SHARED(art::Locks::mutator_lock_) {
     SetSlot(klass_index, kSlotOrigDexFile, bytes);
   }
@@ -870,8 +871,27 @@
     return arr_->GetLength() / kNumSlots;
   }
 
+  std::vector<Redefiner::ClassRedefinition>* GetRedefinitions()
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return redefinitions_;
+  }
+
+  bool operator==(const RedefinitionDataHolder& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return arr_.Get() == other.arr_.Get();
+  }
+
+  bool operator!=(const RedefinitionDataHolder& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return !(*this == other);
+  }
+
+  RedefinitionDataIter begin() REQUIRES_SHARED(art::Locks::mutator_lock_);
+  RedefinitionDataIter end() REQUIRES_SHARED(art::Locks::mutator_lock_);
+
  private:
   mutable art::Handle<art::mirror::ObjectArray<art::mirror::Object>> arr_;
+  std::vector<Redefiner::ClassRedefinition>* redefinitions_;
 
   art::mirror::Object* GetSlot(jint klass_index,
                                DataSlot slot) const REQUIRES_SHARED(art::Locks::mutator_lock_) {
@@ -890,8 +910,115 @@
   DISALLOW_COPY_AND_ASSIGN(RedefinitionDataHolder);
 };
 
-bool Redefiner::ClassRedefinition::CheckVerification(int32_t klass_index,
-                                                     const RedefinitionDataHolder& holder) {
+class RedefinitionDataIter {
+ public:
+  RedefinitionDataIter(int32_t idx, RedefinitionDataHolder& holder) : idx_(idx), holder_(holder) {}
+
+  RedefinitionDataIter(const RedefinitionDataIter&) = default;
+  RedefinitionDataIter(RedefinitionDataIter&&) = default;
+  RedefinitionDataIter& operator=(const RedefinitionDataIter&) = default;
+  RedefinitionDataIter& operator=(RedefinitionDataIter&&) = default;
+
+  bool operator==(const RedefinitionDataIter& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return idx_ == other.idx_ && holder_ == other.holder_;
+  }
+
+  bool operator!=(const RedefinitionDataIter& other) const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return !(*this == other);
+  }
+
+  RedefinitionDataIter operator++() {  // Value after modification.
+    idx_++;
+    return *this;
+  }
+
+  RedefinitionDataIter operator++(int) {
+    RedefinitionDataIter temp = *this;
+    idx_++;
+    return temp;
+  }
+
+  RedefinitionDataIter operator+(ssize_t delta) const {
+    RedefinitionDataIter temp = *this;
+    temp += delta;
+    return temp;
+  }
+
+  RedefinitionDataIter& operator+=(ssize_t delta) {
+    idx_ += delta;
+    return *this;
+  }
+
+  Redefiner::ClassRedefinition& GetRedefinition() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return (*holder_.GetRedefinitions())[idx_];
+  }
+
+  RedefinitionDataHolder& GetHolder() {
+    return holder_;
+  }
+
+  art::mirror::ClassLoader* GetSourceClassLoader() const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetSourceClassLoader(idx_);
+  }
+  art::mirror::Object* GetJavaDexFile() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetJavaDexFile(idx_);
+  }
+  art::mirror::LongArray* GetNewDexFileCookie() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetNewDexFileCookie(idx_);
+  }
+  art::mirror::DexCache* GetNewDexCache() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetNewDexCache(idx_);
+  }
+  art::mirror::Class* GetMirrorClass() const REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetMirrorClass(idx_);
+  }
+  art::mirror::Object* GetOriginalDexFile() const
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return holder_.GetOriginalDexFile(idx_);
+  }
+  int32_t GetIndex() const {
+    return idx_;
+  }
+
+  void SetSourceClassLoader(art::mirror::ClassLoader* loader)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetSourceClassLoader(idx_, loader);
+  }
+  void SetJavaDexFile(art::mirror::Object* dexfile) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetJavaDexFile(idx_, dexfile);
+  }
+  void SetNewDexFileCookie(art::mirror::LongArray* cookie)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetNewDexFileCookie(idx_, cookie);
+  }
+  void SetNewDexCache(art::mirror::DexCache* cache) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetNewDexCache(idx_, cache);
+  }
+  void SetMirrorClass(art::mirror::Class* klass) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetMirrorClass(idx_, klass);
+  }
+  void SetOriginalDexFile(art::mirror::Object* bytes)
+      REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    holder_.SetOriginalDexFile(idx_, bytes);
+  }
+
+ private:
+  int32_t idx_;
+  RedefinitionDataHolder& holder_;
+};
+
+RedefinitionDataIter RedefinitionDataHolder::begin() {
+  return RedefinitionDataIter(0, *this);
+}
+
+RedefinitionDataIter RedefinitionDataHolder::end() {
+  return RedefinitionDataIter(Length(), *this);
+}
+
+bool Redefiner::ClassRedefinition::CheckVerification(const RedefinitionDataIter& iter) {
   DCHECK_EQ(dex_file_->NumClassDefs(), 1u);
   art::StackHandleScope<2> hs(driver_->self_);
   std::string error;
@@ -899,7 +1026,7 @@
   art::verifier::MethodVerifier::FailureKind failure =
       art::verifier::MethodVerifier::VerifyClass(driver_->self_,
                                                  dex_file_.get(),
-                                                 hs.NewHandle(holder.GetNewDexCache(klass_index)),
+                                                 hs.NewHandle(iter.GetNewDexCache()),
                                                  hs.NewHandle(GetClassLoader()),
                                                  dex_file_->GetClassDef(0), /*class_def*/
                                                  nullptr, /*compiler_callbacks*/
@@ -918,21 +1045,20 @@
 // dexfile. This is so that even if multiple classes with the same classloader are redefined at
 // once they are all added to the classloader.
 bool Redefiner::ClassRedefinition::AllocateAndRememberNewDexFileCookie(
-    int32_t klass_index,
     art::Handle<art::mirror::ClassLoader> source_class_loader,
     art::Handle<art::mirror::Object> dex_file_obj,
-    /*out*/RedefinitionDataHolder* holder) {
+    /*out*/RedefinitionDataIter* cur_data) {
   art::StackHandleScope<2> hs(driver_->self_);
   art::MutableHandle<art::mirror::LongArray> old_cookie(
       hs.NewHandle<art::mirror::LongArray>(nullptr));
   bool has_older_cookie = false;
   // See if we already have a cookie that a previous redefinition got from the same classloader.
-  for (int32_t i = 0; i < klass_index; i++) {
-    if (holder->GetSourceClassLoader(i) == source_class_loader.Get()) {
+  for (auto old_data = cur_data->GetHolder().begin(); old_data != *cur_data; ++old_data) {
+    if (old_data.GetSourceClassLoader() == source_class_loader.Get()) {
       // Since every instance of this classloader should have the same cookie associated with it we
       // can stop looking here.
       has_older_cookie = true;
-      old_cookie.Assign(holder->GetNewDexFileCookie(i));
+      old_cookie.Assign(old_data.GetNewDexFileCookie());
       break;
     }
   }
@@ -953,14 +1079,14 @@
   }
 
   // Save the cookie.
-  holder->SetNewDexFileCookie(klass_index, new_cookie.Get());
+  cur_data->SetNewDexFileCookie(new_cookie.Get());
   // If there are other copies of this same classloader we need to make sure that we all have the
   // same cookie.
   if (has_older_cookie) {
-    for (int32_t i = 0; i < klass_index; i++) {
+    for (auto old_data = cur_data->GetHolder().begin(); old_data != *cur_data; ++old_data) {
       // We will let the GC take care of the cookie we allocated for this one.
-      if (holder->GetSourceClassLoader(i) == source_class_loader.Get()) {
-        holder->SetNewDexFileCookie(i, new_cookie.Get());
+      if (old_data.GetSourceClassLoader() == source_class_loader.Get()) {
+        old_data.SetNewDexFileCookie(new_cookie.Get());
       }
     }
   }
@@ -969,32 +1095,32 @@
 }
 
 bool Redefiner::ClassRedefinition::FinishRemainingAllocations(
-    int32_t klass_index, /*out*/RedefinitionDataHolder* holder) {
+    /*out*/RedefinitionDataIter* cur_data) {
   art::ScopedObjectAccessUnchecked soa(driver_->self_);
   art::StackHandleScope<2> hs(driver_->self_);
-  holder->SetMirrorClass(klass_index, GetMirrorClass());
+  cur_data->SetMirrorClass(GetMirrorClass());
   // This shouldn't allocate
   art::Handle<art::mirror::ClassLoader> loader(hs.NewHandle(GetClassLoader()));
   // The bootclasspath is handled specially so it doesn't have a j.l.DexFile.
   if (!art::ClassLinker::IsBootClassLoader(soa, loader.Get())) {
-    holder->SetSourceClassLoader(klass_index, loader.Get());
+    cur_data->SetSourceClassLoader(loader.Get());
     art::Handle<art::mirror::Object> dex_file_obj(hs.NewHandle(
         ClassLoaderHelper::FindSourceDexFileObject(driver_->self_, loader)));
-    holder->SetJavaDexFile(klass_index, dex_file_obj.Get());
+    cur_data->SetJavaDexFile(dex_file_obj.Get());
     if (dex_file_obj == nullptr) {
       RecordFailure(ERR(INTERNAL), "Unable to find dex file!");
       return false;
     }
     // Allocate the new dex file cookie.
-    if (!AllocateAndRememberNewDexFileCookie(klass_index, loader, dex_file_obj, holder)) {
+    if (!AllocateAndRememberNewDexFileCookie(loader, dex_file_obj, cur_data)) {
       driver_->self_->AssertPendingOOMException();
       driver_->self_->ClearException();
       RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate dex file array for class loader");
       return false;
     }
   }
-  holder->SetNewDexCache(klass_index, CreateNewDexCache(loader));
-  if (holder->GetNewDexCache(klass_index) == nullptr) {
+  cur_data->SetNewDexCache(CreateNewDexCache(loader));
+  if (cur_data->GetNewDexCache() == nullptr) {
     driver_->self_->AssertPendingException();
     driver_->self_->ClearException();
     RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate DexCache");
@@ -1002,8 +1128,8 @@
   }
 
   // We won't always need to set this field.
-  holder->SetOriginalDexFileBytes(klass_index, AllocateOrGetOriginalDexFileBytes());
-  if (holder->GetOriginalDexFileBytes(klass_index) == nullptr) {
+  cur_data->SetOriginalDexFile(AllocateOrGetOriginalDexFile());
+  if (cur_data->GetOriginalDexFile() == nullptr) {
     driver_->self_->AssertPendingOOMException();
     driver_->self_->ClearException();
     RecordFailure(ERR(OUT_OF_MEMORY), "Unable to allocate array for original dex file");
@@ -1048,13 +1174,11 @@
 }
 
 bool Redefiner::FinishAllRemainingAllocations(RedefinitionDataHolder& holder) {
-  int32_t cnt = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
     // Allocate the data this redefinition requires.
-    if (!redef.FinishRemainingAllocations(cnt, &holder)) {
+    if (!data.GetRedefinition().FinishRemainingAllocations(&data)) {
       return false;
     }
-    cnt++;
   }
   return true;
 }
@@ -1069,22 +1193,39 @@
   }
 }
 
-bool Redefiner::CheckAllClassesAreVerified(const RedefinitionDataHolder& holder) {
-  int32_t cnt = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
-    if (!redef.CheckVerification(cnt, holder)) {
+bool Redefiner::CheckAllClassesAreVerified(RedefinitionDataHolder& holder) {
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
+    if (!data.GetRedefinition().CheckVerification(data)) {
       return false;
     }
-    cnt++;
   }
   return true;
 }
 
+class ScopedDisableConcurrentAndMovingGc {
+ public:
+  ScopedDisableConcurrentAndMovingGc(art::gc::Heap* heap, art::Thread* self)
+      : heap_(heap), self_(self) {
+    if (heap_->IsGcConcurrentAndMoving()) {
+      heap_->IncrementDisableMovingGC(self_);
+    }
+  }
+
+  ~ScopedDisableConcurrentAndMovingGc() {
+    if (heap_->IsGcConcurrentAndMoving()) {
+      heap_->DecrementDisableMovingGC(self_);
+    }
+  }
+ private:
+  art::gc::Heap* heap_;
+  art::Thread* self_;
+};
+
 jvmtiError Redefiner::Run() {
   art::StackHandleScope<1> hs(self_);
   // Allocate an array to hold onto all java temporary objects associated with this redefinition.
   // We will let this be collected after the end of this function.
-  RedefinitionDataHolder holder(&hs, runtime_, self_, redefinitions_.size());
+  RedefinitionDataHolder holder(&hs, runtime_, self_, &redefinitions_);
   if (holder.IsNull()) {
     self_->AssertPendingOOMException();
     self_->ClearException();
@@ -1107,57 +1248,43 @@
     // cleaned up by the GC eventually.
     return result_;
   }
+
   // At this point we can no longer fail without corrupting the runtime state.
-  int32_t counter = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
-    if (holder.GetSourceClassLoader(counter) == nullptr) {
-      runtime_->GetClassLinker()->AppendToBootClassPath(self_, redef.GetDexFile());
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
+    if (data.GetSourceClassLoader() == nullptr) {
+      runtime_->GetClassLinker()->AppendToBootClassPath(self_, data.GetRedefinition().GetDexFile());
     }
-    counter++;
   }
   UnregisterAllBreakpoints();
+
   // Disable GC and wait for it to be done if we are a moving GC. This is fine since we are
   // done allocating, so no deadlocks can occur.
-  art::gc::Heap* heap = runtime_->GetHeap();
-  if (heap->IsGcConcurrentAndMoving()) {
-    // GC moving objects can cause deadlocks as we are deoptimizing the stack.
-    heap->IncrementDisableMovingGC(self_);
-  }
+  ScopedDisableConcurrentAndMovingGc sdcamgc(runtime_->GetHeap(), self_);
+
   // Do transition to final suspension
   // TODO We might want to give this its own suspended state!
   // TODO This isn't right. We need to change state without any chance of suspend ideally!
-  self_->TransitionFromRunnableToSuspended(art::ThreadState::kNative);
-  runtime_->GetThreadList()->SuspendAll(
-      "Final installation of redefined Classes!", /*long_suspend*/true);
-  counter = 0;
-  for (Redefiner::ClassRedefinition& redef : redefinitions_) {
+  art::ScopedThreadSuspension sts(self_, art::ThreadState::kNative);
+  art::ScopedSuspendAll ssa("Final installation of redefined Classes!", /*long_suspend*/true);
+  for (RedefinitionDataIter data = holder.begin(); data != holder.end(); ++data) {
     art::ScopedAssertNoThreadSuspension nts("Updating runtime objects for redefinition");
-    if (holder.GetSourceClassLoader(counter) != nullptr) {
-      ClassLoaderHelper::UpdateJavaDexFile(holder.GetJavaDexFile(counter),
-                                           holder.GetNewDexFileCookie(counter));
+    ClassRedefinition& redef = data.GetRedefinition();
+    if (data.GetSourceClassLoader() != nullptr) {
+      ClassLoaderHelper::UpdateJavaDexFile(data.GetJavaDexFile(), data.GetNewDexFileCookie());
     }
-    art::mirror::Class* klass = holder.GetMirrorClass(counter);
+    art::mirror::Class* klass = data.GetMirrorClass();
     // TODO Rewrite so we don't do a stack walk for each and every class.
     redef.FindAndAllocateObsoleteMethods(klass);
-    redef.UpdateClass(klass, holder.GetNewDexCache(counter),
-                      holder.GetOriginalDexFileBytes(counter));
-    counter++;
+    redef.UpdateClass(klass, data.GetNewDexCache(), data.GetOriginalDexFile());
   }
   // TODO We should check for if any of the redefined methods are intrinsic methods here and, if any
   // are, force a full-world deoptimization before finishing redefinition. If we don't do this then
   // methods that have been jitted prior to the current redefinition being applied might continue
   // to use the old versions of the intrinsics!
   // TODO Shrink the obsolete method maps if possible?
-  // TODO Put this into a scoped thing.
-  runtime_->GetThreadList()->ResumeAll();
-  // Get back shared mutator lock as expected for return.
-  self_->TransitionFromSuspendedToRunnable();
   // TODO Do the dex_file release at a more reasonable place. This works but it muddles who really
   // owns the DexFile and when ownership is transferred.
   ReleaseAllDexFiles();
-  if (heap->IsGcConcurrentAndMoving()) {
-    heap->DecrementDisableMovingGC(self_);
-  }
   return OK;
 }
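
The hunk above folds three manual pairings in Run() (the thread-state transition, SuspendAll/ResumeAll, and the moving-GC disable count) into scoped guards. A minimal sketch of that RAII shape, with Heap as a hypothetical stand-in rather than the real art::gc::Heap:

class Heap {
 public:
  bool IsGcConcurrentAndMoving() const { return concurrent_and_moving_; }
  void IncrementDisableMovingGC() { ++disable_count_; }
  void DecrementDisableMovingGC() { --disable_count_; }
 private:
  bool concurrent_and_moving_ = true;
  int disable_count_ = 0;
};

class ScopedDisableMovingGcSketch {
 public:
  explicit ScopedDisableMovingGcSketch(Heap* heap) : heap_(heap) {
    if (heap_->IsGcConcurrentAndMoving()) {
      heap_->IncrementDisableMovingGC();
    }
  }
  // Runs on every exit path, so an early return can no longer leak the
  // disable count the way a forgotten manual Decrement call could.
  ~ScopedDisableMovingGcSketch() {
    if (heap_->IsGcConcurrentAndMoving()) {
      heap_->DecrementDisableMovingGC();
    }
  }
  ScopedDisableMovingGcSketch(const ScopedDisableMovingGcSketch&) = delete;
  ScopedDisableMovingGcSketch& operator=(const ScopedDisableMovingGcSketch&) = delete;
 private:
  Heap* heap_;
};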
 
@@ -1228,7 +1355,7 @@
 void Redefiner::ClassRedefinition::UpdateClass(
     art::ObjPtr<art::mirror::Class> mclass,
     art::ObjPtr<art::mirror::DexCache> new_dex_cache,
-    art::ObjPtr<art::mirror::ByteArray> original_dex_file) {
+    art::ObjPtr<art::mirror::Object> original_dex_file) {
   DCHECK_EQ(dex_file_->NumClassDefs(), 1u);
   const art::DexFile::ClassDef& class_def = dex_file_->GetClassDef(0);
   UpdateMethods(mclass, new_dex_cache, class_def);
@@ -1242,7 +1369,7 @@
   mclass->SetDexTypeIndex(dex_file_->GetIndexForTypeId(*dex_file_->FindTypeId(class_sig_.c_str())));
   art::ObjPtr<art::mirror::ClassExt> ext(mclass->GetExtData());
   CHECK(!ext.IsNull());
-  ext->SetOriginalDexFileBytes(original_dex_file);
+  ext->SetOriginalDexFile(original_dex_file);
 }
 
 // This function does all (java) allocations we need to do for the Class being redefined.
@@ -1259,8 +1386,6 @@
   art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->EnsureExtDataPresent(driver_->self_)));
   if (ext == nullptr) {
     // No memory. Clear exception (it's not useful) and return error.
-    // TODO This doesn't need to be fatal. We could just not support obsolete methods after hitting
-    // this case.
     driver_->self_->AssertPendingOOMException();
     driver_->self_->ClearException();
     RecordFailure(ERR(OUT_OF_MEMORY), "Could not allocate ClassExt");
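
RedefinitionDataHolder plus RedefinitionDataIter replace the hand-maintained counter/cnt loops throughout this file. A minimal sketch of the holder-plus-iterator split, with plain C++ containers standing in for the GC-visible ObjectArray (all names illustrative):

#include <cstddef>
#include <vector>

struct ClassRedefinition {};

class DataHolder {
 public:
  static constexpr size_t kNumSlots = 6;  // Parallel slots per redefinition.

  explicit DataHolder(std::vector<ClassRedefinition>* redefs)
      : slots_(redefs->size() * kNumSlots, nullptr), redefs_(redefs) {}

  size_t Length() const { return slots_.size() / kNumSlots; }
  void* GetSlot(size_t index, size_t slot) const { return slots_[index * kNumSlots + slot]; }
  void SetSlot(size_t index, size_t slot, void* v) { slots_[index * kNumSlots + slot] = v; }
  std::vector<ClassRedefinition>* GetRedefinitions() { return redefs_; }

  // One index drives both the slot array and the redefinition vector.
  class Iter {
   public:
    Iter(size_t idx, DataHolder& holder) : idx_(idx), holder_(holder) {}
    bool operator!=(const Iter& other) const { return idx_ != other.idx_; }
    Iter& operator++() { ++idx_; return *this; }
    ClassRedefinition& GetRedefinition() { return (*holder_.GetRedefinitions())[idx_]; }
    void* GetSlot(size_t slot) const { return holder_.GetSlot(idx_, slot); }
   private:
    size_t idx_;
    DataHolder& holder_;
  };

  Iter begin() { return Iter(0, *this); }
  Iter end() { return Iter(Length(), *this); }

 private:
  std::vector<void*> slots_;
  std::vector<ClassRedefinition>* redefs_;
};

// Usage mirrors the rewritten loops:
//   for (auto it = holder.begin(); it != holder.end(); ++it) {
//     it.GetRedefinition();
//   }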
diff --git a/runtime/openjdkjvmti/ti_redefine.h b/runtime/openjdkjvmti/ti_redefine.h
index 4e6d05f..809a681 100644
--- a/runtime/openjdkjvmti/ti_redefine.h
+++ b/runtime/openjdkjvmti/ti_redefine.h
@@ -66,6 +66,7 @@
 namespace openjdkjvmti {
 
 class RedefinitionDataHolder;
+class RedefinitionDataIter;
 
 // Class that can redefine a single class's methods.
 // TODO We should really make this be driven by an outside class so we can do multiple classes at
@@ -98,8 +99,7 @@
   static jvmtiError IsModifiableClass(jvmtiEnv* env, jclass klass, jboolean* is_redefinable);
 
   static std::unique_ptr<art::MemMap> MoveDataToMemMap(const std::string& original_location,
-                                                       jint data_len,
-                                                       const unsigned char* dex_data,
+                                                       art::ArraySlice<const unsigned char> data,
                                                        std::string* error_msg);
 
  private:
@@ -136,21 +136,20 @@
         REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     // This may return nullptr with an OOME pending if allocation fails.
-    art::mirror::ByteArray* AllocateOrGetOriginalDexFileBytes()
+    art::mirror::Object* AllocateOrGetOriginalDexFile()
         REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     void RecordFailure(jvmtiError e, const std::string& err) {
       driver_->RecordFailure(e, class_sig_, err);
     }
 
-    bool FinishRemainingAllocations(int32_t klass_index, /*out*/RedefinitionDataHolder* holder)
+    bool FinishRemainingAllocations(/*out*/RedefinitionDataIter* cur_data)
         REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     bool AllocateAndRememberNewDexFileCookie(
-        int32_t klass_index,
         art::Handle<art::mirror::ClassLoader> source_class_loader,
         art::Handle<art::mirror::Object> dex_file_obj,
-        /*out*/RedefinitionDataHolder* holder)
+        /*out*/RedefinitionDataIter* cur_data)
           REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     void FindAndAllocateObsoleteMethods(art::mirror::Class* art_klass)
@@ -161,8 +160,7 @@
     bool CheckClass() REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     // Checks that the contained class can be successfully verified.
-    bool CheckVerification(int32_t klass_index,
-                           const RedefinitionDataHolder& holder)
+    bool CheckVerification(const RedefinitionDataIter& iter)
         REQUIRES_SHARED(art::Locks::mutator_lock_);
 
     // Preallocates all needed allocations in klass so that we can pause execution safely.
@@ -197,7 +195,7 @@
 
     void UpdateClass(art::ObjPtr<art::mirror::Class> mclass,
                      art::ObjPtr<art::mirror::DexCache> new_dex_cache,
-                     art::ObjPtr<art::mirror::ByteArray> original_dex_file)
+                     art::ObjPtr<art::mirror::Object> original_dex_file)
         REQUIRES(art::Locks::mutator_lock_);
 
     void ReleaseDexFile() REQUIRES_SHARED(art::Locks::mutator_lock_);
@@ -241,7 +239,7 @@
   jvmtiError Run() REQUIRES_SHARED(art::Locks::mutator_lock_);
 
   bool CheckAllRedefinitionAreValid() REQUIRES_SHARED(art::Locks::mutator_lock_);
-  bool CheckAllClassesAreVerified(const RedefinitionDataHolder& holder)
+  bool CheckAllClassesAreVerified(RedefinitionDataHolder& holder)
       REQUIRES_SHARED(art::Locks::mutator_lock_);
   bool EnsureAllClassAllocationsFinished() REQUIRES_SHARED(art::Locks::mutator_lock_);
   bool FinishAllRemainingAllocations(RedefinitionDataHolder& holder)
@@ -255,6 +253,8 @@
   }
 
   friend struct CallbackCtx;
+  friend class RedefinitionDataHolder;
+  friend class RedefinitionDataIter;
 };
 
 }  // namespace openjdkjvmti
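
Several signatures in this header collapse a (pointer, length) pair into a single art::ArraySlice parameter. A sketch of what such a view buys, assuming nothing about ArraySlice beyond the size()/At() calls visible in this patch:

#include <cstddef>

// Minimal non-owning view; the real art::ArraySlice has more to it.
template <typename T>
class Slice {
 public:
  Slice(T* data, size_t size) : data_(data), size_(size) {}
  size_t size() const { return size_; }
  T& At(size_t i) const { return data_[i]; }  // No bounds check in this sketch.
 private:
  T* data_;
  size_t size_;
};

// Before: Consume(const unsigned char* data, jint len), two values that can
// drift apart at call sites. After: one value that cannot.
size_t Consume(Slice<const unsigned char> data) {
  size_t sum = 0;
  for (size_t i = 0; i < data.size(); ++i) {
    sum += data.At(i);
  }
  return sum;
}

int main() {
  const unsigned char bytes[] = {0x64, 0x65, 0x78};  // "dex"
  return Consume(Slice<const unsigned char>(bytes, sizeof(bytes))) == 0x141 ? 0 : 1;
}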
diff --git a/runtime/openjdkjvmti/ti_thread.cc b/runtime/openjdkjvmti/ti_thread.cc
index 788ac30..e5ff090 100644
--- a/runtime/openjdkjvmti/ti_thread.cc
+++ b/runtime/openjdkjvmti/ti_thread.cc
@@ -44,6 +44,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/string.h"
 #include "obj_ptr.h"
+#include "ti_phase.h"
 #include "runtime.h"
 #include "runtime_callbacks.h"
 #include "ScopedLocalRef.h"
@@ -54,6 +55,8 @@
 
 namespace openjdkjvmti {
 
+art::ArtField* ThreadUtil::context_class_loader_ = nullptr;
+
 struct ThreadCallback : public art::ThreadLifecycleCallback, public art::RuntimePhaseCallback {
   jthread GetThreadObject(art::Thread* self) REQUIRES_SHARED(art::Locks::mutator_lock_) {
     if (self->GetPeer() == nullptr) {
@@ -121,6 +124,16 @@
   runtime->GetRuntimeCallbacks()->AddRuntimePhaseCallback(&gThreadCallback);
 }
 
+void ThreadUtil::CacheData() {
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::ObjPtr<art::mirror::Class> thread_class =
+      soa.Decode<art::mirror::Class>(art::WellKnownClasses::java_lang_Thread);
+  CHECK(thread_class != nullptr);
+  context_class_loader_ = thread_class->FindDeclaredInstanceField("contextClassLoader",
+                                                                  "Ljava/lang/ClassLoader;");
+  CHECK(context_class_loader_ != nullptr);
+}
+
 void ThreadUtil::Unregister() {
   art::ScopedThreadStateChange stsc(art::Thread::Current(),
                                     art::ThreadState::kWaitingForDebuggerToAttach);
@@ -146,22 +159,6 @@
   return ERR(NONE);
 }
 
-// Read the context classloader from a Java thread object. This is a lazy implementation
-// that assumes GetThreadInfo isn't called too often. If we instead cache the ArtField,
-// we will have to add synchronization as this can't be cached on startup (which is
-// potentially runtime startup).
-static art::ObjPtr<art::mirror::Object> GetContextClassLoader(art::ObjPtr<art::mirror::Object> peer)
-    REQUIRES_SHARED(art::Locks::mutator_lock_) {
-  if (peer == nullptr) {
-    return nullptr;
-  }
-  art::ObjPtr<art::mirror::Class> klass = peer->GetClass();
-  art::ArtField* cc_field = klass->FindDeclaredInstanceField("contextClassLoader",
-                                                             "Ljava/lang/ClassLoader;");
-  CHECK(cc_field != nullptr);
-  return cc_field->GetObject(peer);
-}
-
 // Get the native thread. The spec says a null object denotes the current thread.
 static art::Thread* GetNativeThread(jthread thread,
                                     const art::ScopedObjectAccessAlreadyRunnable& soa)
@@ -178,6 +175,9 @@
   if (info_ptr == nullptr) {
     return ERR(NULL_POINTER);
   }
+  if (!PhaseUtil::IsLivePhase()) {
+    return JVMTI_ERROR_WRONG_PHASE;
+  }
 
   art::ScopedObjectAccess soa(art::Thread::Current());
 
@@ -217,7 +217,10 @@
     }
 
     // Context classloader.
-    art::ObjPtr<art::mirror::Object> ccl = GetContextClassLoader(peer);
+    DCHECK(context_class_loader_ != nullptr);
+    art::ObjPtr<art::mirror::Object> ccl = peer != nullptr
+        ? context_class_loader_->GetObject(peer)
+        : nullptr;
     info_ptr->context_class_loader = ccl == nullptr
                                          ? nullptr
                                          : soa.AddLocalReference<jobject>(ccl);
@@ -272,7 +275,10 @@
     }
 
     // Context classloader.
-    art::ObjPtr<art::mirror::Object> ccl = GetContextClassLoader(peer);
+    DCHECK(context_class_loader_ != nullptr);
+    art::ObjPtr<art::mirror::Object> ccl = peer != nullptr
+        ? context_class_loader_->GetObject(peer)
+        : nullptr;
     info_ptr->context_class_loader = ccl == nullptr
                                          ? nullptr
                                          : soa.AddLocalReference<jobject>(ccl);
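
GetThreadInfo previously re-ran FindDeclaredInstanceField on every call; the patch hoists the lookup into CacheData(), run once at a point where java.lang.Thread is known to be loaded. The shape of the pattern, with hypothetical stand-ins for the ART types:

#include <cassert>
#include <string>

struct Field { std::string name; };  // Stand-in for art::ArtField.

static Field g_context_class_loader{"contextClassLoader"};

// Stands in for the reflective FindDeclaredInstanceField walk.
Field* ExpensiveLookup() { return &g_context_class_loader; }

class ThreadUtilSketch {
 public:
  // Run once, at a point where the lookup is guaranteed to succeed.
  static void CacheData() {
    cached_ = ExpensiveLookup();
    assert(cached_ != nullptr);  // Failure here is a bug, not a runtime condition.
  }
  // Hot path: no search, just an assertion that CacheData() already ran.
  static Field* ContextClassLoaderField() {
    assert(cached_ != nullptr);
    return cached_;
  }
 private:
  static Field* cached_;
};

Field* ThreadUtilSketch::cached_ = nullptr;

int main() {
  ThreadUtilSketch::CacheData();
  return ThreadUtilSketch::ContextClassLoaderField()->name == "contextClassLoader" ? 0 : 1;
}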
diff --git a/runtime/openjdkjvmti/ti_thread.h b/runtime/openjdkjvmti/ti_thread.h
index f6f93ee..c7f75d8 100644
--- a/runtime/openjdkjvmti/ti_thread.h
+++ b/runtime/openjdkjvmti/ti_thread.h
@@ -35,6 +35,10 @@
 #include "jni.h"
 #include "jvmti.h"
 
+namespace art {
+class ArtField;
+}
+
 namespace openjdkjvmti {
 
 class EventHandler;
@@ -44,6 +48,9 @@
   static void Register(EventHandler* event_handler);
   static void Unregister();
 
+  // To be called when it is safe to cache data.
+  static void CacheData();
+
   static jvmtiError GetAllThreads(jvmtiEnv* env, jint* threads_count_ptr, jthread** threads_ptr);
 
   static jvmtiError GetCurrentThread(jvmtiEnv* env, jthread* thread_ptr);
@@ -60,6 +67,9 @@
                                    jvmtiStartFunction proc,
                                    const void* arg,
                                    jint priority);
+
+ private:
+  static art::ArtField* context_class_loader_;
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index 36421b9..15d8dd0 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -42,6 +42,7 @@
 #include "gc_root-inl.h"
 #include "globals.h"
 #include "jni_env_ext-inl.h"
+#include "jvalue.h"
 #include "jvmti.h"
 #include "linear_alloc.h"
 #include "mem_map.h"
@@ -69,17 +70,18 @@
   for (ArtClassDefinition& def : *definitions) {
     jint new_len = -1;
     unsigned char* new_data = nullptr;
+    art::ArraySlice<const unsigned char> dex_data = def.GetDexData();
     event_handler->DispatchEvent<ArtJvmtiEvent::kClassFileLoadHookRetransformable>(
         self,
         GetJniEnv(env),
-        def.klass,
-        def.loader,
-        def.name.c_str(),
-        def.protection_domain,
-        def.dex_len,
-        static_cast<const unsigned char*>(def.dex_data.get()),
-        &new_len,
-        &new_data);
+        def.GetClass(),
+        def.GetLoader(),
+        def.GetName().c_str(),
+        def.GetProtectionDomain(),
+        static_cast<jint>(dex_data.size()),
+        &dex_data.At(0),
+        /*out*/&new_len,
+        /*out*/&new_data);
     def.SetNewDexData(env, new_len, new_data);
   }
   return OK;
@@ -109,8 +111,15 @@
   std::vector<ArtClassDefinition> definitions;
   jvmtiError res = OK;
   for (jint i = 0; i < class_count; i++) {
+    jboolean is_modifiable = JNI_FALSE;
+    res = env->IsModifiableClass(classes[i], &is_modifiable);
+    if (res != OK) {
+      return res;
+    } else if (!is_modifiable) {
+      return ERR(UNMODIFIABLE_CLASS);
+    }
     ArtClassDefinition def;
-    res = FillInTransformationData(env, classes[i], &def);
+    res = def.Init(env, classes[i]);
     if (res != OK) {
       return res;
     }
@@ -139,63 +148,4 @@
   return OK;
 }
 
-jvmtiError Transformer::GetDexDataForRetransformation(ArtJvmTiEnv* env,
-                                                      art::Handle<art::mirror::Class> klass,
-                                                      /*out*/jint* dex_data_len,
-                                                      /*out*/unsigned char** dex_data) {
-  art::StackHandleScope<2> hs(art::Thread::Current());
-  art::Handle<art::mirror::ClassExt> ext(hs.NewHandle(klass->GetExtData()));
-  if (!ext.IsNull()) {
-    art::Handle<art::mirror::ByteArray> orig_dex(hs.NewHandle(ext->GetOriginalDexFileBytes()));
-    if (!orig_dex.IsNull()) {
-      *dex_data_len = static_cast<jint>(orig_dex->GetLength());
-      return CopyDataIntoJvmtiBuffer(env,
-                                     reinterpret_cast<const unsigned char*>(orig_dex->GetData()),
-                                     *dex_data_len,
-                                     /*out*/dex_data);
-    }
-  }
-  // TODO De-quicken the dex file before passing it to the agents.
-  LOG(WARNING) << "Dex file is not de-quickened yet! Quickened dex instructions might be present";
-  const art::DexFile& dex = klass->GetDexFile();
-  *dex_data_len = static_cast<jint>(dex.Size());
-  return CopyDataIntoJvmtiBuffer(env, dex.Begin(), *dex_data_len, /*out*/dex_data);
-}
-
-// TODO Move this function somewhere more appropriate.
-// Gets the data surrounding the given class.
-// TODO Make this less magical.
-jvmtiError Transformer::FillInTransformationData(ArtJvmTiEnv* env,
-                                                 jclass klass,
-                                                 ArtClassDefinition* def) {
-  JNIEnv* jni_env = GetJniEnv(env);
-  if (jni_env == nullptr) {
-    // TODO Different error might be better?
-    return ERR(INTERNAL);
-  }
-  art::ScopedObjectAccess soa(jni_env);
-  art::StackHandleScope<3> hs(art::Thread::Current());
-  art::Handle<art::mirror::Class> hs_klass(hs.NewHandle(soa.Decode<art::mirror::Class>(klass)));
-  if (hs_klass.IsNull()) {
-    return ERR(INVALID_CLASS);
-  }
-  def->klass = klass;
-  def->loader = soa.AddLocalReference<jobject>(hs_klass->GetClassLoader());
-  std::string descriptor_store;
-  std::string descriptor(hs_klass->GetDescriptor(&descriptor_store));
-  def->name = descriptor.substr(1, descriptor.size() - 2);
-  // TODO is this always null?
-  def->protection_domain = nullptr;
-  if (def->dex_data.get() == nullptr) {
-    unsigned char* new_data;
-    jvmtiError res = GetDexDataForRetransformation(env, hs_klass, &def->dex_len, &new_data);
-    if (res == OK) {
-      def->dex_data = MakeJvmtiUniquePtr(env, new_data);
-    } else {
-      return res;
-    }
-  }
-  return OK;
-}
-
 }  // namespace openjdkjvmti
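
FillInTransformationData and GetDexDataForRetransformation are folded into ArtClassDefinition::Init, whose definition is outside this diff. The general shape is two-phase initialization: a trivial constructor plus a fallible Init() that returns the error code, so the object is only used once Init succeeds. A hedged sketch with illustrative names:

#include <string>
#include <utility>
#include <vector>

enum class Error { kOk, kInvalidClass };

class ClassDefinition {
 public:
  ClassDefinition() = default;

  // All fallible work lives here, mirroring how def.Init(env, classes[i]) is
  // checked for OK above before def is used.
  Error Init(const std::string& descriptor, std::vector<unsigned char> dex_data) {
    if (descriptor.size() < 3 || descriptor.front() != 'L' || descriptor.back() != ';') {
      return Error::kInvalidClass;
    }
    // Same trimming the removed code did: "Ljava/lang/Foo;" -> "java/lang/Foo".
    name_ = descriptor.substr(1, descriptor.size() - 2);
    dex_data_ = std::move(dex_data);
    return Error::kOk;
  }

  const std::string& GetName() const { return name_; }

 private:
  std::string name_;
  std::vector<unsigned char> dex_data_;
};

int main() {
  ClassDefinition def;
  return def.Init("Ljava/lang/Object;", {}) == Error::kOk ? 0 : 1;
}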
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index c6a36e8..ba40e04 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -61,18 +61,6 @@
                                        jint class_count,
                                        const jclass* classes,
                                        /*out*/std::string* error_msg);
-
-  // Gets the data surrounding the given class.
-  static jvmtiError FillInTransformationData(ArtJvmTiEnv* env,
-                                             jclass klass,
-                                             ArtClassDefinition* def);
-
- private:
-  static jvmtiError GetDexDataForRetransformation(ArtJvmTiEnv* env,
-                                                  art::Handle<art::mirror::Class> klass,
-                                                  /*out*/jint* dex_data_length,
-                                                  /*out*/unsigned char** dex_data)
-      REQUIRES_SHARED(art::Locks::mutator_lock_);
 };
 
 }  // namespace openjdkjvmti
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index 9113f83..0784e59 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -96,7 +96,7 @@
       //     .WithType<std::vector<ti::Agent>>().AppendValues()
       //     .IntoKey(M::AgentLib)
       .Define("-agentpath:_")
-          .WithType<std::vector<ti::Agent>>().AppendValues()
+          .WithType<std::list<ti::Agent>>().AppendValues()
           .IntoKey(M::AgentPath)
       .Define("-Xms_")
           .WithType<MemoryKiB>()
@@ -708,6 +708,7 @@
   UsageMessage(stream, "  -Xps-min-classes-to-save:integervalue\n");
   UsageMessage(stream, "  -Xps-min-notification-before-wake:integervalue\n");
   UsageMessage(stream, "  -Xps-max-notification-before-wake:integervalue\n");
+  UsageMessage(stream, "  -Xps-profile-path:file-path\n");
   UsageMessage(stream, "  -Xcompiler:filename\n");
   UsageMessage(stream, "  -Xcompiler-option dex2oat-option\n");
   UsageMessage(stream, "  -Ximage-compiler-option dex2oat-option\n");
diff --git a/runtime/primitive.cc b/runtime/primitive.cc
index 2380284..1ec345a 100644
--- a/runtime/primitive.cc
+++ b/runtime/primitive.cc
@@ -44,7 +44,7 @@
   "Ljava/lang/Void;",
 };
 
-#define COUNT_OF(x) (sizeof(x) / sizeof(x[0]))
+#define COUNT_OF(x) (sizeof(x) / sizeof((x)[0]))
 
 const char* Primitive::PrettyDescriptor(Primitive::Type type) {
   static_assert(COUNT_OF(kTypeNames) == static_cast<size_t>(Primitive::kPrimLast) + 1,
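
The added parentheses are standard macro hygiene: every use of a macro parameter gets parenthesized so the expansion's parse cannot depend on operator precedence inside the argument. The type-safe C++ alternative is an array-reference template, sketched here:

#include <cstddef>
#include <cstdio>

#define COUNT_OF(x) (sizeof(x) / sizeof((x)[0]))

// Only accepts real arrays, so a pointer argument is a compile error rather
// than a silently wrong answer.
template <typename T, size_t N>
constexpr size_t ArraySize(T (&)[N]) {
  return N;
}

int main() {
  static const char* kNames[] = {"Z", "B", "C", "S", "I", "J", "F", "D", "V"};
  std::printf("%zu %zu\n", COUNT_OF(kNames), ArraySize(kNames));  // Prints: 9 9
  return 0;
}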
diff --git a/runtime/quick/inline_method_analyser.cc b/runtime/quick/inline_method_analyser.cc
index b009b47..3347070 100644
--- a/runtime/quick/inline_method_analyser.cc
+++ b/runtime/quick/inline_method_analyser.cc
@@ -215,9 +215,8 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(IsInstructionIPut(new_iput->Opcode()));
   uint32_t field_index = new_iput->VRegC_22c();
-  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  mirror::DexCache* dex_cache = method->GetDexCache();
-  ArtField* field = dex_cache->GetResolvedField(field_index, pointer_size);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* field = class_linker->LookupResolvedField(field_index, method, /* is_static */ false);
   if (UNLIKELY(field == nullptr)) {
     return false;
   }
@@ -227,7 +226,9 @@
     if (iputs[old_pos].field_index == DexFile::kDexNoIndex16) {
       break;
     }
-    ArtField* f = dex_cache->GetResolvedField(iputs[old_pos].field_index, pointer_size);
+    ArtField* f = class_linker->LookupResolvedField(iputs[old_pos].field_index,
+                                                    method,
+                                                    /* is_static */ false);
     DCHECK(f != nullptr);
     if (f == field) {
       auto back_it = std::copy(iputs + old_pos + 1, iputs + arraysize(iputs), iputs + old_pos);
@@ -732,9 +733,9 @@
   if (method == nullptr) {
     return false;
   }
-  mirror::DexCache* dex_cache = method->GetDexCache();
-  PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-  ArtField* field = dex_cache->GetResolvedField(field_idx, pointer_size);
+  ObjPtr<mirror::DexCache> dex_cache = method->GetDexCache();
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ArtField* field = class_linker->LookupResolvedField(field_idx, method, /* is_static */ false);
   if (field == nullptr || field->IsStatic()) {
     return false;
   }
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index 3c64d40..87bc7df 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -671,14 +671,14 @@
     soa.Self()->ClearException();
     jclass exception_class = soa.Env()->FindClass("java/lang/reflect/InvocationTargetException");
     if (exception_class == nullptr) {
-      soa.Self()->AssertPendingOOMException();
+      soa.Self()->AssertPendingException();
       return nullptr;
     }
     jmethodID mid = soa.Env()->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
     CHECK(mid != nullptr);
     jobject exception_instance = soa.Env()->NewObject(exception_class, mid, th);
     if (exception_instance == nullptr) {
-      soa.Self()->AssertPendingOOMException();
+      soa.Self()->AssertPendingException();
       return nullptr;
     }
     soa.Env()->Throw(reinterpret_cast<jthrowable>(exception_instance));
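
The assertions are loosened because FindClass and NewObject can leave exceptions other than OutOfMemoryError pending (class-loading and linkage errors, or a throwing constructor). The surrounding wrap-and-rethrow sequence, written against plain JNI:

#include <jni.h>

// Wrap a pending throwable in InvocationTargetException and rethrow it. On
// each failure the JNI call has already set some pending exception, not
// necessarily an OOME, so the caller just propagates by returning.
void WrapAndThrow(JNIEnv* env, jthrowable cause) {
  jclass exception_class = env->FindClass("java/lang/reflect/InvocationTargetException");
  if (exception_class == nullptr) {
    return;  // Pending exception set by FindClass.
  }
  jmethodID mid = env->GetMethodID(exception_class, "<init>", "(Ljava/lang/Throwable;)V");
  if (mid == nullptr) {
    return;  // Pending exception set by GetMethodID.
  }
  jobject instance = env->NewObject(exception_class, mid, cause);
  if (instance == nullptr) {
    return;  // OOME or an exception thrown by the constructor itself.
  }
  env->Throw(reinterpret_cast<jthrowable>(instance));
}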
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index e254dfe..2f70ded 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -509,7 +509,6 @@
 };
 
 TEST_F(ReflectionTest, StaticMainMethod) {
-  TEST_DISABLED_FOR_READ_BARRIER_WITH_OPTIMIZING_FOR_UNSUPPORTED_INSTRUCTION_SETS();
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Main");
   StackHandleScope<1> hs(soa.Self());
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 69dcfeb..48efbe5 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -106,7 +106,6 @@
 #include "native/dalvik_system_VMStack.h"
 #include "native/dalvik_system_ZygoteHooks.h"
 #include "native/java_lang_Class.h"
-#include "native/java_lang_DexCache.h"
 #include "native/java_lang_Object.h"
 #include "native/java_lang_String.h"
 #include "native/java_lang_StringFactory.h"
@@ -114,6 +113,7 @@
 #include "native/java_lang_Thread.h"
 #include "native/java_lang_Throwable.h"
 #include "native/java_lang_VMClassLoader.h"
+#include "native/java_lang_Void.h"
 #include "native/java_lang_invoke_MethodHandleImpl.h"
 #include "native/java_lang_ref_FinalizerReference.h"
 #include "native/java_lang_ref_Reference.h"
@@ -285,6 +285,13 @@
     LOG(WARNING) << "Current thread not detached in Runtime shutdown";
   }
 
+  if (jit_ != nullptr) {
+    // Stop the profile saver thread before marking the runtime as shutting down.
+    // The saver will try to dump the profiles before being stopped, and that
+    // requires holding the mutator lock.
+    jit_->StopProfileSaver();
+  }
+
   {
     ScopedTrace trace2("Wait for shutdown cond");
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
@@ -326,8 +333,6 @@
     // Delete thread pool before the thread list since we don't want to wait forever on the
     // JIT compiler threads.
     jit_->DeleteThreadPool();
-    // Similarly, stop the profile saver thread before deleting the thread list.
-    jit_->StopProfileSaver();
   }
 
   // TODO Maybe do some locking.
@@ -801,11 +806,11 @@
   // before fork aren't attributed to an app.
   heap_->ResetGcPerformanceInfo();
 
-
-  if (!is_system_server &&
+  // We may want to collect profiling samples for system server, but we never want to JIT there.
+  if ((!is_system_server || !jit_options_->UseJitCompilation()) &&
       !safe_mode_ &&
       (jit_options_->UseJitCompilation() || jit_options_->GetSaveProfilingInfo()) &&
-      jit_.get() == nullptr) {
+      jit_ == nullptr) {
     // Note that when running ART standalone (not zygote, nor zygote fork),
     // the jit may have already been created.
     CreateJit();
@@ -1538,7 +1543,6 @@
   register_dalvik_system_VMStack(env);
   register_dalvik_system_ZygoteHooks(env);
   register_java_lang_Class(env);
-  register_java_lang_DexCache(env);
   register_java_lang_Object(env);
   register_java_lang_invoke_MethodHandleImpl(env);
   register_java_lang_ref_FinalizerReference(env);
@@ -1556,6 +1560,7 @@
   register_java_lang_Thread(env);
   register_java_lang_Throwable(env);
   register_java_lang_VMClassLoader(env);
+  register_java_lang_Void(env);
   register_java_util_concurrent_atomic_AtomicLong(env);
   register_libcore_util_CharsetUtils(env);
   register_org_apache_harmony_dalvik_ddmc_DdmServer(env);
@@ -1961,9 +1966,7 @@
 }
 
 void Runtime::RegisterAppInfo(const std::vector<std::string>& code_paths,
-                              const std::string& profile_output_filename,
-                              const std::string& foreign_dex_profile_path,
-                              const std::string& app_dir) {
+                              const std::string& profile_output_filename) {
   if (jit_.get() == nullptr) {
     // We are not JITing. Nothing to do.
     return;
@@ -1985,18 +1988,7 @@
     return;
   }
 
-  jit_->StartProfileSaver(profile_output_filename,
-                          code_paths,
-                          foreign_dex_profile_path,
-                          app_dir);
-}
-
-void Runtime::NotifyDexLoaded(const std::string& dex_location) {
-  VLOG(profiler) << "Notify dex loaded: " << dex_location;
-  // We know that if the ProfileSaver is started then we can record profile information.
-  if (ProfileSaver::IsStarted()) {
-    ProfileSaver::NotifyDexUse(dex_location);
-  }
+  jit_->StartProfileSaver(profile_output_filename, code_paths);
 }
 
 // Transaction support.
@@ -2163,6 +2155,19 @@
   jit_.reset(jit::Jit::Create(jit_options_.get(), &error_msg));
   if (jit_.get() == nullptr) {
     LOG(WARNING) << "Failed to create JIT " << error_msg;
+    return;
+  }
+
+  // In case we have a profile path passed as a command line argument,
+  // register the current class path for profiling now. Note that we cannot do
+  // this before we create the JIT, and having it here is the most convenient way.
+  // This is used when testing profiles with the dalvikvm command, as there is no
+  // framework to register the dex files for profiling.
+  if (jit_options_->GetSaveProfilingInfo() &&
+      !jit_options_->GetProfileSaverOptions().GetProfilePath().empty()) {
+    std::vector<std::string> dex_filenames;
+    Split(class_path_string_, ':', &dex_filenames);
+    RegisterAppInfo(dex_filenames, jit_options_->GetProfileSaverOptions().GetProfilePath());
   }
 }
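
The ordering constraint behind moving StopProfileSaver: the saver's final dump needs the mutator lock, so the thread must be stopped and joined before shutdown makes that lock unobtainable. A generic sketch of the same constraint, with std::shared_mutex standing in for the mutator lock:

#include <atomic>
#include <chrono>
#include <shared_mutex>
#include <thread>

std::shared_mutex mutator_lock;       // Stand-in, not the real mutator lock.
std::atomic<bool> shutting_down{false};

void ProfileSaverLoop() {
  while (!shutting_down.load()) {
    std::this_thread::sleep_for(std::chrono::milliseconds(1));
  }
  // Final dump needs a shared hold on the lock.
  std::shared_lock<std::shared_mutex> lk(mutator_lock);
  // ... write profiles ...
}

int main() {
  std::thread saver(ProfileSaverLoop);
  // Correct order: stop and join the saver *before* taking the lock for
  // teardown. Taking the unique_lock first would block the saver's final
  // shared_lock, and join() would never return.
  shutting_down.store(true);
  saver.join();
  std::unique_lock<std::shared_mutex> teardown(mutator_lock);
  // ... tear down runtime state ...
  return 0;
}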
 
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 4a0169d..92feabb 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -454,10 +454,7 @@
   }
 
   void RegisterAppInfo(const std::vector<std::string>& code_paths,
-                       const std::string& profile_output_filename,
-                       const std::string& foreign_dex_profile_path,
-                       const std::string& app_dir);
-  void NotifyDexLoaded(const std::string& dex_location);
+                       const std::string& profile_output_filename);
 
   // Transaction support.
   bool IsActiveTransaction() const {
@@ -736,7 +733,7 @@
   std::string class_path_string_;
   std::vector<std::string> properties_;
 
-  std::vector<ti::Agent> agents_;
+  std::list<ti::Agent> agents_;
   std::vector<Plugin> plugins_;
 
   // The default stack size for managed threads created by the runtime.
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index e68a1b2..16190cd 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -120,8 +120,8 @@
 RUNTIME_OPTIONS_KEY (std::string,         CpuAbiList)
 RUNTIME_OPTIONS_KEY (std::string,         Fingerprint)
 RUNTIME_OPTIONS_KEY (ExperimentalFlags,   Experimental,     ExperimentalFlags::kNone) // -Xexperimental:{...}
-RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>
-RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>
+RUNTIME_OPTIONS_KEY (std::list<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>
+RUNTIME_OPTIONS_KEY (std::list<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>
 RUNTIME_OPTIONS_KEY (std::vector<Plugin>,            Plugins)  // -Xplugin:<library>
 
 // Not parse-able from command line, but can be provided explicitly.
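
The agent containers switch from std::vector to std::list here and in parsed_options.cc and runtime.h above. A plausible motivation is stability: a list never relocates its elements, so pointers and iterators to an agent survive later appends, and elements are never copied or moved:

#include <cassert>
#include <list>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> vec;
  vec.reserve(1);
  vec.push_back("agent1");
  std::string* vp = &vec[0];
  vec.push_back("agent2");     // Exceeds capacity: reallocation, vp dangles.
  (void)vp;                    // Dereferencing vp here would be UB.

  std::list<std::string> lst;
  lst.push_back("agent1");
  std::string* lp = &lst.front();
  lst.push_back("agent2");     // Nodes never move.
  assert(lp == &lst.front());  // Still valid, still the same object.
  return 0;
}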
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index a3286ac..5f03741 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -141,6 +141,8 @@
   ALWAYS_INLINE explicit ScopedObjectAccessUnchecked(Thread* self)
       REQUIRES(!Locks::thread_suspend_count_lock_);
 
+  ALWAYS_INLINE ~ScopedObjectAccessUnchecked() REQUIRES(!Locks::thread_suspend_count_lock_) {}
+
   // Used when we want a scoped JNI thread state but have no thread/JNIEnv. Consequently doesn't
   // change into Runnable or acquire a share on the mutator_lock_.
   explicit ScopedObjectAccessUnchecked(JavaVM* vm) ALWAYS_INLINE
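
Spelling out an empty destructor lets Clang's thread-safety analysis attach a REQUIRES(!lock) negative capability to scope exit, which an implicitly generated destructor cannot carry. A compilable sketch of the machinery (macros mirror the Clang documentation, not ART's headers):

// Compile with: clang++ -c -std=c++14 -Wthread-safety -Wthread-safety-negative
#define CAPABILITY(x)  __attribute__((capability(x)))
#define REQUIRES(...)  __attribute__((requires_capability(__VA_ARGS__)))
#define ACQUIRE(...)   __attribute__((acquire_capability(__VA_ARGS__)))
#define RELEASE(...)   __attribute__((release_capability(__VA_ARGS__)))

class CAPABILITY("mutex") Mutex {
 public:
  void Lock() ACQUIRE();
  void Unlock() RELEASE();
};

Mutex suspend_count_lock;

class ScopedAccess {
 public:
  // Both ends of the scope assert the negative capability: neither may run
  // while suspend_count_lock is held by this thread.
  ScopedAccess() REQUIRES(!suspend_count_lock) {}
  ~ScopedAccess() REQUIRES(!suspend_count_lock) {}
};

void Ok() REQUIRES(!suspend_count_lock) {
  ScopedAccess scope;  // Fine: the lock is provably not held here.
}

void Bad() REQUIRES(!suspend_count_lock) {
  suspend_count_lock.Lock();
  ScopedAccess scope;  // Warns: constructed while the lock is held. The
                       // explicit destructor annotation likewise warns at the
                       // end of this scope, where the lock is still held.
}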
diff --git a/runtime/signal_catcher.cc b/runtime/signal_catcher.cc
index 674459d..0b7ea2f 100644
--- a/runtime/signal_catcher.cc
+++ b/runtime/signal_catcher.cc
@@ -32,6 +32,7 @@
 #include "base/unix_file/fd_file.h"
 #include "class_linker.h"
 #include "gc/heap.h"
+#include "jit/profile_saver.h"
 #include "os.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
@@ -154,8 +155,9 @@
 }
 
 void SignalCatcher::HandleSigUsr1() {
-  LOG(INFO) << "SIGUSR1 forcing GC (no HPROF)";
+  LOG(INFO) << "SIGUSR1 forcing GC (no HPROF) and profile save";
   Runtime::Current()->GetHeap()->CollectGarbage(false);
+  ProfileSaver::ForceProcessProfiles();
 }
 
 int SignalCatcher::WaitForSignal(Thread* self, SignalSet& signals) {
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 51a24e4..333128b 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -142,8 +142,10 @@
       InlineInfo inline_info = GetCurrentInlineInfo();
       const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
       CodeInfoEncoding encoding = method_header->GetOptimizedCodeInfo().ExtractEncoding();
+      MethodInfo method_info = method_header->GetOptimizedMethodInfo();
       DCHECK(walk_kind_ != StackWalkKind::kSkipInlinedFrames);
       return GetResolvedMethod(*GetCurrentQuickFrame(),
+                               method_info,
                                inline_info,
                                encoding.inline_info.encoding,
                                depth_in_stack_map);
@@ -647,7 +649,7 @@
   }
 
   const void* code = method->GetEntryPointFromQuickCompiledCode();
-  if (code == GetQuickInstrumentationEntryPoint()) {
+  if (code == GetQuickInstrumentationEntryPoint() || code == GetInvokeObsoleteMethodStub()) {
     return;
   }
 
diff --git a/runtime/stack.h b/runtime/stack.h
index 90a0aee..bdaa4c3 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -197,6 +197,11 @@
     return *reinterpret_cast<const int32_t*>(vreg);
   }
 
+  // Shorts are extended to Ints in VRegs. Interpreter intrinsics need them as shorts.
+  int16_t GetVRegShort(size_t i) const {
+    return static_cast<int16_t>(GetVReg(i));
+  }
+
   uint32_t* GetVRegAddr(size_t i) {
     return &vregs_[i];
   }
@@ -425,8 +430,15 @@
  private:
   ShadowFrame(uint32_t num_vregs, ShadowFrame* link, ArtMethod* method,
               uint32_t dex_pc, bool has_reference_array)
-      : link_(link), method_(method), result_register_(nullptr), dex_pc_ptr_(nullptr),
-        code_item_(nullptr), number_of_vregs_(num_vregs), dex_pc_(dex_pc) {
+      : link_(link),
+        method_(method),
+        result_register_(nullptr),
+        dex_pc_ptr_(nullptr),
+        code_item_(nullptr),
+        number_of_vregs_(num_vregs),
+        dex_pc_(dex_pc),
+        cached_hotness_countdown_(0),
+        hotness_countdown_(0) {
     // TODO(iam): Remove this parameter, it's an artifact of portable removal
     DCHECK(has_reference_array);
     if (has_reference_array) {
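
GetVRegShort recovers the short stored sign-extended in a 32-bit vreg by plain truncation:

#include <cassert>
#include <cstdint>

// int32_t -> int16_t keeps the low 16 bits; on ART's two's-complement
// targets that exactly undoes the earlier sign extension.
int16_t GetVRegShortSketch(int32_t vreg) {
  return static_cast<int16_t>(vreg);
}

int main() {
  int16_t original = -25536;        // Bit pattern 0x9C40.
  int32_t vreg = original;          // Sign-extends to 0xFFFF9C40.
  assert(GetVRegShortSketch(vreg) == original);
  return 0;
}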
diff --git a/runtime/stack_map.cc b/runtime/stack_map.cc
index d657311..250ff2a 100644
--- a/runtime/stack_map.cc
+++ b/runtime/stack_map.cc
@@ -118,7 +118,8 @@
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     bool dump_stack_maps,
-                    InstructionSet instruction_set) const {
+                    InstructionSet instruction_set,
+                    const MethodInfo& method_info) const {
   CodeInfoEncoding encoding = ExtractEncoding();
   size_t number_of_stack_maps = GetNumberOfStackMaps(encoding);
   vios->Stream()
@@ -139,6 +140,7 @@
       stack_map.Dump(vios,
                      *this,
                      encoding,
+                     method_info,
                      code_offset,
                      number_of_dex_registers,
                      instruction_set,
@@ -189,6 +191,7 @@
 void StackMap::Dump(VariableIndentationOutputStream* vios,
                     const CodeInfo& code_info,
                     const CodeInfoEncoding& encoding,
+                    const MethodInfo& method_info,
                     uint32_t code_offset,
                     uint16_t number_of_dex_registers,
                     InstructionSet instruction_set,
@@ -222,12 +225,13 @@
     // We do not know the length of the dex register maps of inlined frames
     // at this level, so we just pass null to `InlineInfo::Dump` to tell
     // it not to look at these maps.
-    inline_info.Dump(vios, code_info, nullptr);
+    inline_info.Dump(vios, code_info, method_info, nullptr);
   }
 }
 
 void InlineInfo::Dump(VariableIndentationOutputStream* vios,
                       const CodeInfo& code_info,
+                      const MethodInfo& method_info,
                       uint16_t number_of_dex_registers[]) const {
   InlineInfoEncoding inline_info_encoding = code_info.ExtractEncoding().inline_info.encoding;
   vios->Stream() << "InlineInfo with depth "
@@ -245,7 +249,7 @@
     } else {
       vios->Stream()
           << std::dec
-          << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, i);
+          << ", method_index=" << GetMethodIndexAtDepth(inline_info_encoding, method_info, i);
     }
     vios->Stream() << ")\n";
     if (HasDexRegisterMapAtDepth(inline_info_encoding, i) && (number_of_dex_registers != nullptr)) {
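
The method_info parameter threaded through these Dump routines, together with the MethodIndexIdx renames in stack_map.h below, adds one level of indirection: inline entries now store an index into a separate MethodInfo table, and the real method index is recovered by composing the two lookups. A sketch with plain vectors; a plausible motivation is deduplication, which shrinks the per-entry bit width:

#include <cstdint>
#include <vector>

// Method indices are interned into one table; stack map entries store the
// (small) position in that table instead of the index itself.
class MethodInfoSketch {
 public:
  uint32_t GetMethodIndex(uint32_t idx) const { return table_[idx]; }
  uint32_t Intern(uint32_t method_index) {
    for (uint32_t i = 0; i < table_.size(); ++i) {
      if (table_[i] == method_index) return i;  // Deduplicated.
    }
    table_.push_back(method_index);
    return static_cast<uint32_t>(table_.size() - 1);
  }
 private:
  std::vector<uint32_t> table_;
};

struct InlineEntry {
  uint32_t method_index_idx;  // What the encoding now stores.
};

// Mirrors GetMethodIndexAtDepth: entry -> idx -> MethodInfo -> method index.
uint32_t GetMethodIndex(const InlineEntry& e, const MethodInfoSketch& info) {
  return info.GetMethodIndex(e.method_index_idx);
}

int main() {
  MethodInfoSketch info;
  InlineEntry a{info.Intern(1234)};
  InlineEntry b{info.Intern(1234)};  // Same method index: same table slot.
  return (a.method_index_idx == b.method_index_idx &&
          GetMethodIndex(a, info) == 1234) ? 0 : 1;
}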
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 67f0b57..a224986 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -23,6 +23,7 @@
 #include "bit_memory_region.h"
 #include "dex_file.h"
 #include "memory_region.h"
+#include "method_info.h"
 #include "leb128.h"
 
 namespace art {
@@ -367,7 +368,8 @@
     return region_.size();
   }
 
-  void Dump(VariableIndentationOutputStream* vios, const CodeInfo& code_info);
+  void Dump(VariableIndentationOutputStream* vios,
+            const CodeInfo& code_info);
 
   // Special (invalid) Dex register location catalog entry index meaning
   // that there is no location for a given Dex register (i.e., it is
@@ -571,7 +573,7 @@
     }
   }
 
-  bool IsDexRegisterLive(uint16_t dex_register_number) const {
+  ALWAYS_INLINE bool IsDexRegisterLive(uint16_t dex_register_number) const {
     size_t live_bit_mask_offset_in_bits = GetLiveBitMaskOffset() * kBitsPerByte;
     return region_.LoadBit(live_bit_mask_offset_in_bits + dex_register_number);
   }
@@ -686,7 +688,13 @@
 
 class StackMapEncoding {
  public:
-  StackMapEncoding() {}
+  StackMapEncoding()
+      : dex_pc_bit_offset_(0),
+        dex_register_map_bit_offset_(0),
+        inline_info_bit_offset_(0),
+        register_mask_index_bit_offset_(0),
+        stack_mask_index_bit_offset_(0),
+        total_bit_size_(0) {}
 
   // Set stack map bit layout based on given sizes.
   // Returns the size of stack map in bits.
@@ -862,6 +870,7 @@
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& code_info,
             const CodeInfoEncoding& encoding,
+            const MethodInfo& method_info,
             uint32_t code_offset,
             uint16_t number_of_dex_registers,
             InstructionSet instruction_set,
@@ -885,12 +894,12 @@
 
 class InlineInfoEncoding {
  public:
-  void SetFromSizes(size_t method_index_max,
+  void SetFromSizes(size_t method_index_idx_max,
                     size_t dex_pc_max,
                     size_t extra_data_max,
                     size_t dex_register_map_size) {
     total_bit_size_ = kMethodIndexBitOffset;
-    total_bit_size_ += MinimumBitsToStore(method_index_max);
+    total_bit_size_ += MinimumBitsToStore(method_index_idx_max);
 
     dex_pc_bit_offset_ = dchecked_integral_cast<uint8_t>(total_bit_size_);
     // Note: We're not encoding the dex pc if there is none. That's the case
@@ -908,7 +917,7 @@
     total_bit_size_ += MinimumBitsToStore(dex_register_map_size);
   }
 
-  ALWAYS_INLINE FieldEncoding GetMethodIndexEncoding() const {
+  ALWAYS_INLINE FieldEncoding GetMethodIndexIdxEncoding() const {
     return FieldEncoding(kMethodIndexBitOffset, dex_pc_bit_offset_);
   }
   ALWAYS_INLINE FieldEncoding GetDexPcEncoding() const {
@@ -975,16 +984,23 @@
     }
   }
 
-  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
-                                               uint32_t depth) const {
+  ALWAYS_INLINE uint32_t GetMethodIndexIdxAtDepth(const InlineInfoEncoding& encoding,
+                                                  uint32_t depth) const {
     DCHECK(!EncodesArtMethodAtDepth(encoding, depth));
-    return encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
+    return encoding.GetMethodIndexIdxEncoding().Load(GetRegionAtDepth(encoding, depth));
   }
 
-  ALWAYS_INLINE void SetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
-                                           uint32_t depth,
-                                           uint32_t index) {
-    encoding.GetMethodIndexEncoding().Store(GetRegionAtDepth(encoding, depth), index);
+  ALWAYS_INLINE void SetMethodIndexIdxAtDepth(const InlineInfoEncoding& encoding,
+                                              uint32_t depth,
+                                              uint32_t index) {
+    encoding.GetMethodIndexIdxEncoding().Store(GetRegionAtDepth(encoding, depth), index);
+  }
+
+  ALWAYS_INLINE uint32_t GetMethodIndexAtDepth(const InlineInfoEncoding& encoding,
+                                               const MethodInfo& method_info,
+                                               uint32_t depth) const {
+    return method_info.GetMethodIndex(GetMethodIndexIdxAtDepth(encoding, depth));
   }
 
   ALWAYS_INLINE uint32_t GetDexPcAtDepth(const InlineInfoEncoding& encoding,
@@ -1012,7 +1028,8 @@
   ALWAYS_INLINE ArtMethod* GetArtMethodAtDepth(const InlineInfoEncoding& encoding,
                                                uint32_t depth) const {
     uint32_t low_bits = encoding.GetExtraDataEncoding().Load(GetRegionAtDepth(encoding, depth));
-    uint32_t high_bits = encoding.GetMethodIndexEncoding().Load(GetRegionAtDepth(encoding, depth));
+    uint32_t high_bits = encoding.GetMethodIndexIdxEncoding().Load(
+        GetRegionAtDepth(encoding, depth));
     if (high_bits == 0) {
       return reinterpret_cast<ArtMethod*>(low_bits);
     } else {
@@ -1040,6 +1057,7 @@
 
   void Dump(VariableIndentationOutputStream* vios,
             const CodeInfo& info,
+            const MethodInfo& method_info,
             uint16_t* number_of_dex_registers) const;
 
  private:
@@ -1219,12 +1237,18 @@
     encoding.GetInvokeTypeEncoding().Store(region_, invoke_type);
   }
 
-  ALWAYS_INLINE uint32_t GetMethodIndex(const InvokeInfoEncoding& encoding) const {
+  ALWAYS_INLINE uint32_t GetMethodIndexIdx(const InvokeInfoEncoding& encoding) const {
     return encoding.GetMethodIndexEncoding().Load(region_);
   }
 
-  ALWAYS_INLINE void SetMethodIndex(const InvokeInfoEncoding& encoding, uint32_t method_index) {
-    encoding.GetMethodIndexEncoding().Store(region_, method_index);
+  ALWAYS_INLINE void SetMethodIndexIdx(const InvokeInfoEncoding& encoding,
+                                       uint32_t method_index_idx) {
+    encoding.GetMethodIndexEncoding().Store(region_, method_index_idx);
+  }
+
+  ALWAYS_INLINE uint32_t GetMethodIndex(const InvokeInfoEncoding& encoding,
+                                        MethodInfo method_info) const {
+    return method_info.GetMethodIndex(GetMethodIndexIdx(encoding));
   }
 
   bool IsValid() const { return region_.pointer() != nullptr; }
@@ -1542,7 +1566,8 @@
             uint32_t code_offset,
             uint16_t number_of_dex_registers,
             bool dump_stack_maps,
-            InstructionSet instruction_set) const;
+            InstructionSet instruction_set,
+            const MethodInfo& method_info) const;
 
   // Check that the code info has valid stack map and abort if it does not.
   void AssertValidStackMap(const CodeInfoEncoding& encoding) const {
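The MethodInfo parameter threaded through these dumpers reflects the new indirection: inline infos and invoke infos now store a small index ("method_index_idx") into a deduplicated table of dex method indices instead of the method index itself. A minimal sketch of that lookup, using a hypothetical SimpleMethodInfo rather than ART's actual bit-packed layout:

#include <cstdint>
#include <utility>
#include <vector>

// Hypothetical stand-in for ART's MethodInfo: a table of unique dex method
// indices. Stack maps record a small index into this table (the
// "method_index_idx") rather than the possibly large, repeated method index.
class SimpleMethodInfo {
 public:
  explicit SimpleMethodInfo(std::vector<uint32_t> method_indices)
      : method_indices_(std::move(method_indices)) {}

  // Mirrors MethodInfo::GetMethodIndex(idx): resolve the index-of-index.
  uint32_t GetMethodIndex(uint32_t idx) const { return method_indices_[idx]; }

 private:
  std::vector<uint32_t> method_indices_;
};

// Usage: an inline frame that encoded method_index_idx == 1 resolves to the
// dex method index stored in slot 1 of the table.
//   SimpleMethodInfo info({0x1234, 0x5678});
//   uint32_t dex_method_index = info.GetMethodIndex(1);  // 0x5678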
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 482e0e3..02a1e4d 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -94,9 +94,7 @@
           if (held_mutex != nullptr &&
               held_mutex != Locks::mutator_lock_ &&
               held_mutex != cond_var_mutex) {
-            std::vector<BaseMutex*>& expected_mutexes = Locks::expected_mutexes_on_weak_ref_access_;
-            CHECK(std::find(expected_mutexes.begin(), expected_mutexes.end(), held_mutex) !=
-                  expected_mutexes.end())
+            CHECK(Locks::IsExpectedOnWeakRefAccess(held_mutex))
                 << "Holding unexpected mutex " << held_mutex->GetName()
                 << " when accessing weak ref";
           }
diff --git a/runtime/thread.cc b/runtime/thread.cc
index ff66cc1..008c388 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -16,6 +16,10 @@
 
 #include "thread.h"
 
+#if !defined(__APPLE__)
+#include <sched.h>
+#endif
+
 #include <pthread.h>
 #include <signal.h>
 #include <sys/resource.h>
@@ -1591,8 +1595,21 @@
   if (thread != nullptr) {
     int policy;
     sched_param sp;
+#if !defined(__APPLE__)
+    // b/36445592 Don't use pthread_getschedparam since pthread may have exited.
+    policy = sched_getscheduler(tid);
+    if (policy == -1) {
+      PLOG(WARNING) << "sched_getscheduler(" << tid << ")";
+    }
+    int sched_getparam_result = sched_getparam(tid, &sp);
+    if (sched_getparam_result == -1) {
+      PLOG(WARNING) << "sched_getparam(" << tid << ", &sp)";
+      sp.sched_priority = -1;
+    }
+#else
     CHECK_PTHREAD_CALL(pthread_getschedparam, (thread->tlsPtr_.pthread_self, &policy, &sp),
                        __FUNCTION__);
+#endif
     os << " sched=" << policy << "/" << sp.sched_priority
        << " handle=" << reinterpret_cast<void*>(thread->tlsPtr_.pthread_self);
   }
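The switch from pthread_getschedparam to the tid-based syscalls avoids touching a pthread_t that may already be invalid (b/36445592). A standalone sketch of the same pattern, assuming a Linux target and a hypothetical helper name:

#include <sched.h>
#include <cstdio>

// Query another thread's scheduling state by kernel tid rather than by
// pthread_t, so a racing thread exit cannot leave us with a dangling handle.
static void PrintSched(pid_t tid) {
  int policy = sched_getscheduler(tid);  // returns -1 and sets errno on failure
  if (policy == -1) {
    perror("sched_getscheduler");
  }
  sched_param sp;
  if (sched_getparam(tid, &sp) == -1) {
    perror("sched_getparam");
    sp.sched_priority = -1;  // sentinel value, as in the patch above
  }
  printf("sched=%d/%d\n", policy, sp.sched_priority);
}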
@@ -1934,7 +1951,6 @@
   wait_cond_ = new ConditionVariable("a thread wait condition variable", *wait_mutex_);
   tlsPtr_.instrumentation_stack = new std::deque<instrumentation::InstrumentationStackFrame>;
   tlsPtr_.name = new std::string(kThreadNameDuringStartup);
-  tlsPtr_.nested_signal_state = static_cast<jmp_buf*>(malloc(sizeof(jmp_buf)));
 
   static_assert((sizeof(Thread) % 4) == 0U,
                 "art::Thread has a size which is not a multiple of 4.");
@@ -2118,7 +2134,6 @@
   delete tlsPtr_.instrumentation_stack;
   delete tlsPtr_.name;
   delete tlsPtr_.deps_or_stack_trace_sample.stack_trace_sample;
-  free(tlsPtr_.nested_signal_state);
 
   Runtime::Current()->GetHeap()->AssertThreadLocalBuffersAreRevoked(this);
 
diff --git a/runtime/thread.h b/runtime/thread.h
index d5fd9e9..de0b892 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -1115,21 +1115,12 @@
     return tlsPtr_.mterp_alt_ibase;
   }
 
-  // Notify that a signal is being handled. This is to protect us from doing recursive
-  // NPE handling after a SIGSEGV.
-  void NoteSignalBeingHandled() {
-    if (tls32_.handling_signal_) {
-      LOG(FATAL) << "Detected signal while processing a signal";
-    }
-    tls32_.handling_signal_ = true;
+  bool HandlingSignal() const {
+    return tls32_.handling_signal_;
   }
 
-  void NoteSignalHandlerDone() {
-    tls32_.handling_signal_ = false;
-  }
-
-  jmp_buf* GetNestedSignalState() {
-    return tlsPtr_.nested_signal_state;
+  void SetHandlingSignal(bool handling_signal) {
+    tls32_.handling_signal_ = handling_signal;
   }
 
   bool IsTransitioningToRunnable() const {
@@ -1460,7 +1451,7 @@
       thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
-      thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
+      thread_local_alloc_stack_end(nullptr),
       flip_function(nullptr), method_verifier(nullptr), thread_local_mark_stack(nullptr) {
       std::fill(held_mutexes, held_mutexes + kLockLevelCount, nullptr);
     }
@@ -1606,9 +1597,6 @@
     // Support for Mutex lock hierarchy bug detection.
     BaseMutex* held_mutexes[kLockLevelCount];
 
-    // Recorded thread state for nested signals.
-    jmp_buf* nested_signal_state;
-
     // The function used for thread flip.
     Closure* flip_function;
 
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index caed369..8d72fe8 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -736,7 +736,7 @@
         // EAGAIN and EINTR both indicate a spurious failure, try again from the beginning.
         if ((errno != EAGAIN) && (errno != EINTR)) {
           if (errno == ETIMEDOUT) {
-            LOG(kIsDebugBuild ? ::android::base::FATAL : ::android::base::ERROR)
+            LOG(::android::base::FATAL)
                 << "Timed out waiting for threads to suspend, waited for "
                 << PrettyDuration(NanoTime() - start_time);
           } else {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 7aa98cd..0333fe8 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -162,7 +162,7 @@
       FieldValueKind kind;
       bool is_volatile;
 
-      FieldValue() = default;
+      FieldValue() : value(0), kind(FieldValueKind::kBoolean), is_volatile(false) {}
       FieldValue(FieldValue&& log) = default;
 
      private:
diff --git a/runtime/transaction_test.cc b/runtime/transaction_test.cc
index 97c1228..9206292 100644
--- a/runtime/transaction_test.cc
+++ b/runtime/transaction_test.cc
@@ -508,7 +508,7 @@
   dex::StringIndex string_idx = dex_file->GetIndexForStringId(*string_id);
   ASSERT_TRUE(string_idx.IsValid());
   // String should only get resolved by the initializer.
-  EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache) == nullptr);
+  EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache.Get()) == nullptr);
   EXPECT_TRUE(h_dex_cache->GetResolvedString(string_idx) == nullptr);
   // Do the transaction, then roll back.
   Transaction transaction;
@@ -518,7 +518,7 @@
   ASSERT_TRUE(h_klass->IsInitialized());
   // Make sure the string got resolved by the transaction.
   {
-    mirror::String* s = class_linker_->LookupString(*dex_file, string_idx, h_dex_cache);
+    mirror::String* s = class_linker_->LookupString(*dex_file, string_idx, h_dex_cache.Get());
     ASSERT_TRUE(s != nullptr);
     EXPECT_STREQ(s->ToModifiedUtf8().c_str(), kResolvedString);
     EXPECT_EQ(s, h_dex_cache->GetResolvedString(string_idx));
@@ -526,7 +526,7 @@
   Runtime::Current()->ExitTransactionMode();
   transaction.Rollback();
   // Check that the string did not stay resolved.
-  EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache) == nullptr);
+  EXPECT_TRUE(class_linker_->LookupString(*dex_file, string_idx, h_dex_cache.Get()) == nullptr);
   EXPECT_TRUE(h_dex_cache->GetResolvedString(string_idx) == nullptr);
   ASSERT_FALSE(h_klass->IsInitialized());
   ASSERT_FALSE(soa.Self()->IsExceptionPending());
diff --git a/runtime/utils.cc b/runtime/utils.cc
index 6a20eaf..8d216ce 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -52,77 +52,6 @@
 using android::base::StringAppendF;
 using android::base::StringPrintf;
 
-static const uint8_t kBase64Map[256] = {
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255,  62, 255, 255, 255,  63,
-  52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255,
-  255, 254, 255, 255, 255,   0,   1,   2,   3,   4,   5,   6,
-    7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  // NOLINT
-   19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255, 255,  // NOLINT
-  255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,
-   37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  // NOLINT
-   49,  50,  51, 255, 255, 255, 255, 255, 255, 255, 255, 255,  // NOLINT
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-  255, 255, 255, 255
-};
-
-uint8_t* DecodeBase64(const char* src, size_t* dst_size) {
-  std::vector<uint8_t> tmp;
-  uint32_t t = 0, y = 0;
-  int g = 3;
-  for (size_t i = 0; src[i] != '\0'; ++i) {
-    uint8_t c = kBase64Map[src[i] & 0xFF];
-    if (c == 255) continue;
-    // the final = symbols are read and used to trim the remaining bytes
-    if (c == 254) {
-      c = 0;
-      // prevent g < 0 which would potentially allow an overflow later
-      if (--g < 0) {
-        *dst_size = 0;
-        return nullptr;
-      }
-    } else if (g != 3) {
-      // we only allow = to be at the end
-      *dst_size = 0;
-      return nullptr;
-    }
-    t = (t << 6) | c;
-    if (++y == 4) {
-      tmp.push_back((t >> 16) & 255);
-      if (g > 1) {
-        tmp.push_back((t >> 8) & 255);
-      }
-      if (g > 2) {
-        tmp.push_back(t & 255);
-      }
-      y = t = 0;
-    }
-  }
-  if (y != 0) {
-    *dst_size = 0;
-    return nullptr;
-  }
-  std::unique_ptr<uint8_t[]> dst(new uint8_t[tmp.size()]);
-  if (dst_size != nullptr) {
-    *dst_size = tmp.size();
-  } else {
-    *dst_size = 0;
-  }
-  std::copy(tmp.begin(), tmp.end(), dst.get());
-  return dst.release();
-}
-
 pid_t GetTid() {
 #if defined(__APPLE__)
   uint64_t owner;
diff --git a/runtime/utils.h b/runtime/utils.h
index 96e5bfa..2011d9e 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -70,8 +70,6 @@
   return intp & 0xFFFFFFFFU;
 }
 
-uint8_t* DecodeBase64(const char* src, size_t* dst_size);
-
 std::string PrintableChar(uint16_t ch);
 
 // Returns an ASCII string corresponding to the given UTF-8 string.
@@ -325,6 +323,18 @@
   return size;
 }
 
+// Return -1 if <, 0 if ==, 1 if >.
+template <typename T>
+inline static int32_t Compare(T lhs, T rhs) {
+  return (lhs < rhs) ? -1 : ((lhs == rhs) ? 0 : 1);
+}
+
+// Return -1 if < 0, 0 if == 0, 1 if > 0.
+template <typename T>
+inline static int32_t Signum(T opnd) {
+  return (opnd < 0) ? -1 : ((opnd == 0) ? 0 : 1);
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_UTILS_H_
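Worked examples for the two new helpers, since the three-way convention is easy to invert:

// Usage (values are illustrative):
//   Compare(3, 7) == -1    Compare(7, 7) == 0    Compare(9, 7) == 1
//   Signum(-42)   == -1    Signum(0)     == 0    Signum(7)     == 1
// The branchy form avoids the classic "return lhs - rhs" idiom, which
// overflows for e.g. Compare(INT32_MIN, 1).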
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index f9a1405..95904af 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -51,7 +51,11 @@
     : DexCacheArraysLayout(pointer_size, dex_file->GetHeader(), dex_file->NumCallSiteIds()) {
 }
 
-constexpr size_t DexCacheArraysLayout::Alignment() {
+inline size_t DexCacheArraysLayout::Alignment() const {
+  return Alignment(pointer_size_);
+}
+
+inline constexpr size_t DexCacheArraysLayout::Alignment(PointerSize pointer_size) {
   // mirror::Type/String/MethodTypeDexCacheType alignment is 8,
   // i.e. higher than or equal to the pointer alignment.
   static_assert(alignof(mirror::TypeDexCacheType) == 8,
@@ -60,8 +64,8 @@
                 "Expecting alignof(StringDexCacheType) == 8");
   static_assert(alignof(mirror::MethodTypeDexCacheType) == 8,
                 "Expecting alignof(MethodTypeDexCacheType) == 8");
-  // This is the same as alignof(MethodTypeDexCacheType).
-  return alignof(mirror::StringDexCacheType);
+  // This is the same as alignof(FieldDexCacheType) for the given pointer size.
+  return 2u * static_cast<size_t>(pointer_size);
 }
 
 template <typename T>
@@ -100,8 +104,8 @@
 }
 
 inline size_t DexCacheArraysLayout::StringOffset(uint32_t string_idx) const {
-  return strings_offset_ + ElementOffset(PointerSize::k64,
-                                         string_idx % mirror::DexCache::kDexCacheStringCacheSize);
+  uint32_t string_hash = string_idx % mirror::DexCache::kDexCacheStringCacheSize;
+  return strings_offset_ + ElementOffset(PointerSize::k64, string_hash);
 }
 
 inline size_t DexCacheArraysLayout::StringsSize(size_t num_elements) const {
@@ -119,15 +123,20 @@
 }
 
 inline size_t DexCacheArraysLayout::FieldOffset(uint32_t field_idx) const {
-  return fields_offset_ + ElementOffset(pointer_size_, field_idx);
+  uint32_t field_hash = field_idx % mirror::DexCache::kDexCacheFieldCacheSize;
+  return fields_offset_ + 2u * static_cast<size_t>(pointer_size_) * field_hash;
 }
 
 inline size_t DexCacheArraysLayout::FieldsSize(size_t num_elements) const {
-  return ArraySize(pointer_size_, num_elements);
+  size_t cache_size = mirror::DexCache::kDexCacheFieldCacheSize;
+  if (num_elements < cache_size) {
+    cache_size = num_elements;
+  }
+  return 2u * static_cast<size_t>(pointer_size_) * cache_size;
 }
 
 inline size_t DexCacheArraysLayout::FieldsAlignment() const {
-  return static_cast<size_t>(pointer_size_);
+  return 2u * static_cast<size_t>(pointer_size_);
 }
 
 inline size_t DexCacheArraysLayout::MethodTypesSize(size_t num_elements) const {
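The fields array becomes a fixed-size hash cache of (dex index, pointer) pairs, which is why offsets and alignment are now two pointer-sized words per slot and lookups hash with idx % kDexCacheFieldCacheSize. A minimal sketch of the addressing math, with stand-in constants (the real values live in mirror::DexCache):

#include <cstddef>
#include <cstdint>

// Assumed values for illustration: each cache slot holds a pair
// {dex field index, ArtField*}, i.e. two pointer-sized words.
constexpr size_t kPointerSize = 8u;         // 64-bit target (assumption)
constexpr size_t kFieldCacheSize = 1024u;   // stand-in for kDexCacheFieldCacheSize

constexpr size_t FieldSlotSize() { return 2u * kPointerSize; }

constexpr size_t FieldOffset(size_t fields_offset, uint32_t field_idx) {
  // Hash the dex index into the fixed-size cache, then scale by slot size.
  return fields_offset + FieldSlotSize() * (field_idx % kFieldCacheSize);
}

static_assert(FieldOffset(0, kFieldCacheSize + 3) == FieldOffset(0, 3),
              "indices that collide hash to the same slot");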
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index ed677ed..377a374 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -57,7 +57,9 @@
     return size_;
   }
 
-  static constexpr size_t Alignment();
+  size_t Alignment() const;
+
+  static constexpr size_t Alignment(PointerSize pointer_size);
 
   size_t TypesOffset() const {
     return types_offset_;
@@ -125,8 +127,6 @@
   const size_t call_sites_offset_;
   const size_t size_;
 
-  static size_t Alignment(PointerSize pointer_size);
-
   static size_t ElementOffset(PointerSize element_size, uint32_t idx);
 
   static size_t ArraySize(PointerSize element_size, uint32_t num_elements);
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index 2481c8b..9ff104b 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -120,4 +120,30 @@
   }
 }
 
+bool VdexFile::OpenAllDexFiles(std::vector<std::unique_ptr<const DexFile>>* dex_files,
+                               std::string* error_msg) {
+  size_t i = 0;
+  for (const uint8_t* dex_file_start = GetNextDexFileData(nullptr);
+       dex_file_start != nullptr;
+       dex_file_start = GetNextDexFileData(dex_file_start), ++i) {
+    size_t size = reinterpret_cast<const DexFile::Header*>(dex_file_start)->file_size_;
+    // TODO: Supply the location information for a vdex file.
+    static constexpr char kVdexLocation[] = "";
+    std::string location = DexFile::GetMultiDexLocation(i, kVdexLocation);
+    std::unique_ptr<const DexFile> dex(DexFile::Open(dex_file_start,
+                                                     size,
+                                                     location,
+                                                     GetLocationChecksum(i),
+                                                     nullptr /*oat_dex_file*/,
+                                                     false /*verify*/,
+                                                     false /*verify_checksum*/,
+                                                     error_msg));
+    if (dex == nullptr) {
+      return false;
+    }
+    dex_files->push_back(std::move(dex));
+  }
+  return true;
+}
+
 }  // namespace art
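A hedged usage sketch for the new helper. It assumes an already-opened VdexFile (how it was opened is outside this diff) plus ART's DexFile and logging facilities; DumpVdexClassCounts is a hypothetical caller, in the dumping-tool spirit the header comment describes:

#include <memory>
#include <string>
#include <vector>

void DumpVdexClassCounts(VdexFile* vdex) {
  std::string error_msg;
  std::vector<std::unique_ptr<const DexFile>> dex_files;
  if (!vdex->OpenAllDexFiles(&dex_files, &error_msg)) {
    LOG(ERROR) << "Failed to open dex files from vdex: " << error_msg;
    return;
  }
  for (const std::unique_ptr<const DexFile>& dex : dex_files) {
    LOG(INFO) << dex->GetLocation() << ": " << dex->NumClassDefs() << " class defs";
  }
}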
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 7daf2f8..9840555 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -27,6 +27,8 @@
 
 namespace art {
 
+class DexFile;
+
 // VDEX files contain extracted DEX files. The VdexFile class maps the file to
 // memory and provides tools for accessing its individual sections.
 //
@@ -61,7 +63,7 @@
 
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
-    static constexpr uint8_t kVdexVersion[] = { '0', '0', '3', '\0' };  // Remove verify-profile
+    static constexpr uint8_t kVdexVersion[] = { '0', '0', '5', '\0' };  // access flags
 
     uint8_t magic_[4];
     uint8_t version_[4];
@@ -122,6 +124,12 @@
     return reinterpret_cast<const uint32_t*>(Begin() + sizeof(Header))[dex_file_index];
   }
 
+  // Opens all the dex files contained in this vdex file.  This is currently
+  // used for dumping tools only, and has not been tested for use by the
+  // remainder of the runtime.
+  bool OpenAllDexFiles(std::vector<std::unique_ptr<const DexFile>>* dex_files,
+                       std::string* error_msg);
+
  private:
   explicit VdexFile(MemMap* mmap) : mmap_(mmap) {}
 
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index 52f7e34..740b7dd 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -309,6 +309,7 @@
   // Note: no check for IsInstantiable() here. We may produce this in case an InstantiationError
   //       would be thrown at runtime, but we need to continue verification and *not* create a
   //       hard failure or abort.
+  CheckConstructorInvariants(this);
 }
 
 std::string UnresolvedMergedType::Dump() const {
@@ -789,7 +790,7 @@
   if (!klass_.IsNull()) {
     CHECK(!descriptor_.empty()) << *this;
     std::string temp;
-    CHECK_EQ(descriptor_.ToString(), klass_.Read()->GetDescriptor(&temp)) << *this;
+    CHECK_EQ(descriptor_, klass_.Read()->GetDescriptor(&temp)) << *this;
   }
 }
 
@@ -820,9 +821,7 @@
       reg_type_cache_(reg_type_cache),
       resolved_part_(resolved),
       unresolved_types_(unresolved, false, unresolved.GetAllocator()) {
-  if (kIsDebugBuild) {
-    CheckInvariants();
-  }
+  CheckConstructorInvariants(this);
 }
 void UnresolvedMergedType::CheckInvariants() const {
   CHECK(reg_type_cache_ != nullptr);
diff --git a/runtime/verifier/reg_type.h b/runtime/verifier/reg_type.h
index 472381d..dedf77f 100644
--- a/runtime/verifier/reg_type.h
+++ b/runtime/verifier/reg_type.h
@@ -274,14 +274,17 @@
           uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
       : descriptor_(descriptor),
         klass_(klass),
-        cache_id_(cache_id) {
+        cache_id_(cache_id) {}
+
+  template <typename Class>
+  void CheckConstructorInvariants(Class* this_ ATTRIBUTE_UNUSED) const
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    static_assert(std::is_final<Class>::value, "Class must be final.");
     if (kIsDebugBuild) {
       CheckInvariants();
     }
   }
 
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
-
   const StringPiece descriptor_;
   mutable GcRoot<mirror::Class> klass_;  // Non-const only due to moving classes.
   const uint16_t cache_id_;
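The template above enforces that the debug-build invariant check only runs from a most-derived (final) constructor: calling a virtual CheckInvariants() from a base constructor would dispatch on a partially constructed object. A distilled sketch of the pattern outside ART, with hypothetical class names:

#include <cassert>
#include <type_traits>

class Base {
 protected:
  // Each final subclass calls this at the end of its own constructor. The
  // static_assert guarantees `this_` is the most-derived type, so the virtual
  // call below runs against a fully constructed object.
  template <typename Class>
  void CheckConstructorInvariants(Class* this_) const {
    static_assert(std::is_final<Class>::value, "Class must be final.");
    (void)this_;  // used only for type deduction
    CheckInvariants();
  }

 private:
  virtual void CheckInvariants() const = 0;
};

class Derived final : public Base {
 public:
  explicit Derived(int v) : v_(v) { CheckConstructorInvariants(this); }

 private:
  void CheckInvariants() const override { assert(v_ >= 0); }
  int v_;
};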
@@ -289,6 +292,8 @@
   friend class RegTypeCache;
 
  private:
+  virtual void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+
   /*
    * A basic Join operation on classes. For a pair of types S and T the Join, written S v T = J, is
    * S <: J, T <: J and for-all U such that S <: U, T <: U then J <: U. That is J is the parent of
@@ -339,7 +344,9 @@
  private:
   ConflictType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : RegType(klass, descriptor, cache_id) {}
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   static const ConflictType* instance_;
 };
@@ -368,7 +375,9 @@
  private:
   UndefinedType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : RegType(klass, descriptor, cache_id) {}
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   static const UndefinedType* instance_;
 };
@@ -387,7 +396,7 @@
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_);
 };
 
-class IntegerType : public Cat1Type {
+class IntegerType FINAL : public Cat1Type {
  public:
   bool IsInteger() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
@@ -401,7 +410,9 @@
  private:
   IntegerType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const IntegerType* instance_;
 };
 
@@ -419,7 +430,9 @@
  private:
   BooleanType(mirror::Class* klass, const StringPiece& descriptor,
               uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   static const BooleanType* instance_;
 };
@@ -438,7 +451,9 @@
  private:
   ByteType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const ByteType* instance_;
 };
 
@@ -456,7 +471,9 @@
  private:
   ShortType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const ShortType* instance_;
 };
 
@@ -474,7 +491,9 @@
  private:
   CharType(mirror::Class* klass, const StringPiece& descriptor,
            uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const CharType* instance_;
 };
 
@@ -492,7 +511,9 @@
  private:
   FloatType(mirror::Class* klass, const StringPiece& descriptor,
             uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat1Type(klass, descriptor, cache_id) {}
+      : Cat1Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const FloatType* instance_;
 };
 
@@ -517,7 +538,9 @@
  private:
   LongLoType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const LongLoType* instance_;
 };
 
@@ -535,7 +558,9 @@
  private:
   LongHiType(mirror::Class* klass, const StringPiece& descriptor,
              uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const LongHiType* instance_;
 };
 
@@ -554,7 +579,9 @@
  private:
   DoubleLoType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const DoubleLoType* instance_;
 };
 
@@ -572,7 +599,9 @@
  private:
   DoubleHiType(mirror::Class* klass, const StringPiece& descriptor,
                uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : Cat2Type(klass, descriptor, cache_id) {}
+      : Cat2Type(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   static const DoubleHiType* instance_;
 };
 
@@ -637,7 +666,9 @@
  public:
   PreciseConstType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   bool IsPreciseConstant() const OVERRIDE { return true; }
 
@@ -648,7 +679,9 @@
  public:
   PreciseConstLoType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsPreciseConstantLo() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -657,7 +690,9 @@
  public:
   PreciseConstHiType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsPreciseConstantHi() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -667,6 +702,7 @@
  ImpreciseConstType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
   }
   bool IsImpreciseConstant() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
@@ -676,7 +712,9 @@
  public:
   ImpreciseConstLoType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsImpreciseConstantLo() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -685,7 +723,9 @@
  public:
   ImpreciseConstHiType(uint32_t constant, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : ConstantType(constant, cache_id) {}
+      : ConstantType(constant, cache_id) {
+    CheckConstructorInvariants(this);
+  }
   bool IsImpreciseConstantHi() const OVERRIDE { return true; }
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 };
@@ -718,7 +758,9 @@
                              const StringPiece& descriptor,
                              uint32_t allocation_pc, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : UninitializedType(klass, descriptor, allocation_pc, cache_id) {}
+      : UninitializedType(klass, descriptor, allocation_pc, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   bool IsUninitializedReference() const OVERRIDE { return true; }
 
@@ -735,9 +777,7 @@
                                  uint32_t allocation_pc, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UninitializedType(nullptr, descriptor, allocation_pc, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedAndUninitializedReference() const OVERRIDE { return true; }
@@ -747,7 +787,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 // Similar to UninitializedReferenceType but special case for the this argument
@@ -759,9 +799,7 @@
                                  uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UninitializedType(klass, descriptor, 0, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   virtual bool IsUninitializedThisReference() const OVERRIDE { return true; }
@@ -771,7 +809,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 class UnresolvedUninitializedThisRefType FINAL : public UninitializedType {
@@ -780,9 +818,7 @@
                                      uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UninitializedType(nullptr, descriptor, 0, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedAndUninitializedThisReference() const OVERRIDE { return true; }
@@ -792,7 +828,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 // A type of register holding a reference to an Object of type GetClass or a
@@ -801,7 +837,9 @@
  public:
   ReferenceType(mirror::Class* klass, const StringPiece& descriptor,
                 uint16_t cache_id) REQUIRES_SHARED(Locks::mutator_lock_)
-      : RegType(klass, descriptor, cache_id) {}
+      : RegType(klass, descriptor, cache_id) {
+    CheckConstructorInvariants(this);
+  }
 
   bool IsReference() const OVERRIDE { return true; }
 
@@ -848,9 +886,7 @@
   UnresolvedReferenceType(const StringPiece& descriptor, uint16_t cache_id)
       REQUIRES_SHARED(Locks::mutator_lock_)
       : UnresolvedType(descriptor, cache_id) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedReference() const OVERRIDE { return true; }
@@ -860,7 +896,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 };
 
 // Type representing the super-class of an unresolved type.
@@ -872,9 +908,7 @@
       : UnresolvedType("", cache_id),
         unresolved_child_id_(child_id),
         reg_type_cache_(reg_type_cache) {
-    if (kIsDebugBuild) {
-      CheckInvariants();
-    }
+    CheckConstructorInvariants(this);
   }
 
   bool IsUnresolvedSuperClass() const OVERRIDE { return true; }
@@ -889,7 +923,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 
   const uint16_t unresolved_child_id_;
   const RegTypeCache* const reg_type_cache_;
@@ -925,7 +959,7 @@
   std::string Dump() const OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
-  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_);
+  void CheckInvariants() const REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE;
 
   const RegTypeCache* const reg_type_cache_;
 
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 000cf7c..8e4c166 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -68,13 +68,17 @@
   return (it == dex_deps_.end()) ? nullptr : it->second.get();
 }
 
+// Access flags that impact vdex verification.
+static constexpr uint32_t kAccVdexAccessFlags =
+    kAccPublic | kAccPrivate | kAccProtected | kAccStatic | kAccInterface;
+
 template <typename T>
 uint16_t VerifierDeps::GetAccessFlags(T* element) {
   static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
   if (element == nullptr) {
     return VerifierDeps::kUnresolvedMarker;
   } else {
-    uint16_t access_flags = Low16Bits(element->GetAccessFlags());
+    uint16_t access_flags = Low16Bits(element->GetAccessFlags()) & kAccVdexAccessFlags;
     CHECK_NE(access_flags, VerifierDeps::kUnresolvedMarker);
     return access_flags;
   }
@@ -458,8 +462,7 @@
   }
 
   if (!IsInClassPath(source)) {
-    if (!destination->IsInterface()) {
-      DCHECK(!source->IsInterface());
+    if (!destination->IsInterface() && !source->IsInterface()) {
       // Find the super class at the classpath boundary. Only that class
       // can change the assignability.
       do {
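Masking the recorded flags down to the bits verification actually depends on keeps vdex dependency data stable when irrelevant flags change, which is also why the vdex version above bumps with the note "access flags". A toy illustration, with stand-in flag values (the real constants are ART's kAcc* definitions):

#include <cstdint>

// Assumed values mirroring the Java access flag layout.
constexpr uint32_t kAccPublic    = 0x0001;
constexpr uint32_t kAccPrivate   = 0x0002;
constexpr uint32_t kAccStatic    = 0x0008;
constexpr uint32_t kAccSynthetic = 0x1000;

constexpr uint32_t kVerifierRelevant = kAccPublic | kAccPrivate | kAccStatic;

constexpr uint16_t RecordedFlags(uint32_t raw) {
  // Only verifier-relevant bits are stored, so toggling e.g. kAccSynthetic on
  // a boot-classpath member no longer invalidates recorded dependencies.
  return static_cast<uint16_t>(raw & kVerifierRelevant);
}

static_assert(RecordedFlags(kAccPublic | kAccSynthetic) == kAccPublic,
              "irrelevant bits are dropped");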
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 2610252..5aef062 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -34,7 +34,6 @@
 
 namespace art {
 
-jclass WellKnownClasses::com_android_dex_Dex;
 jclass WellKnownClasses::dalvik_annotation_optimization_CriticalNative;
 jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
 jclass WellKnownClasses::dalvik_system_BaseDexClassLoader;
@@ -80,7 +79,6 @@
 jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_Chunk;
 jclass WellKnownClasses::org_apache_harmony_dalvik_ddmc_DdmServer;
 
-jmethodID WellKnownClasses::com_android_dex_Dex_create;
 jmethodID WellKnownClasses::dalvik_system_VMRuntime_runFinalization;
 jmethodID WellKnownClasses::java_lang_Boolean_valueOf;
 jmethodID WellKnownClasses::java_lang_Byte_valueOf;
@@ -268,7 +266,6 @@
 #undef STRING_INIT_LIST
 
 void WellKnownClasses::Init(JNIEnv* env) {
-  com_android_dex_Dex = CacheClass(env, "com/android/dex/Dex");
   dalvik_annotation_optimization_CriticalNative =
       CacheClass(env, "dalvik/annotation/optimization/CriticalNative");
   dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
@@ -317,7 +314,6 @@
   org_apache_harmony_dalvik_ddmc_DdmServer = CacheClass(env, "org/apache/harmony/dalvik/ddmc/DdmServer");
 
   dalvik_system_VMRuntime_runFinalization = CacheMethod(env, dalvik_system_VMRuntime, true, "runFinalization", "(J)V");
-  com_android_dex_Dex_create = CacheMethod(env, com_android_dex_Dex, true, "create", "(Ljava/nio/ByteBuffer;)Lcom/android/dex/Dex;");
   java_lang_ClassNotFoundException_init = CacheMethod(env, java_lang_ClassNotFoundException, false, "<init>", "(Ljava/lang/String;Ljava/lang/Throwable;)V");
   java_lang_ClassLoader_loadClass = CacheMethod(env, java_lang_ClassLoader, false, "loadClass", "(Ljava/lang/String;)Ljava/lang/Class;");
 
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index db8a53c..c184731 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -44,7 +44,6 @@
 
   static ObjPtr<mirror::Class> ToClass(jclass global_jclass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static jclass com_android_dex_Dex;
   static jclass dalvik_annotation_optimization_CriticalNative;
   static jclass dalvik_annotation_optimization_FastNative;
   static jclass dalvik_system_BaseDexClassLoader;
@@ -90,7 +89,6 @@
   static jclass org_apache_harmony_dalvik_ddmc_Chunk;
   static jclass org_apache_harmony_dalvik_ddmc_DdmServer;
 
-  static jmethodID com_android_dex_Dex_create;
   static jmethodID dalvik_system_VMRuntime_runFinalization;
   static jmethodID java_lang_Boolean_valueOf;
   static jmethodID java_lang_Byte_valueOf;
diff --git a/sigchainlib/OWNERS b/sigchainlib/OWNERS
new file mode 100644
index 0000000..450fc12
--- /dev/null
+++ b/sigchainlib/OWNERS
@@ -0,0 +1,4 @@
+# Default maintainers and code reviewers:
+jmgao@google.com
+dimitry@google.com
+sehr@google.com
diff --git a/sigchainlib/sigchain.cc b/sigchainlib/sigchain.cc
index c1efecd..cc1b78d 100644
--- a/sigchainlib/sigchain.cc
+++ b/sigchainlib/sigchain.cc
@@ -22,81 +22,45 @@
 #endif
 
 #include <dlfcn.h>
+#include <errno.h>
+#include <limits.h>  // CHAR_BIT, used by sigorset().
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+
+#include <initializer_list>
+#include <utility>
 
 #include "sigchain.h"
 
 #if defined(__APPLE__)
 #define _NSIG NSIG
 #define sighandler_t sig_t
+
+// Darwin has an #error when ucontext.h is included without _XOPEN_SOURCE defined.
+#define _XOPEN_SOURCE
 #endif
 
-namespace art {
+#include <ucontext.h>
 
-typedef int (*SigActionFnPtr)(int, const struct sigaction*, struct sigaction*);
+// libsigchain provides an interception layer for signal handlers, to allow ART and others to give
+// their signal handlers the first stab at handling signals before passing them on to user code.
+//
+// It implements wrapper functions for signal, sigaction, and sigprocmask, and a handler that
+// forwards signals appropriately.
+//
+// In our handler, we start off with all signals blocked, fetch the original signal mask from the
+// passed in ucontext, and then adjust our signal mask appropriately for the user handler.
+//
+// It's somewhat tricky for us to properly handle some flag cases:
+//   SA_NOCLDSTOP and SA_NOCLDWAIT: shouldn't matter, we don't have special handlers for SIGCHLD.
+//   SA_NODEFER: handled by manually adjusting the signal mask in our handler.
+//  ~SA_ONSTACK: always silently enable this
+//   SA_RESETHAND: unimplemented, but we can probably do this?
+//  ~SA_RESTART: unimplemented, maybe we can reserve an RT signal, register an empty handler that
+//               doesn't have SA_RESTART, and raise the signal to avoid restarting syscalls that are
+//               expected to be interrupted?
 
-class SignalAction {
- public:
-  SignalAction() : claimed_(false), uses_old_style_(false), special_handler_(nullptr) {
-  }
-
-  // Claim the signal and keep the action specified.
-  void Claim(const struct sigaction& action) {
-    action_ = action;
-    claimed_ = true;
-  }
-
-  // Unclaim the signal and restore the old action.
-  void Unclaim(int signal) {
-    claimed_ = false;
-    sigaction(signal, &action_, nullptr);        // Restore old action.
-  }
-
-  // Get the action associated with this signal.
-  const struct sigaction& GetAction() const {
-    return action_;
-  }
-
-  // Is the signal claimed?
-  bool IsClaimed() const {
-    return claimed_;
-  }
-
-  // Change the recorded action to that specified.
-  // If oldstyle is true then this action is from an older style signal()
-  // call as opposed to sigaction().  In this case the sa_handler is
-  // used when invoking the user's handler.
-  void SetAction(const struct sigaction& action, bool oldstyle) {
-    action_ = action;
-    uses_old_style_ = oldstyle;
-  }
-
-  bool OldStyle() const {
-    return uses_old_style_;
-  }
-
-  void SetSpecialHandler(SpecialSignalHandlerFn fn) {
-    special_handler_ = fn;
-  }
-
-  SpecialSignalHandlerFn GetSpecialHandler() {
-    return special_handler_;
-  }
-
- private:
-  struct sigaction action_;                 // Action to be performed.
-  bool claimed_;                            // Whether signal is claimed or not.
-  bool uses_old_style_;                     // Action is created using signal().  Use sa_handler.
-  SpecialSignalHandlerFn special_handler_;  // A special handler executed before user handlers.
-};
-
-// User's signal handlers
-static SignalAction user_sigactions[_NSIG];
-static bool initialized;
-static void* linked_sigaction_sym;
-static void* linked_sigprocmask_sym;
 
 static void log(const char* format, ...) {
   char buf[256];
@@ -111,102 +75,186 @@
   va_end(ap);
 }
 
-static void CheckSignalValid(int signal) {
-  if (signal <= 0 || signal >= _NSIG) {
-    log("Invalid signal %d", signal);
-    abort();
-  }
-}
+#define fatal(...) do { log(__VA_ARGS__); abort(); } while (0)
 
-// Sigchainlib's own handler so we can ensure a managed handler is called first even if nobody
-// claimed a chain. Simply forward to InvokeUserSignalHandler.
-static void sigchainlib_managed_handler_sigaction(int sig, siginfo_t* info, void* context) {
-  InvokeUserSignalHandler(sig, info, context);
-}
-
-// Claim a signal chain for a particular signal.
-extern "C" void ClaimSignalChain(int signal, struct sigaction* oldaction) {
-  CheckSignalValid(signal);
-
-  user_sigactions[signal].Claim(*oldaction);
-}
-
-extern "C" void UnclaimSignalChain(int signal) {
-  CheckSignalValid(signal);
-
-  user_sigactions[signal].Unclaim(signal);
-}
-
-// Invoke the user's signal handler.
-extern "C" void InvokeUserSignalHandler(int sig, siginfo_t* info, void* context) {
-  // Check the arguments.
-  CheckSignalValid(sig);
-
-  // The signal must have been claimed in order to get here.  Check it.
-  if (!user_sigactions[sig].IsClaimed()) {
-    abort();
-  }
-
-  // Do we have a managed handler? If so, run it first.
-  SpecialSignalHandlerFn managed = user_sigactions[sig].GetSpecialHandler();
-  if (managed != nullptr) {
-    sigset_t mask, old_mask;
-    sigfillset(&mask);
-    sigprocmask(SIG_BLOCK, &mask, &old_mask);
-    // Call the handler. If it succeeds, we're done.
-    if (managed(sig, info, context)) {
-      sigprocmask(SIG_SETMASK, &old_mask, nullptr);
-      return;
+static int sigorset(sigset_t* dest, sigset_t* left, sigset_t* right) {
+  sigemptyset(dest);
+  for (size_t i = 0; i < sizeof(sigset_t) * CHAR_BIT; ++i) {
+    if (sigismember(left, i) == 1 || sigismember(right, i) == 1) {
+      sigaddset(dest, i);
     }
-    sigprocmask(SIG_SETMASK, &old_mask, nullptr);
+  }
+  return 0;
+}
+
+namespace art {
+
+static decltype(&sigaction) linked_sigaction;
+static decltype(&sigprocmask) linked_sigprocmask;
+__thread bool handling_signal;
+
+class SignalChain {
+ public:
+  SignalChain() : claimed_(false) {
   }
 
-  const struct sigaction& action = user_sigactions[sig].GetAction();
-  if (user_sigactions[sig].OldStyle()) {
-    if (action.sa_handler != nullptr) {
-      action.sa_handler(sig);
-    } else {
-      signal(sig, SIG_DFL);
-      raise(sig);
+  bool IsClaimed() {
+    return claimed_;
+  }
+
+  void Claim(int signo) {
+    if (!claimed_) {
+      Register(signo);
+      claimed_ = true;
     }
+  }
+
+  // Register the signal chain with the kernel if needed.
+  void Register(int signo) {
+    struct sigaction handler_action = {};
+    handler_action.sa_sigaction = SignalChain::Handler;
+    handler_action.sa_flags = SA_RESTART | SA_SIGINFO | SA_ONSTACK;
+    sigfillset(&handler_action.sa_mask);
+    linked_sigaction(signo, &handler_action, &action_);
+  }
+
+  void SetAction(const struct sigaction* action) {
+    action_ = *action;
+  }
+
+  struct sigaction GetAction() {
+    return action_;
+  }
+
+  void AddSpecialHandler(SpecialSignalHandlerFn fn) {
+    for (SpecialSignalHandlerFn& slot : special_handlers_) {
+      if (slot == nullptr) {
+        slot = fn;
+        return;
+      }
+    }
+
+    fatal("too many special signal handlers");
+  }
+
+  void RemoveSpecialHandler(SpecialSignalHandlerFn fn) {
+    // This isn't thread safe, but it's unlikely to be a real problem.
+    size_t len = sizeof(special_handlers_)/sizeof(*special_handlers_);
+    for (size_t i = 0; i < len; ++i) {
+      if (special_handlers_[i] == fn) {
+        for (size_t j = i; j < len - 1; ++j) {
+          special_handlers_[j] = special_handlers_[j + 1];
+        }
+        special_handlers_[len - 1] = nullptr;
+        return;
+      }
+    }
+
+    fatal("failed to find special handler to remove");
+  }
+
+  static void Handler(int signo, siginfo_t* siginfo, void*);
+
+ private:
+  bool claimed_;
+  struct sigaction action_;
+  SpecialSignalHandlerFn special_handlers_[2];
+};
+
+static SignalChain chains[_NSIG];
+
+class ScopedFlagRestorer {
+ public:
+  explicit ScopedFlagRestorer(bool* flag) : flag_(flag), original_value_(*flag) {
+  }
+
+  ~ScopedFlagRestorer() {
+    *flag_ = original_value_;
+  }
+
+ private:
+  bool* flag_;
+  bool original_value_;
+};
+
+class ScopedSignalUnblocker {
+ public:
+  explicit ScopedSignalUnblocker(const std::initializer_list<int>& signals) {
+    sigset_t new_mask;
+    sigemptyset(&new_mask);
+    for (int signal : signals) {
+      sigaddset(&new_mask, signal);
+    }
+    if (sigprocmask(SIG_UNBLOCK, &new_mask, &previous_mask_) != 0) {
+      fatal("failed to unblock signals: %s", strerror(errno));
+    }
+  }
+
+  ~ScopedSignalUnblocker() {
+    if (sigprocmask(SIG_SETMASK, &previous_mask_, nullptr) != 0) {
+      fatal("failed to unblock signals: %s", strerror(errno));
+    }
+  }
+
+ private:
+  sigset_t previous_mask_;
+};
+
+void SignalChain::Handler(int signo, siginfo_t* siginfo, void* ucontext_raw) {
+  ScopedFlagRestorer flag(&handling_signal);
+
+  // Try the special handlers first.
+  // If one of them crashes, we'll reenter this handler and pass that crash onto the user handler.
+  if (!handling_signal) {
+    ScopedSignalUnblocker unblocked { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV }; // NOLINT
+    handling_signal = true;
+
+    for (const auto& handler : chains[signo].special_handlers_) {
+      if (handler != nullptr && handler(signo, siginfo, ucontext_raw)) {
+        return;
+      }
+    }
+  }
+
+  // Forward to the user's signal handler.
+  int handler_flags = chains[signo].action_.sa_flags;
+  ucontext_t* ucontext = static_cast<ucontext_t*>(ucontext_raw);
+  sigset_t mask;
+  sigorset(&mask, &ucontext->uc_sigmask, &chains[signo].action_.sa_mask);
+  if ((handler_flags & SA_NODEFER)) {
+    sigdelset(&mask, signo);
+  }
+  sigprocmask(SIG_SETMASK, &mask, nullptr);
+
+  if ((handler_flags & SA_SIGINFO)) {
+    chains[signo].action_.sa_sigaction(signo, siginfo, ucontext_raw);
   } else {
-    if (action.sa_sigaction != nullptr) {
-      sigset_t old_mask;
-      sigprocmask(SIG_BLOCK, &action.sa_mask, &old_mask);
-      action.sa_sigaction(sig, info, context);
-      sigprocmask(SIG_SETMASK, &old_mask, nullptr);
+    auto handler = chains[signo].action_.sa_handler;
+    if (handler == SIG_IGN) {
+      return;
+    } else if (handler == SIG_DFL) {
+      fatal("exiting due to SIG_DFL handler for signal %d", signo);
     } else {
-      signal(sig, SIG_DFL);
-      raise(sig);
+      handler(signo);
     }
   }
 }
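Before chaining to the user handler, Handler rebuilds the mask the user handler would have seen under a direct registration: the pre-signal mask recovered from the ucontext, OR'ed with the handler's registered sa_mask, minus the signal itself when SA_NODEFER was requested. A freestanding sketch of that computation, assuming Linux and a hypothetical helper name:

#include <signal.h>
#include <ucontext.h>

// Compute the mask to install while running a chained user handler.
// `chain_mask` is the sa_mask the user registered for this signal.
static sigset_t MaskForUserHandler(int signo, const ucontext_t* uc,
                                   const sigset_t* chain_mask, int sa_flags) {
  sigset_t mask;
  sigemptyset(&mask);
  for (int s = 1; s < _NSIG; ++s) {
    if (sigismember(&uc->uc_sigmask, s) == 1 || sigismember(chain_mask, s) == 1) {
      sigaddset(&mask, s);  // union of the interrupted mask and sa_mask
    }
  }
  if (sa_flags & SA_NODEFER) {
    sigdelset(&mask, signo);  // user asked not to block the signal itself
  }
  return mask;  // caller installs it with sigprocmask(SIG_SETMASK, ...)
}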
 
-extern "C" void EnsureFrontOfChain(int signal, struct sigaction* expected_action) {
-  CheckSignalValid(signal);
-  // Read the current action without looking at the chain, it should be the expected action.
-  SigActionFnPtr linked_sigaction = reinterpret_cast<SigActionFnPtr>(linked_sigaction_sym);
-  struct sigaction current_action;
-  linked_sigaction(signal, nullptr, &current_action);
-  // If the sigactions don't match then we put the current action on the chain and make ourself as
-  // the main action.
-  if (current_action.sa_sigaction != expected_action->sa_sigaction) {
-    log("Warning: Unexpected sigaction action found %p\n", current_action.sa_sigaction);
-    user_sigactions[signal].Claim(current_action);
-    linked_sigaction(signal, expected_action, nullptr);
-  }
-}
-
 extern "C" int sigaction(int signal, const struct sigaction* new_action, struct sigaction* old_action) {
   // If this signal has been claimed as a signal chain, record the user's
   // action but don't pass it on to the kernel.
   // Note that we check that the signal number is in range here.  An out of range signal
   // number should behave exactly as the libc sigaction.
-  if (signal > 0 && signal < _NSIG && user_sigactions[signal].IsClaimed() &&
-      (new_action == nullptr || new_action->sa_handler != SIG_DFL)) {
-    struct sigaction saved_action = user_sigactions[signal].GetAction();
+  if (signal < 0 || signal >= _NSIG) {
+    errno = EINVAL;
+    return -1;
+  }
+
+  if (chains[signal].IsClaimed()) {
+    struct sigaction saved_action = chains[signal].GetAction();
     if (new_action != nullptr) {
-      user_sigactions[signal].SetAction(*new_action, false);
+      chains[signal].SetAction(new_action);
     }
     if (old_action != nullptr) {
       *old_action = saved_action;
@@ -216,73 +264,52 @@
 
   // Will only get here if the signal chain has not been claimed.  We want
   // to pass the sigaction on to the kernel via the real sigaction in libc.
-
-  if (linked_sigaction_sym == nullptr) {
-    // Perform lazy initialization.
-    // This will only occur outside of a signal context since we have
-    // not been initialized and therefore cannot be within the ART
-    // runtime.
-    InitializeSignalChain();
-  }
-
-  if (linked_sigaction_sym == nullptr) {
-    log("Unable to find next sigaction in signal chain");
-    abort();
-  }
-  SigActionFnPtr linked_sigaction = reinterpret_cast<SigActionFnPtr>(linked_sigaction_sym);
+  InitializeSignalChain();
   return linked_sigaction(signal, new_action, old_action);
 }
 
-static sighandler_t signal_impl(int signal, sighandler_t handler) {
-  struct sigaction sa;
+extern "C" sighandler_t signal(int signo, sighandler_t handler) {
+  if (signo < 0 || signo >= _NSIG) {
+    errno = EINVAL;
+    return SIG_ERR;
+  }
+
+  struct sigaction sa = {};
   sigemptyset(&sa.sa_mask);
   sa.sa_handler = handler;
-  sa.sa_flags = SA_RESTART;
+  sa.sa_flags = SA_RESTART | SA_ONSTACK;
   sighandler_t oldhandler;
 
   // If this signal has been claimed as a signal chain, record the user's
   // action but don't pass it on to the kernel.
-  // Note that we check that the signal number is in range here.  An out of range signal
-  // number should behave exactly as the libc sigaction.
-  if (signal > 0 && signal < _NSIG && user_sigactions[signal].IsClaimed() && handler != SIG_DFL) {
-    oldhandler = reinterpret_cast<sighandler_t>(user_sigactions[signal].GetAction().sa_handler);
-    user_sigactions[signal].SetAction(sa, true);
+  if (chains[signo].IsClaimed()) {
+    oldhandler = reinterpret_cast<sighandler_t>(chains[signo].GetAction().sa_handler);
+    chains[signo].SetAction(&sa);
     return oldhandler;
   }
 
   // Will only get here if the signal chain has not been claimed.  We want
   // to pass the sigaction on to the kernel via the real sigaction in libc.
-
-  if (linked_sigaction_sym == nullptr) {
-    // Perform lazy initialization.
-    InitializeSignalChain();
-  }
-
-  if (linked_sigaction_sym == nullptr) {
-    log("Unable to find next sigaction in signal chain");
-    abort();
-  }
-
-  typedef int (*SigAction)(int, const struct sigaction*, struct sigaction*);
-  SigAction linked_sigaction = reinterpret_cast<SigAction>(linked_sigaction_sym);
-  if (linked_sigaction(signal, &sa, &sa) == -1) {
+  InitializeSignalChain();
+  if (linked_sigaction(signo, &sa, &sa) == -1) {
     return SIG_ERR;
   }
 
   return reinterpret_cast<sighandler_t>(sa.sa_handler);
 }
 
-extern "C" sighandler_t signal(int signal, sighandler_t handler) {
-  return signal_impl(signal, handler);
-}
-
 #if !defined(__LP64__)
-extern "C" sighandler_t bsd_signal(int signal, sighandler_t handler) {
-  return signal_impl(signal, handler);
+extern "C" sighandler_t bsd_signal(int signo, sighandler_t handler) {
+  return signal(signo, handler);
 }
 #endif
 
 extern "C" int sigprocmask(int how, const sigset_t* bionic_new_set, sigset_t* bionic_old_set) {
+  // When inside a signal handler, forward directly to the actual sigprocmask.
+  if (handling_signal) {
+    return linked_sigprocmask(how, bionic_new_set, bionic_old_set);
+  }
+
   const sigset_t* new_set_ptr = bionic_new_set;
   sigset_t tmpset;
   if (bionic_new_set != nullptr) {
@@ -292,7 +319,7 @@
       // Don't allow claimed signals in the mask.  If a signal chain has been claimed
       // we can't allow the user to block that signal.
       for (int i = 0 ; i < _NSIG; ++i) {
-        if (user_sigactions[i].IsClaimed() && sigismember(&tmpset, i)) {
+        if (chains[i].IsClaimed() && sigismember(&tmpset, i)) {
           sigdelset(&tmpset, i);
         }
       }
@@ -300,18 +327,7 @@
     new_set_ptr = &tmpset;
   }
 
-  if (linked_sigprocmask_sym == nullptr) {
-    // Perform lazy initialization.
-    InitializeSignalChain();
-  }
-
-  if (linked_sigprocmask_sym == nullptr) {
-    log("Unable to find next sigprocmask in signal chain");
-    abort();
-  }
-
-  typedef int (*SigProcMask)(int how, const sigset_t*, sigset_t*);
-  SigProcMask linked_sigprocmask= reinterpret_cast<SigProcMask>(linked_sigprocmask_sym);
+  InitializeSignalChain();
   return linked_sigprocmask(how, new_set_ptr, bionic_old_set);
 }
 
@@ -322,49 +338,67 @@
   // taken and if it so happens that a signal occurs while one of these
   // locks is already taken, dlsym will block trying to reenter a
   // mutex and we will never get out of it.
+  static bool initialized = false;
   if (initialized) {
     // Don't initialize twice.
     return;
   }
-  linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
+
+  void* linked_sigaction_sym = dlsym(RTLD_NEXT, "sigaction");
   if (linked_sigaction_sym == nullptr) {
     linked_sigaction_sym = dlsym(RTLD_DEFAULT, "sigaction");
     if (linked_sigaction_sym == nullptr ||
         linked_sigaction_sym == reinterpret_cast<void*>(sigaction)) {
-      linked_sigaction_sym = nullptr;
+      fatal("Unable to find next sigaction in signal chain");
     }
   }
 
-  linked_sigprocmask_sym = dlsym(RTLD_NEXT, "sigprocmask");
+  void* linked_sigprocmask_sym = dlsym(RTLD_NEXT, "sigprocmask");
   if (linked_sigprocmask_sym == nullptr) {
     linked_sigprocmask_sym = dlsym(RTLD_DEFAULT, "sigprocmask");
     if (linked_sigprocmask_sym == nullptr ||
         linked_sigprocmask_sym == reinterpret_cast<void*>(sigprocmask)) {
-      linked_sigprocmask_sym = nullptr;
+      fatal("Unable to find next sigprocmask in signal chain");
     }
   }
+
+  linked_sigaction = reinterpret_cast<decltype(linked_sigaction)>(linked_sigaction_sym);
+  linked_sigprocmask = reinterpret_cast<decltype(linked_sigprocmask)>(linked_sigprocmask_sym);
   initialized = true;
 }
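
The dlsym() dance above is a standard interposition pattern. A minimal sketch with an illustrative FindNextSigaction() helper (not part of this change): look up the next definition of the shadowed symbol via RTLD_NEXT and fall back to RTLD_DEFAULT. The real code above additionally rejects a lookup that resolves back to its own wrapper; that check is omitted here because a standalone program has no interposer.

    #include <dlfcn.h>   // link with -ldl
    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>

    using SigactionFn = int (*)(int, const struct sigaction*, struct sigaction*);

    static SigactionFn FindNextSigaction() {
      void* sym = dlsym(RTLD_NEXT, "sigaction");  // skip our own definition
      if (sym == nullptr) {
        sym = dlsym(RTLD_DEFAULT, "sigaction");   // fallback: default search order
      }
      if (sym == nullptr) {
        fprintf(stderr, "unable to find next sigaction\n");
        abort();
      }
      return reinterpret_cast<SigactionFn>(sym);
    }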
 
-extern "C" void SetSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn) {
-  CheckSignalValid(signal);
+extern "C" void AddSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn) {
+  if (signal <= 0 || signal >= _NSIG) {
+    fatal("Invalid signal %d", signal);
+  }
 
   // Set the managed_handler.
-  user_sigactions[signal].SetSpecialHandler(fn);
+  chains[signal].AddSpecialHandler(fn);
+  chains[signal].Claim(signal);
+}
 
-  // In case the chain isn't claimed, claim it for ourself so we can ensure the managed handler
-  // goes first.
-  if (!user_sigactions[signal].IsClaimed()) {
-    struct sigaction act, old_act;
-    act.sa_sigaction = sigchainlib_managed_handler_sigaction;
-    sigemptyset(&act.sa_mask);
-    act.sa_flags = SA_SIGINFO | SA_ONSTACK;
-#if !defined(__APPLE__) && !defined(__mips__)
-    act.sa_restorer = nullptr;
-#endif
-    if (sigaction(signal, &act, &old_act) != -1) {
-      user_sigactions[signal].Claim(old_act);
-    }
+extern "C" void RemoveSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn) {
+  if (signal <= 0 || signal >= _NSIG) {
+    fatal("Invalid signal %d", signal);
+  }
+
+  chains[signal].RemoveSpecialHandler(fn);
+}
+
+extern "C" void EnsureFrontOfChain(int signal) {
+  if (signal <= 0 || signal >= _NSIG) {
+    fatal("Invalid signal %d", signal);
+  }
+
+  // Read the current action without looking at the chain; it should be the expected action.
+  struct sigaction current_action;
+  InitializeSignalChain();
+  linked_sigaction(signal, nullptr, &current_action);
+  // If the sigactions don't match, put the current action on the chain and
+  // install ourselves as the main action.
+  if (current_action.sa_sigaction != SignalChain::Handler) {
+    log("Warning: Unexpected sigaction action found %p\n", current_action.sa_sigaction);
+    chains[signal].Register(signal);
   }
 }
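
EnsureFrontOfChain() relies on the read-only form of sigaction() (a null new action) to inspect the kernel-visible disposition. A hedged standalone sketch of the same query, with an illustrative HandlerMatches() helper:

    #include <signal.h>

    static bool HandlerMatches(int signo,
                               void (*expected)(int, siginfo_t*, void*)) {
      struct sigaction current = {};
      if (sigaction(signo, nullptr, &current) != 0) {  // null new action: query only
        return false;
      }
      return (current.sa_flags & SA_SIGINFO) != 0 &&
             current.sa_sigaction == expected;
    }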
 
diff --git a/sigchainlib/sigchain.h b/sigchainlib/sigchain.h
index 01ccedf..960d221 100644
--- a/sigchainlib/sigchain.h
+++ b/sigchainlib/sigchain.h
@@ -23,16 +23,11 @@
 
 extern "C" void InitializeSignalChain();
 
-extern "C" void ClaimSignalChain(int signal, struct sigaction* oldaction);
-
-extern "C" void UnclaimSignalChain(int signal);
-
 typedef bool (*SpecialSignalHandlerFn)(int, siginfo_t*, void*);
-extern "C" void SetSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);
+extern "C" void AddSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);
+extern "C" void RemoveSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);
 
-extern "C" void InvokeUserSignalHandler(int sig, siginfo_t* info, void* context);
-
-extern "C" void EnsureFrontOfChain(int signal, struct sigaction* expected_action);
+extern "C" void EnsureFrontOfChain(int signal);
 
 }  // namespace art
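
A hedged sketch of how a client might use the renamed interface: unlike the old single-slot SetSpecialSignalHandlerFn, handlers can now be added and later removed. MyFaultHandler is illustrative; the declarations mirror sigchain.h above.

    #include <signal.h>

    typedef bool (*SpecialSignalHandlerFn)(int, siginfo_t*, void*);
    extern "C" void AddSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);
    extern "C" void RemoveSpecialSignalHandlerFn(int signal, SpecialSignalHandlerFn fn);

    static bool MyFaultHandler(int sig, siginfo_t*, void*) {
      // Returning true means "handled"; false passes control down the chain.
      return sig == SIGSEGV;
    }

    void Install()   { AddSpecialSignalHandlerFn(SIGSEGV, MyFaultHandler); }
    void Uninstall() { RemoveSpecialSignalHandlerFn(SIGSEGV, MyFaultHandler); }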
 
diff --git a/sigchainlib/sigchain_dummy.cc b/sigchainlib/sigchain_dummy.cc
index aa3c360..d6a5e12 100644
--- a/sigchainlib/sigchain_dummy.cc
+++ b/sigchainlib/sigchain_dummy.cc
@@ -48,37 +48,23 @@
 
 namespace art {
 
-
-extern "C" void ClaimSignalChain(int signal ATTRIBUTE_UNUSED,
-                                 struct sigaction* oldaction ATTRIBUTE_UNUSED) {
-  log("ClaimSignalChain is not exported by the main executable.");
-  abort();
-}
-
-extern "C" void UnclaimSignalChain(int signal ATTRIBUTE_UNUSED) {
-  log("UnclaimSignalChain is not exported by the main executable.");
-  abort();
-}
-
-extern "C" void InvokeUserSignalHandler(int sig ATTRIBUTE_UNUSED,
-                                        siginfo_t* info ATTRIBUTE_UNUSED,
-                                        void* context ATTRIBUTE_UNUSED) {
-  log("InvokeUserSignalHandler is not exported by the main executable.");
-  abort();
-}
-
 extern "C" void InitializeSignalChain() {
   log("InitializeSignalChain is not exported by the main executable.");
   abort();
 }
 
-extern "C" void EnsureFrontOfChain(int signal ATTRIBUTE_UNUSED,
-                                   struct sigaction* expected_action ATTRIBUTE_UNUSED) {
+extern "C" void EnsureFrontOfChain(int signal ATTRIBUTE_UNUSED) {
   log("EnsureFrontOfChain is not exported by the main executable.");
   abort();
 }
 
-extern "C" void SetSpecialSignalHandlerFn(int signal ATTRIBUTE_UNUSED,
+extern "C" void AddSpecialSignalHandlerFn(int signal ATTRIBUTE_UNUSED,
+                                          SpecialSignalHandlerFn fn ATTRIBUTE_UNUSED) {
+  log("SetSpecialSignalHandlerFn is not exported by the main executable.");
+  abort();
+}
+
+extern "C" void RemoveSpecialSignalHandlerFn(int signal ATTRIBUTE_UNUSED,
                                           SpecialSignalHandlerFn fn ATTRIBUTE_UNUSED) {
   log("SetSpecialSignalHandlerFn is not exported by the main executable.");
   abort();
diff --git a/sigchainlib/version-script32.txt b/sigchainlib/version-script32.txt
index eec9103..f360efa 100644
--- a/sigchainlib/version-script32.txt
+++ b/sigchainlib/version-script32.txt
@@ -1,11 +1,9 @@
 {
 global:
-  ClaimSignalChain;
-  UnclaimSignalChain;
-  InvokeUserSignalHandler;
   InitializeSignalChain;
   EnsureFrontOfChain;
-  SetSpecialSignalHandlerFn;
+  AddSpecialSignalHandlerFn;
+  RemoveSpecialSignalHandlerFn;
   bsd_signal;
   sigaction;
   signal;
diff --git a/sigchainlib/version-script64.txt b/sigchainlib/version-script64.txt
index 08c312e..319d1c6 100644
--- a/sigchainlib/version-script64.txt
+++ b/sigchainlib/version-script64.txt
@@ -1,11 +1,9 @@
 {
 global:
-  ClaimSignalChain;
-  UnclaimSignalChain;
-  InvokeUserSignalHandler;
   InitializeSignalChain;
   EnsureFrontOfChain;
-  SetSpecialSignalHandlerFn;
+  AddSpecialSignalHandlerFn;
+  RemoveSpecialSignalHandlerFn;
   sigaction;
   signal;
   sigprocmask;
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..414d779
--- /dev/null
+++ b/test.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+#
+# Copyright 2017, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# --run-test : To run run-test
+# --gtest : To run gtest
+# -j : Number of jobs
+# --host : To run host tests
+# --target : To run target tests
+# All the other arguments will be passed to the run-test testrunner.
+import sys
+import subprocess
+import os
+import argparse
+
+ANDROID_BUILD_TOP = os.environ.get('ANDROID_BUILD_TOP', os.getcwd())
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-j', default='', dest='n_threads')
+parser.add_argument('--run-test', '-r', action='store_true', dest='run_test')
+parser.add_argument('--gtest', '-g', action='store_true', dest='gtest')
+parser.add_argument('--target', action='store_true', dest='target')
+parser.add_argument('--host', action='store_true', dest='host')
+options, unknown = parser.parse_known_args()
+
+if options.run_test or not options.gtest:
+  testrunner = os.path.join('./',
+                            ANDROID_BUILD_TOP,
+                            'art/test/testrunner/testrunner.py')
+  run_test_args = []
+  for arg in sys.argv[1:]:
+    if arg in ('--run-test', '-r', '--gtest', '-g'):
+      continue
+    run_test_args.append(arg)
+
+  test_runner_cmd = [testrunner] + run_test_args
+  print test_runner_cmd
+  if subprocess.call(test_runner_cmd):
+    sys.exit(1)
+
+if options.gtest or not options.run_test:
+  build_target = ''
+  if options.host or not options.target:
+    build_target += ' test-art-host-gtest'
+  if options.target or not options.host:
+    build_target += ' test-art-target-gtest'
+
+  build_command = 'make'
+  build_command += ' -j' + str(options.n_threads)
+
+  build_command += ' -C ' + ANDROID_BUILD_TOP
+  build_command += ' ' + build_target
+  # Add 'dist' to avoid Jack issues b/36169180.
+  build_command += ' dist'
+
+  print build_command
+
+  if subprocess.call(build_command.split()):
+    sys.exit(1)
+
+sys.exit(0)
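
Typical invocations, given the flag handling above: './test.py -r --host -j48' runs only the run-tests (everything other than the -r/-g flags, here '--host -j48', is forwarded to testrunner.py); './test.py -g' builds and runs only the gtests; and with neither --run-test nor --gtest, both suites run.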
diff --git a/test/004-SignalTest/expected.txt b/test/004-SignalTest/expected.txt
index b3a0e1c..847b56f 100644
--- a/test/004-SignalTest/expected.txt
+++ b/test/004-SignalTest/expected.txt
@@ -3,4 +3,8 @@
 Caught NullPointerException
 Caught StackOverflowError
 signal caught
+unblocked signal received
+unblocking blocked signal
+blocked signal received
+signal handler done
 Signal test OK
diff --git a/test/004-SignalTest/signaltest.cc b/test/004-SignalTest/signaltest.cc
index 6dd6355..a58a075 100644
--- a/test/004-SignalTest/signaltest.cc
+++ b/test/004-SignalTest/signaltest.cc
@@ -18,13 +18,14 @@
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <sys/ucontext.h>
 #include <unistd.h>
 
 #include "base/macros.h"
 
 static int signal_count;
-static const int kMaxSignal = 2;
+static const int kMaxSignal = 1;
 
 #if defined(__i386__) || defined(__x86_64__)
 #if defined(__APPLE__)
@@ -47,6 +48,17 @@
 #endif
 #endif
 
+#define BLOCKED_SIGNAL SIGUSR1
+#define UNBLOCKED_SIGNAL SIGUSR2
+
+static void blocked_signal(int sig ATTRIBUTE_UNUSED) {
+  printf("blocked signal received\n");
+}
+
+static void unblocked_signal(int sig ATTRIBUTE_UNUSED) {
+  printf("unblocked signal received\n");
+}
+
 static void signalhandler(int sig ATTRIBUTE_UNUSED, siginfo_t* info ATTRIBUTE_UNUSED,
                           void* context) {
   printf("signal caught\n");
@@ -54,6 +66,16 @@
   if (signal_count > kMaxSignal) {
      abort();
   }
+
+  raise(UNBLOCKED_SIGNAL);
+  raise(BLOCKED_SIGNAL);
+  printf("unblocking blocked signal\n");
+
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, BLOCKED_SIGNAL);
+  sigprocmask(SIG_UNBLOCK, &mask, nullptr);
+
 #if defined(__arm__)
   struct ucontext *uc = reinterpret_cast<struct ucontext*>(context);
   struct sigcontext *sc = reinterpret_cast<struct sigcontext*>(&uc->uc_mcontext);
@@ -71,6 +93,8 @@
 #else
   UNUSED(context);
 #endif
+
+  printf("signal handler done\n");
 }
 
 static struct sigaction oldaction;
@@ -78,13 +102,21 @@
 extern "C" JNIEXPORT void JNICALL Java_Main_initSignalTest(JNIEnv*, jclass) {
   struct sigaction action;
   action.sa_sigaction = signalhandler;
-  sigemptyset(&action.sa_mask);
+  sigfillset(&action.sa_mask);
+  sigdelset(&action.sa_mask, UNBLOCKED_SIGNAL);
   action.sa_flags = SA_SIGINFO | SA_ONSTACK;
 #if !defined(__APPLE__) && !defined(__mips__)
   action.sa_restorer = nullptr;
 #endif
 
   sigaction(SIGSEGV, &action, &oldaction);
+  struct sigaction check;
+  sigaction(SIGSEGV, nullptr, &check);
+  if (memcmp(&action, &check, sizeof(action)) != 0) {
+    printf("sigaction returned different value\n");
+  }
+  signal(BLOCKED_SIGNAL, blocked_signal);
+  signal(UNBLOCKED_SIGNAL, unblocked_signal);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_terminateSignalTest(JNIEnv*, jclass) {
@@ -96,6 +128,12 @@
 char *go_away_compiler = nullptr;
 
 extern "C" JNIEXPORT jint JNICALL Java_Main_testSignal(JNIEnv*, jclass) {
+  // Unblock UNBLOCKED_SIGNAL.
+  sigset_t mask;
+  sigemptyset(&mask);
+  sigaddset(&mask, UNBLOCKED_SIGNAL);
+  sigprocmask(SIG_UNBLOCK, &mask, nullptr);
+
 #if defined(__arm__) || defined(__i386__) || defined(__aarch64__)
   // On supported architectures we cause a real SEGV.
   *go_away_compiler = 'a';
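
The test above exercises delivery ordering for blocked signals from inside a signal handler. A hedged, self-contained variant of the same pattern (blocking via sigprocmask rather than sa_mask) that prints the same three lines:

    #include <signal.h>
    #include <stdio.h>

    static void on_usr1(int) { printf("blocked signal received\n"); }
    static void on_usr2(int) { printf("unblocked signal received\n"); }

    int main() {
      signal(SIGUSR1, on_usr1);
      signal(SIGUSR2, on_usr2);

      sigset_t mask;
      sigemptyset(&mask);
      sigaddset(&mask, SIGUSR1);
      sigprocmask(SIG_BLOCK, &mask, nullptr);

      raise(SIGUSR2);  // unblocked: delivered immediately
      raise(SIGUSR1);  // blocked: stays pending
      printf("unblocking blocked signal\n");
      sigprocmask(SIG_UNBLOCK, &mask, nullptr);  // SIGUSR1 delivered here
      return 0;
    }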
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index 0dd82ab..194f4a1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -127,6 +127,9 @@
         Assert.assertEquals("I", /* Small latin dotless i */ "\u0131".toUpperCase());
         Assert.assertEquals("abc", "a\u0131c".replace('\u0131', 'b'));
         Assert.assertEquals("a\u0131c", "abc".replace('b', '\u0131'));
+
+        // Regression test for scratch register exhaustion in String.equals() intrinsic on arm64.
+        Assert.assertFalse(result.equals("Very long constant string, so that the known constant count field cannot be embedded in a CMP immediate instruction on arm64. Since it can hold 12-bit values, optionally shifted left by 12, let's go somewhere over 2^12, i.e. 4096. That should trigger the bug with or without string compression. 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefg
hijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/"));
     }
 
     public static void testCompareToAndEquals() {
diff --git a/test/051-thread/expected.txt b/test/051-thread/expected.txt
index c6cd4f8..3fc3492 100644
--- a/test/051-thread/expected.txt
+++ b/test/051-thread/expected.txt
@@ -1,6 +1,6 @@
 JNI_OnLoad called
 thread test starting
-testThreadCapacity thread count: 512
+testThreadCapacity thread count: 128
 testThreadDaemons starting thread 'TestDaemonThread'
 testThreadDaemons @ Thread running
 testThreadDaemons @ Got expected setDaemon exception
diff --git a/test/051-thread/src/Main.java b/test/051-thread/src/Main.java
index 2e26b22..82fc0d4 100644
--- a/test/051-thread/src/Main.java
+++ b/test/051-thread/src/Main.java
@@ -35,8 +35,8 @@
      * Simple thread capacity test.
      */
     private static void testThreadCapacity() throws Exception {
-        TestCapacityThread[] threads = new TestCapacityThread[512];
-        for (int i = 0; i < 512; i++) {
+        TestCapacityThread[] threads = new TestCapacityThread[128];
+        for (int i = 0; i < threads.length; i++) {
             threads[i] = new TestCapacityThread();
         }
 
diff --git a/test/577-profile-foreign-dex/run b/test/080-oom-throw/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/080-oom-throw/run
index ad57d14..eb47378 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/080-oom-throw/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} "$@" --runtime-option -Xmx16m
diff --git a/test/080-oom-throw/src/Main.java b/test/080-oom-throw/src/Main.java
index a6c18b7..3d5d062 100644
--- a/test/080-oom-throw/src/Main.java
+++ b/test/080-oom-throw/src/Main.java
@@ -114,13 +114,13 @@
     static Object[] holder;
 
     public static void blowup() throws Exception {
-        int size = 32 * 1024 * 1024;
+        int size = 2 * 1024 * 1024;
         for (int i = 0; i < holder.length; ) {
             try {
                 holder[i] = new char[size];
                 i++;
             } catch (OutOfMemoryError oome) {
-                size = size / 2;
+                size = size / 16;
                 if (size == 0) {
                      break;
                 }
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index 16ac6be..87287f8 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -395,20 +395,6 @@
 #endif
 #endif
 
-static bool cannot_be_blocked(int signum) {
-  // These two sigs cannot be blocked anywhere.
-  if ((signum == SIGKILL) || (signum == SIGSTOP)) {
-      return true;
-  }
-
-  // The invalid rt_sig cannot be blocked.
-  if (((signum >= 32) && (signum < SIGRTMIN)) || (signum > SIGRTMAX)) {
-      return true;
-  }
-
-  return false;
-}
-
 // A dummy special handler, continuing after the faulting location. This code comes from
 // 004-SignalTest.
 static bool nb_signalhandler(int sig, siginfo_t* info ATTRIBUTE_UNUSED, void* context) {
@@ -433,22 +419,6 @@
 #endif
   }
 
-  // Before invoking this handler, all other unclaimed signals must be blocked.
-  // We're trying to check the signal mask to verify its status here.
-  sigset_t tmpset;
-  sigemptyset(&tmpset);
-  sigprocmask(SIG_SETMASK, nullptr, &tmpset);
-  int other_claimed = (sig == SIGSEGV) ? SIGILL : SIGSEGV;
-  for (int signum = 0; signum < NSIG; ++signum) {
-    if (cannot_be_blocked(signum)) {
-        continue;
-    } else if ((sigismember(&tmpset, signum)) && (signum == other_claimed)) {
-      printf("ERROR: The claimed signal %d is blocked\n", signum);
-    } else if ((!sigismember(&tmpset, signum)) && (signum != other_claimed)) {
-      printf("ERROR: The unclaimed signal %d is not blocked\n", signum);
-    }
-  }
-
   // We handled this...
   return true;
 }
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index 595c70d..7e8431f 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -59,7 +59,7 @@
         // Stop the JIT to ensure its threads and work queue are not keeping classes
         // artificially alive.
         stopJit();
-        Runtime.getRuntime().gc();
+        doUnloading();
         System.runFinalization();
         BufferedReader reader = new BufferedReader(new FileReader ("/proc/" + pid + "/maps"));
         String line;
@@ -83,12 +83,20 @@
         }
     }
 
+    private static void doUnloading() {
+      // Do multiple GCs to prevent rare flakiness if some other thread is keeping the
+      // class loader alive.
+      for (int i = 0; i < 5; ++i) {
+        Runtime.getRuntime().gc();
+      }
+    }
+
     private static void testUnloadClass(Constructor<?> constructor) throws Exception {
         WeakReference<Class> klass = setUpUnloadClassWeak(constructor);
         // No strong references to class loader, should get unloaded.
-        Runtime.getRuntime().gc();
+        doUnloading();
         WeakReference<Class> klass2 = setUpUnloadClassWeak(constructor);
-        Runtime.getRuntime().gc();
+        doUnloading();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(klass.get());
         System.out.println(klass2.get());
@@ -98,7 +106,7 @@
         throws Exception {
       WeakReference<ClassLoader> loader = setUpUnloadLoader(constructor, true);
       // No strong references to class loader, should get unloaded.
-      Runtime.getRuntime().gc();
+      doUnloading();
       // If the weak reference is cleared, then it was unloaded.
       System.out.println(loader.get());
     }
@@ -110,7 +118,7 @@
         Throwable throwable = (Throwable) stackTraceMethod.invoke(klass);
         stackTraceMethod = null;
         klass = null;
-        Runtime.getRuntime().gc();
+        doUnloading();
         boolean isNull = weak_klass.get() == null;
         System.out.println("class null " + isNull + " " + throwable.getMessage());
     }
@@ -118,7 +126,7 @@
     private static void testLoadAndUnloadLibrary(Constructor<?> constructor) throws Exception {
         WeakReference<ClassLoader> loader = setUpLoadLibrary(constructor);
         // No strong references to class loader, should get unloaded.
-        Runtime.getRuntime().gc();
+        doUnloading();
         // If the weak reference is cleared, then it was unloaded.
         System.out.println(loader.get());
     }
@@ -147,7 +155,7 @@
 
     private static void testNoUnloadInstance(Constructor<?> constructor) throws Exception {
         Pair p = testNoUnloadInstanceHelper(constructor);
-        Runtime.getRuntime().gc();
+        doUnloading();
         // If the class loader was unloaded too early due to races, just pass the test.
         boolean isNull = p.classLoader.get() == null;
         System.out.println("loader null " + isNull);
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/152-dead-large-object/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/152-dead-large-object/expected.txt
diff --git a/test/152-dead-large-object/info.txt b/test/152-dead-large-object/info.txt
new file mode 100644
index 0000000..45023cd
--- /dev/null
+++ b/test/152-dead-large-object/info.txt
@@ -0,0 +1 @@
+Test that large objects are freed properly after a GC.
diff --git a/test/ti-agent/common_load.h b/test/152-dead-large-object/src/Main.java
similarity index 65%
copy from test/ti-agent/common_load.h
copy to test/152-dead-large-object/src/Main.java
index d254421..72fd25c 100644
--- a/test/ti-agent/common_load.h
+++ b/test/152-dead-large-object/src/Main.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,13 @@
  * limitations under the License.
  */
 
-#ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
-#define ART_TEST_TI_AGENT_COMMON_LOAD_H_
+public class Main {
+    static volatile Object[] a = null;
 
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-
-namespace art {
-
-extern jvmtiEnv* jvmti_env;
-
-}  // namespace art
-
-#endif  // ART_TEST_TI_AGENT_COMMON_LOAD_H_
+    public static void main(String[] args) {
+        for (int i = 0; i < 10; ++i) {
+            a = new Object[i * 300000];
+            Runtime.getRuntime().gc();
+        }
+    }
+}
diff --git a/test/154-gc-loop/src/Main.java b/test/154-gc-loop/src/Main.java
index 3a256c1..69015b6 100644
--- a/test/154-gc-loop/src/Main.java
+++ b/test/154-gc-loop/src/Main.java
@@ -38,7 +38,7 @@
         }
     } catch (Exception e) {}
     System.out.println("Finalize count too large: " +
-            ((finalizeCounter >= 10) ? Integer.toString(finalizeCounter) : "false"));
+            ((finalizeCounter >= 15) ? Integer.toString(finalizeCounter) : "false"));
   }
 
   private static native void backgroundProcessState();
diff --git a/test/155-java-set-resolved-type/src/Main.java b/test/155-java-set-resolved-type/src/Main.java
index 56b8c3e..8f79bd7 100644
--- a/test/155-java-set-resolved-type/src/Main.java
+++ b/test/155-java-set-resolved-type/src/Main.java
@@ -57,8 +57,8 @@
             // we need to find TestInterface.
             clearResolvedTypes(timpl);
 
-            // Force intialization of TestClass2. This expects the interface type to be
-            // resolved and found through simple lookup.
+            // Force initialization of TestImplementation. This expects the interface type
+            // to be resolved and found through simple lookup.
             timpl.newInstance();
         } catch (Throwable t) {
             t.printStackTrace();
diff --git a/test/157-void-class/expected.txt b/test/157-void-class/expected.txt
new file mode 100644
index 0000000..3f61c0b
--- /dev/null
+++ b/test/157-void-class/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+void.class = void
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/157-void-class/info.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/157-void-class/info.txt
diff --git a/test/157-void-class/run b/test/157-void-class/run
new file mode 100755
index 0000000..59e852c
--- /dev/null
+++ b/test/157-void-class/run
@@ -0,0 +1,22 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Let the test build its own core image with --no-image and use verify-profile,
+# so that the compiler does not try to initialize classes. This leaves the
+# java.lang.Void compile-time verified but uninitialized.
+./default-run "$@" --no-image \
+    --runtime-option -Ximage-compiler-option \
+    --runtime-option --compiler-filter=verify-profile
diff --git a/test/157-void-class/src/Main.java b/test/157-void-class/src/Main.java
new file mode 100644
index 0000000..322b705
--- /dev/null
+++ b/test/157-void-class/src/Main.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import libcore.util.EmptyArray;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            // Check if we're running dalvik or RI.
+            Class<?> class_loader_class = Class.forName("dalvik.system.PathClassLoader");
+            System.loadLibrary(args[0]);
+        } catch (ClassNotFoundException e) {
+            usingRI = true;
+            // Add expected JNI_OnLoad log line to match expected.txt.
+            System.out.println("JNI_OnLoad called");
+        }
+        try {
+            // Initialize all classes needed for old java.lang.Void.TYPE initialization.
+            Runnable.class.getMethod("run", EmptyArray.CLASS).getReturnType();
+        } catch (Exception e) {
+            throw new Error(e);
+        }
+        // Clear the resolved types of the ojluni dex file to make sure there is no entry
+        // for "V", i.e. void.
+        clearResolvedTypes(Integer.class);
+        // With java.lang.Void being compile-time verified but uninitialized, initialize
+        // it now. Previously, this would indirectly initialize TYPE with the current,
+        // i.e. zero-initialized, value of TYPE. The only thing that could prevent the
+        // series of calls leading to this was a cache hit in Class.getDexCacheType()
+        // which we have prevented by clearing the cache above.
+        Class<?> voidClass = void.class;
+        System.out.println("void.class = " + voidClass);
+    }
+
+    public static void clearResolvedTypes(Class<?> c) {
+        if (!usingRI) {
+            nativeClearResolvedTypes(c);
+        }
+    }
+
+    public static native void nativeClearResolvedTypes(Class<?> c);
+
+    static boolean usingRI = false;
+}
diff --git a/test/158-app-image-class-table/expected.txt b/test/158-app-image-class-table/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/158-app-image-class-table/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/158-app-image-class-table/info.txt b/test/158-app-image-class-table/info.txt
new file mode 100644
index 0000000..c844c8e
--- /dev/null
+++ b/test/158-app-image-class-table/info.txt
@@ -0,0 +1,3 @@
+Regression test for app image class table being erroneously omitted
+when it contains only boot image class loader classes while dex caches
+were written with references to these classes.
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/158-app-image-class-table/profile
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/158-app-image-class-table/profile
diff --git a/test/577-profile-foreign-dex/run b/test/158-app-image-class-table/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/158-app-image-class-table/run
index ad57d14..146e180 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/158-app-image-class-table/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} "$@" --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/158-app-image-class-table/src/Main.java b/test/158-app-image-class-table/src/Main.java
new file mode 100644
index 0000000..804468f
--- /dev/null
+++ b/test/158-app-image-class-table/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+    public static String TEST_NAME = "158-app-image-class-table";
+
+    public static void main(String[] args) {
+        try {
+            Class<?> class_loader_class = Class.forName("dalvik.system.PathClassLoader");
+            System.loadLibrary(args[0]);
+        } catch (ClassNotFoundException e) {
+            usingRI = true;
+            // Add expected JNI_OnLoad log line to match expected.txt.
+            System.out.println("JNI_OnLoad called");
+        }
+        try {
+            // Resolve but do not initialize TestImplementation. During the resolution,
+            // we see the Cloneable in the dex cache, so we do not try to look it up
+            // or resolve it.
+            Class<?> timpl =
+                Class.forName("TestImplementation", false, Main.class.getClassLoader());
+            // Clear the dex cache resolved types to force a proper lookup the next time
+            // we need to find TestInterface.
+            clearResolvedTypes(timpl);
+            // Force initialization of TestImplementation. This expects the interface type
+            // to be resolved and found through simple lookup.
+            timpl.newInstance();
+        } catch (Throwable t) {
+            t.printStackTrace();
+        }
+    }
+
+    public static void clearResolvedTypes(Class<?> c) {
+        if (!usingRI) {
+            nativeClearResolvedTypes(c);
+        }
+    }
+
+    private static boolean usingRI = false;
+
+    public static native void nativeClearResolvedTypes(Class<?> c);
+}
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/158-app-image-class-table/src/TestImplementation.java
similarity index 74%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/158-app-image-class-table/src/TestImplementation.java
index cba73b3..558e587 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/158-app-image-class-table/src/TestImplementation.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public class TestImplementation implements Cloneable {
+    public Object clone() {
+        return new TestImplementation();
+    }
 }
diff --git a/test/159-app-image-fields/expected.txt b/test/159-app-image-fields/expected.txt
new file mode 100644
index 0000000..f63e8e3
--- /dev/null
+++ b/test/159-app-image-fields/expected.txt
@@ -0,0 +1,3 @@
+Eating all memory.
+memoryWasAllocated = true
+match: true
diff --git a/test/159-app-image-fields/info.txt b/test/159-app-image-fields/info.txt
new file mode 100644
index 0000000..9b10078
--- /dev/null
+++ b/test/159-app-image-fields/info.txt
@@ -0,0 +1,3 @@
+Regression test for erroneously storing an ArtField* in the app image DexCache
+when the class from the corresponding FieldId is not in the app image, only the
+declaring class is.
diff --git a/test/159-app-image-fields/profile b/test/159-app-image-fields/profile
new file mode 100644
index 0000000..4184fa2
--- /dev/null
+++ b/test/159-app-image-fields/profile
@@ -0,0 +1,3 @@
+LAAA/Base;
+LMain;
+LFields;
diff --git a/test/159-app-image-fields/run b/test/159-app-image-fields/run
new file mode 100644
index 0000000..7cc107a
--- /dev/null
+++ b/test/159-app-image-fields/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Use a profile to put specific classes in the app image.
+# Also run the compiler with -j1 to ensure specific class verification order.
+exec ${RUN} "$@" --profile -Xcompiler-option --compiler-filter=speed-profile \
+    -Xcompiler-option -j1
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/159-app-image-fields/src/AAA/Base.java
similarity index 75%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/159-app-image-fields/src/AAA/Base.java
index cba73b3..41ee83a 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/159-app-image-fields/src/AAA/Base.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,10 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+package AAA;
+
+class Base {
+    // The field is public but the class is package-private.
+    public static int value = 42;
 }
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/159-app-image-fields/src/AAA/Derived.java
similarity index 73%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/159-app-image-fields/src/AAA/Derived.java
index cba73b3..f6045d5 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/159-app-image-fields/src/AAA/Derived.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+package AAA;
+
+public class Derived extends Base {
+    // Allows public access to Base.value (Base is package-private) referenced as Derived.value.
 }
diff --git a/test/159-app-image-fields/src/Main.java b/test/159-app-image-fields/src/Main.java
new file mode 100644
index 0000000..d06a502
--- /dev/null
+++ b/test/159-app-image-fields/src/Main.java
@@ -0,0 +1,2156 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import AAA.Derived;
+
+public class Main {
+    public static void main(String[] args) {
+        try {
+            // Make sure we resolve Fields before eating memory.
+            // (Making sure that the test passes in no-image configurations.)
+            Class.forName("Fields", false, Main.class.getClassLoader());
+            System.out.println("Eating all memory.");
+            Object memory = eatAllMemory();
+
+            // This test assumes that Derived is not yet resolved. In some configurations
+            // (notably interp-ac), Derived is already resolved by verifying Main at run
+            // time. Therefore we cannot assume that we get a certain `value` and need to
+            // simply check for consistency, i.e. `value == another_value`.
+            int value = 0;
+            try {
+                // If the ArtField* is erroneously left in the DexCache, this
+                // shall succeed despite the class Derived being unresolved so
+                // far. Otherwise, we shall throw OOME trying to resolve it.
+                value = Derived.value;
+            } catch (OutOfMemoryError e) {
+                value = -1;
+            }
+            Fields.clobberDexCache();
+            int another_value = 0;
+            try {
+                // Try again for comparison. Since the DexCache field array has been
+                // clobbered by Fields.clobberDexCache(), this shall throw OOME.
+                another_value = Derived.value;
+            } catch (OutOfMemoryError e) {
+                another_value = -1;
+            }
+            boolean memoryWasAllocated = (memory != null);
+            memory = null;
+            System.out.println("memoryWasAllocated = " + memoryWasAllocated);
+            System.out.println("match: " + (value == another_value));
+            if (value != another_value || (value != -1 && value != 42)) {
+                // Mismatch or unexpected value, print additional debugging information.
+                System.out.println("value: " + value);
+                System.out.println("another_value: " + another_value);
+            }
+        } catch (Throwable t) {
+            t.printStackTrace();
+        }
+    }
+
+    public static Object eatAllMemory() {
+        Object[] result = null;
+        int size = 1000000;
+        while (result == null && size != 0) {
+            try {
+                result = new Object[size];
+            } catch (OutOfMemoryError oome) {
+                size /= 2;
+            }
+        }
+        if (result != null) {
+            int index = 0;
+            while (index != result.length && size != 0) {
+                try {
+                    result[index] = new byte[size];
+                    ++index;
+                } catch (OutOfMemoryError oome) {
+                    size /= 2;
+                }
+            }
+        }
+        return result;
+    }
+}
+
+// The naming is deliberate to take into account two different situations:
+//   - eagerly preloading DexCache with the available candidate with the lowest index,
+//   - not preloading DexCache and relying on the verification to populate it.
+// This corresponds to new and old behavior, respectively.
+//
+// Eager preloading: "LFields;" is after "LAAA/Base;" and "LAAA/Derived;" so that
+// Derived.value takes priority over Fields.testField*.
+//
+// Relying on verifier: "LFields;" is before "LMain;" so that the class definition
+// of Fields precedes the definition of Main (this is not strictly required but the
+// tools look at lexicographic ordering when there is no inheritance relationship)
+// and the verification of Main is last and fills the DexCache with Derived.value.
+//
+class Fields {
+    public static int clobberDexCache() {
+        return 0
+                + testField0000
+                + testField0001
+                + testField0002
+                + testField0003
+                + testField0004
+                + testField0005
+                + testField0006
+                + testField0007
+                + testField0008
+                + testField0009
+                + testField0010
+                + testField0011
+                + testField0012
+                + testField0013
+                + testField0014
+                + testField0015
+                + testField0016
+                + testField0017
+                + testField0018
+                + testField0019
+                + testField0020
+                + testField0021
+                + testField0022
+                + testField0023
+                + testField0024
+                + testField0025
+                + testField0026
+                + testField0027
+                + testField0028
+                + testField0029
+                + testField0030
+                + testField0031
+                + testField0032
+                + testField0033
+                + testField0034
+                + testField0035
+                + testField0036
+                + testField0037
+                + testField0038
+                + testField0039
+                + testField0040
+                + testField0041
+                + testField0042
+                + testField0043
+                + testField0044
+                + testField0045
+                + testField0046
+                + testField0047
+                + testField0048
+                + testField0049
+                + testField0050
+                + testField0051
+                + testField0052
+                + testField0053
+                + testField0054
+                + testField0055
+                + testField0056
+                + testField0057
+                + testField0058
+                + testField0059
+                + testField0060
+                + testField0061
+                + testField0062
+                + testField0063
+                + testField0064
+                + testField0065
+                + testField0066
+                + testField0067
+                + testField0068
+                + testField0069
+                + testField0070
+                + testField0071
+                + testField0072
+                + testField0073
+                + testField0074
+                + testField0075
+                + testField0076
+                + testField0077
+                + testField0078
+                + testField0079
+                + testField0080
+                + testField0081
+                + testField0082
+                + testField0083
+                + testField0084
+                + testField0085
+                + testField0086
+                + testField0087
+                + testField0088
+                + testField0089
+                + testField0090
+                + testField0091
+                + testField0092
+                + testField0093
+                + testField0094
+                + testField0095
+                + testField0096
+                + testField0097
+                + testField0098
+                + testField0099
+                + testField0100
+                + testField0101
+                + testField0102
+                + testField0103
+                + testField0104
+                + testField0105
+                + testField0106
+                + testField0107
+                + testField0108
+                + testField0109
+                + testField0110
+                + testField0111
+                + testField0112
+                + testField0113
+                + testField0114
+                + testField0115
+                + testField0116
+                + testField0117
+                + testField0118
+                + testField0119
+                + testField0120
+                + testField0121
+                + testField0122
+                + testField0123
+                + testField0124
+                + testField0125
+                + testField0126
+                + testField0127
+                + testField0128
+                + testField0129
+                + testField0130
+                + testField0131
+                + testField0132
+                + testField0133
+                + testField0134
+                + testField0135
+                + testField0136
+                + testField0137
+                + testField0138
+                + testField0139
+                + testField0140
+                + testField0141
+                + testField0142
+                + testField0143
+                + testField0144
+                + testField0145
+                + testField0146
+                + testField0147
+                + testField0148
+                + testField0149
+                + testField0150
+                + testField0151
+                + testField0152
+                + testField0153
+                + testField0154
+                + testField0155
+                + testField0156
+                + testField0157
+                + testField0158
+                + testField0159
+                + testField0160
+                + testField0161
+                + testField0162
+                + testField0163
+                + testField0164
+                + testField0165
+                + testField0166
+                + testField0167
+                + testField0168
+                + testField0169
+                + testField0170
+                + testField0171
+                + testField0172
+                + testField0173
+                + testField0174
+                + testField0175
+                + testField0176
+                + testField0177
+                + testField0178
+                + testField0179
+                + testField0180
+                + testField0181
+                + testField0182
+                + testField0183
+                + testField0184
+                + testField0185
+                + testField0186
+                + testField0187
+                + testField0188
+                + testField0189
+                + testField0190
+                + testField0191
+                + testField0192
+                + testField0193
+                + testField0194
+                + testField0195
+                + testField0196
+                + testField0197
+                + testField0198
+                + testField0199
+                + testField0200
+                + testField0201
+                + testField0202
+                + testField0203
+                + testField0204
+                + testField0205
+                + testField0206
+                + testField0207
+                + testField0208
+                + testField0209
+                + testField0210
+                + testField0211
+                + testField0212
+                + testField0213
+                + testField0214
+                + testField0215
+                + testField0216
+                + testField0217
+                + testField0218
+                + testField0219
+                + testField0220
+                + testField0221
+                + testField0222
+                + testField0223
+                + testField0224
+                + testField0225
+                + testField0226
+                + testField0227
+                + testField0228
+                + testField0229
+                + testField0230
+                + testField0231
+                + testField0232
+                + testField0233
+                + testField0234
+                + testField0235
+                + testField0236
+                + testField0237
+                + testField0238
+                + testField0239
+                + testField0240
+                + testField0241
+                + testField0242
+                + testField0243
+                + testField0244
+                + testField0245
+                + testField0246
+                + testField0247
+                + testField0248
+                + testField0249
+                + testField0250
+                + testField0251
+                + testField0252
+                + testField0253
+                + testField0254
+                + testField0255
+                + testField0256
+                + testField0257
+                + testField0258
+                + testField0259
+                + testField0260
+                + testField0261
+                + testField0262
+                + testField0263
+                + testField0264
+                + testField0265
+                + testField0266
+                + testField0267
+                + testField0268
+                + testField0269
+                + testField0270
+                + testField0271
+                + testField0272
+                + testField0273
+                + testField0274
+                + testField0275
+                + testField0276
+                + testField0277
+                + testField0278
+                + testField0279
+                + testField0280
+                + testField0281
+                + testField0282
+                + testField0283
+                + testField0284
+                + testField0285
+                + testField0286
+                + testField0287
+                + testField0288
+                + testField0289
+                + testField0290
+                + testField0291
+                + testField0292
+                + testField0293
+                + testField0294
+                + testField0295
+                + testField0296
+                + testField0297
+                + testField0298
+                + testField0299
+                + testField0300
+                + testField0301
+                + testField0302
+                + testField0303
+                + testField0304
+                + testField0305
+                + testField0306
+                + testField0307
+                + testField0308
+                + testField0309
+                + testField0310
+                + testField0311
+                + testField0312
+                + testField0313
+                + testField0314
+                + testField0315
+                + testField0316
+                + testField0317
+                + testField0318
+                + testField0319
+                + testField0320
+                + testField0321
+                + testField0322
+                + testField0323
+                + testField0324
+                + testField0325
+                + testField0326
+                + testField0327
+                + testField0328
+                + testField0329
+                + testField0330
+                + testField0331
+                + testField0332
+                + testField0333
+                + testField0334
+                + testField0335
+                + testField0336
+                + testField0337
+                + testField0338
+                + testField0339
+                + testField0340
+                + testField0341
+                + testField0342
+                + testField0343
+                + testField0344
+                + testField0345
+                + testField0346
+                + testField0347
+                + testField0348
+                + testField0349
+                + testField0350
+                + testField0351
+                + testField0352
+                + testField0353
+                + testField0354
+                + testField0355
+                + testField0356
+                + testField0357
+                + testField0358
+                + testField0359
+                + testField0360
+                + testField0361
+                + testField0362
+                + testField0363
+                + testField0364
+                + testField0365
+                + testField0366
+                + testField0367
+                + testField0368
+                + testField0369
+                + testField0370
+                + testField0371
+                + testField0372
+                + testField0373
+                + testField0374
+                + testField0375
+                + testField0376
+                + testField0377
+                + testField0378
+                + testField0379
+                + testField0380
+                + testField0381
+                + testField0382
+                + testField0383
+                + testField0384
+                + testField0385
+                + testField0386
+                + testField0387
+                + testField0388
+                + testField0389
+                + testField0390
+                + testField0391
+                + testField0392
+                + testField0393
+                + testField0394
+                + testField0395
+                + testField0396
+                + testField0397
+                + testField0398
+                + testField0399
+                + testField0400
+                + testField0401
+                + testField0402
+                + testField0403
+                + testField0404
+                + testField0405
+                + testField0406
+                + testField0407
+                + testField0408
+                + testField0409
+                + testField0410
+                + testField0411
+                + testField0412
+                + testField0413
+                + testField0414
+                + testField0415
+                + testField0416
+                + testField0417
+                + testField0418
+                + testField0419
+                + testField0420
+                + testField0421
+                + testField0422
+                + testField0423
+                + testField0424
+                + testField0425
+                + testField0426
+                + testField0427
+                + testField0428
+                + testField0429
+                + testField0430
+                + testField0431
+                + testField0432
+                + testField0433
+                + testField0434
+                + testField0435
+                + testField0436
+                + testField0437
+                + testField0438
+                + testField0439
+                + testField0440
+                + testField0441
+                + testField0442
+                + testField0443
+                + testField0444
+                + testField0445
+                + testField0446
+                + testField0447
+                + testField0448
+                + testField0449
+                + testField0450
+                + testField0451
+                + testField0452
+                + testField0453
+                + testField0454
+                + testField0455
+                + testField0456
+                + testField0457
+                + testField0458
+                + testField0459
+                + testField0460
+                + testField0461
+                + testField0462
+                + testField0463
+                + testField0464
+                + testField0465
+                + testField0466
+                + testField0467
+                + testField0468
+                + testField0469
+                + testField0470
+                + testField0471
+                + testField0472
+                + testField0473
+                + testField0474
+                + testField0475
+                + testField0476
+                + testField0477
+                + testField0478
+                + testField0479
+                + testField0480
+                + testField0481
+                + testField0482
+                + testField0483
+                + testField0484
+                + testField0485
+                + testField0486
+                + testField0487
+                + testField0488
+                + testField0489
+                + testField0490
+                + testField0491
+                + testField0492
+                + testField0493
+                + testField0494
+                + testField0495
+                + testField0496
+                + testField0497
+                + testField0498
+                + testField0499
+                + testField0500
+                + testField0501
+                + testField0502
+                + testField0503
+                + testField0504
+                + testField0505
+                + testField0506
+                + testField0507
+                + testField0508
+                + testField0509
+                + testField0510
+                + testField0511
+                + testField0512
+                + testField0513
+                + testField0514
+                + testField0515
+                + testField0516
+                + testField0517
+                + testField0518
+                + testField0519
+                + testField0520
+                + testField0521
+                + testField0522
+                + testField0523
+                + testField0524
+                + testField0525
+                + testField0526
+                + testField0527
+                + testField0528
+                + testField0529
+                + testField0530
+                + testField0531
+                + testField0532
+                + testField0533
+                + testField0534
+                + testField0535
+                + testField0536
+                + testField0537
+                + testField0538
+                + testField0539
+                + testField0540
+                + testField0541
+                + testField0542
+                + testField0543
+                + testField0544
+                + testField0545
+                + testField0546
+                + testField0547
+                + testField0548
+                + testField0549
+                + testField0550
+                + testField0551
+                + testField0552
+                + testField0553
+                + testField0554
+                + testField0555
+                + testField0556
+                + testField0557
+                + testField0558
+                + testField0559
+                + testField0560
+                + testField0561
+                + testField0562
+                + testField0563
+                + testField0564
+                + testField0565
+                + testField0566
+                + testField0567
+                + testField0568
+                + testField0569
+                + testField0570
+                + testField0571
+                + testField0572
+                + testField0573
+                + testField0574
+                + testField0575
+                + testField0576
+                + testField0577
+                + testField0578
+                + testField0579
+                + testField0580
+                + testField0581
+                + testField0582
+                + testField0583
+                + testField0584
+                + testField0585
+                + testField0586
+                + testField0587
+                + testField0588
+                + testField0589
+                + testField0590
+                + testField0591
+                + testField0592
+                + testField0593
+                + testField0594
+                + testField0595
+                + testField0596
+                + testField0597
+                + testField0598
+                + testField0599
+                + testField0600
+                + testField0601
+                + testField0602
+                + testField0603
+                + testField0604
+                + testField0605
+                + testField0606
+                + testField0607
+                + testField0608
+                + testField0609
+                + testField0610
+                + testField0611
+                + testField0612
+                + testField0613
+                + testField0614
+                + testField0615
+                + testField0616
+                + testField0617
+                + testField0618
+                + testField0619
+                + testField0620
+                + testField0621
+                + testField0622
+                + testField0623
+                + testField0624
+                + testField0625
+                + testField0626
+                + testField0627
+                + testField0628
+                + testField0629
+                + testField0630
+                + testField0631
+                + testField0632
+                + testField0633
+                + testField0634
+                + testField0635
+                + testField0636
+                + testField0637
+                + testField0638
+                + testField0639
+                + testField0640
+                + testField0641
+                + testField0642
+                + testField0643
+                + testField0644
+                + testField0645
+                + testField0646
+                + testField0647
+                + testField0648
+                + testField0649
+                + testField0650
+                + testField0651
+                + testField0652
+                + testField0653
+                + testField0654
+                + testField0655
+                + testField0656
+                + testField0657
+                + testField0658
+                + testField0659
+                + testField0660
+                + testField0661
+                + testField0662
+                + testField0663
+                + testField0664
+                + testField0665
+                + testField0666
+                + testField0667
+                + testField0668
+                + testField0669
+                + testField0670
+                + testField0671
+                + testField0672
+                + testField0673
+                + testField0674
+                + testField0675
+                + testField0676
+                + testField0677
+                + testField0678
+                + testField0679
+                + testField0680
+                + testField0681
+                + testField0682
+                + testField0683
+                + testField0684
+                + testField0685
+                + testField0686
+                + testField0687
+                + testField0688
+                + testField0689
+                + testField0690
+                + testField0691
+                + testField0692
+                + testField0693
+                + testField0694
+                + testField0695
+                + testField0696
+                + testField0697
+                + testField0698
+                + testField0699
+                + testField0700
+                + testField0701
+                + testField0702
+                + testField0703
+                + testField0704
+                + testField0705
+                + testField0706
+                + testField0707
+                + testField0708
+                + testField0709
+                + testField0710
+                + testField0711
+                + testField0712
+                + testField0713
+                + testField0714
+                + testField0715
+                + testField0716
+                + testField0717
+                + testField0718
+                + testField0719
+                + testField0720
+                + testField0721
+                + testField0722
+                + testField0723
+                + testField0724
+                + testField0725
+                + testField0726
+                + testField0727
+                + testField0728
+                + testField0729
+                + testField0730
+                + testField0731
+                + testField0732
+                + testField0733
+                + testField0734
+                + testField0735
+                + testField0736
+                + testField0737
+                + testField0738
+                + testField0739
+                + testField0740
+                + testField0741
+                + testField0742
+                + testField0743
+                + testField0744
+                + testField0745
+                + testField0746
+                + testField0747
+                + testField0748
+                + testField0749
+                + testField0750
+                + testField0751
+                + testField0752
+                + testField0753
+                + testField0754
+                + testField0755
+                + testField0756
+                + testField0757
+                + testField0758
+                + testField0759
+                + testField0760
+                + testField0761
+                + testField0762
+                + testField0763
+                + testField0764
+                + testField0765
+                + testField0766
+                + testField0767
+                + testField0768
+                + testField0769
+                + testField0770
+                + testField0771
+                + testField0772
+                + testField0773
+                + testField0774
+                + testField0775
+                + testField0776
+                + testField0777
+                + testField0778
+                + testField0779
+                + testField0780
+                + testField0781
+                + testField0782
+                + testField0783
+                + testField0784
+                + testField0785
+                + testField0786
+                + testField0787
+                + testField0788
+                + testField0789
+                + testField0790
+                + testField0791
+                + testField0792
+                + testField0793
+                + testField0794
+                + testField0795
+                + testField0796
+                + testField0797
+                + testField0798
+                + testField0799
+                + testField0800
+                + testField0801
+                + testField0802
+                + testField0803
+                + testField0804
+                + testField0805
+                + testField0806
+                + testField0807
+                + testField0808
+                + testField0809
+                + testField0810
+                + testField0811
+                + testField0812
+                + testField0813
+                + testField0814
+                + testField0815
+                + testField0816
+                + testField0817
+                + testField0818
+                + testField0819
+                + testField0820
+                + testField0821
+                + testField0822
+                + testField0823
+                + testField0824
+                + testField0825
+                + testField0826
+                + testField0827
+                + testField0828
+                + testField0829
+                + testField0830
+                + testField0831
+                + testField0832
+                + testField0833
+                + testField0834
+                + testField0835
+                + testField0836
+                + testField0837
+                + testField0838
+                + testField0839
+                + testField0840
+                + testField0841
+                + testField0842
+                + testField0843
+                + testField0844
+                + testField0845
+                + testField0846
+                + testField0847
+                + testField0848
+                + testField0849
+                + testField0850
+                + testField0851
+                + testField0852
+                + testField0853
+                + testField0854
+                + testField0855
+                + testField0856
+                + testField0857
+                + testField0858
+                + testField0859
+                + testField0860
+                + testField0861
+                + testField0862
+                + testField0863
+                + testField0864
+                + testField0865
+                + testField0866
+                + testField0867
+                + testField0868
+                + testField0869
+                + testField0870
+                + testField0871
+                + testField0872
+                + testField0873
+                + testField0874
+                + testField0875
+                + testField0876
+                + testField0877
+                + testField0878
+                + testField0879
+                + testField0880
+                + testField0881
+                + testField0882
+                + testField0883
+                + testField0884
+                + testField0885
+                + testField0886
+                + testField0887
+                + testField0888
+                + testField0889
+                + testField0890
+                + testField0891
+                + testField0892
+                + testField0893
+                + testField0894
+                + testField0895
+                + testField0896
+                + testField0897
+                + testField0898
+                + testField0899
+                + testField0900
+                + testField0901
+                + testField0902
+                + testField0903
+                + testField0904
+                + testField0905
+                + testField0906
+                + testField0907
+                + testField0908
+                + testField0909
+                + testField0910
+                + testField0911
+                + testField0912
+                + testField0913
+                + testField0914
+                + testField0915
+                + testField0916
+                + testField0917
+                + testField0918
+                + testField0919
+                + testField0920
+                + testField0921
+                + testField0922
+                + testField0923
+                + testField0924
+                + testField0925
+                + testField0926
+                + testField0927
+                + testField0928
+                + testField0929
+                + testField0930
+                + testField0931
+                + testField0932
+                + testField0933
+                + testField0934
+                + testField0935
+                + testField0936
+                + testField0937
+                + testField0938
+                + testField0939
+                + testField0940
+                + testField0941
+                + testField0942
+                + testField0943
+                + testField0944
+                + testField0945
+                + testField0946
+                + testField0947
+                + testField0948
+                + testField0949
+                + testField0950
+                + testField0951
+                + testField0952
+                + testField0953
+                + testField0954
+                + testField0955
+                + testField0956
+                + testField0957
+                + testField0958
+                + testField0959
+                + testField0960
+                + testField0961
+                + testField0962
+                + testField0963
+                + testField0964
+                + testField0965
+                + testField0966
+                + testField0967
+                + testField0968
+                + testField0969
+                + testField0970
+                + testField0971
+                + testField0972
+                + testField0973
+                + testField0974
+                + testField0975
+                + testField0976
+                + testField0977
+                + testField0978
+                + testField0979
+                + testField0980
+                + testField0981
+                + testField0982
+                + testField0983
+                + testField0984
+                + testField0985
+                + testField0986
+                + testField0987
+                + testField0988
+                + testField0989
+                + testField0990
+                + testField0991
+                + testField0992
+                + testField0993
+                + testField0994
+                + testField0995
+                + testField0996
+                + testField0997
+                + testField0998
+                + testField0999
+                + testField1000
+                + testField1001
+                + testField1002
+                + testField1003
+                + testField1004
+                + testField1005
+                + testField1006
+                + testField1007
+                + testField1008
+                + testField1009
+                + testField1010
+                + testField1011
+                + testField1012
+                + testField1013
+                + testField1014
+                + testField1015
+                + testField1016
+                + testField1017
+                + testField1018
+                + testField1019
+                + testField1020
+                + testField1021
+                + testField1022
+                + testField1023
+                + 0;
+    }
+
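+    // Each testFieldNNNN below is a static int initialized to its own index
+    // NNNN, so the long field-by-field sum in the method above resolves to a
+    // fixed, deterministic value. The fields and the sum are evidently
+    // machine-generated to stress-test handling of very large methods and
+    // field counts.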
+    private static int testField0000 = 0;
+    private static int testField0001 = 1;
+    private static int testField0002 = 2;
+    private static int testField0003 = 3;
+    private static int testField0004 = 4;
+    private static int testField0005 = 5;
+    private static int testField0006 = 6;
+    private static int testField0007 = 7;
+    private static int testField0008 = 8;
+    private static int testField0009 = 9;
+    private static int testField0010 = 10;
+    private static int testField0011 = 11;
+    private static int testField0012 = 12;
+    private static int testField0013 = 13;
+    private static int testField0014 = 14;
+    private static int testField0015 = 15;
+    private static int testField0016 = 16;
+    private static int testField0017 = 17;
+    private static int testField0018 = 18;
+    private static int testField0019 = 19;
+    private static int testField0020 = 20;
+    private static int testField0021 = 21;
+    private static int testField0022 = 22;
+    private static int testField0023 = 23;
+    private static int testField0024 = 24;
+    private static int testField0025 = 25;
+    private static int testField0026 = 26;
+    private static int testField0027 = 27;
+    private static int testField0028 = 28;
+    private static int testField0029 = 29;
+    private static int testField0030 = 30;
+    private static int testField0031 = 31;
+    private static int testField0032 = 32;
+    private static int testField0033 = 33;
+    private static int testField0034 = 34;
+    private static int testField0035 = 35;
+    private static int testField0036 = 36;
+    private static int testField0037 = 37;
+    private static int testField0038 = 38;
+    private static int testField0039 = 39;
+    private static int testField0040 = 40;
+    private static int testField0041 = 41;
+    private static int testField0042 = 42;
+    private static int testField0043 = 43;
+    private static int testField0044 = 44;
+    private static int testField0045 = 45;
+    private static int testField0046 = 46;
+    private static int testField0047 = 47;
+    private static int testField0048 = 48;
+    private static int testField0049 = 49;
+    private static int testField0050 = 50;
+    private static int testField0051 = 51;
+    private static int testField0052 = 52;
+    private static int testField0053 = 53;
+    private static int testField0054 = 54;
+    private static int testField0055 = 55;
+    private static int testField0056 = 56;
+    private static int testField0057 = 57;
+    private static int testField0058 = 58;
+    private static int testField0059 = 59;
+    private static int testField0060 = 60;
+    private static int testField0061 = 61;
+    private static int testField0062 = 62;
+    private static int testField0063 = 63;
+    private static int testField0064 = 64;
+    private static int testField0065 = 65;
+    private static int testField0066 = 66;
+    private static int testField0067 = 67;
+    private static int testField0068 = 68;
+    private static int testField0069 = 69;
+    private static int testField0070 = 70;
+    private static int testField0071 = 71;
+    private static int testField0072 = 72;
+    private static int testField0073 = 73;
+    private static int testField0074 = 74;
+    private static int testField0075 = 75;
+    private static int testField0076 = 76;
+    private static int testField0077 = 77;
+    private static int testField0078 = 78;
+    private static int testField0079 = 79;
+    private static int testField0080 = 80;
+    private static int testField0081 = 81;
+    private static int testField0082 = 82;
+    private static int testField0083 = 83;
+    private static int testField0084 = 84;
+    private static int testField0085 = 85;
+    private static int testField0086 = 86;
+    private static int testField0087 = 87;
+    private static int testField0088 = 88;
+    private static int testField0089 = 89;
+    private static int testField0090 = 90;
+    private static int testField0091 = 91;
+    private static int testField0092 = 92;
+    private static int testField0093 = 93;
+    private static int testField0094 = 94;
+    private static int testField0095 = 95;
+    private static int testField0096 = 96;
+    private static int testField0097 = 97;
+    private static int testField0098 = 98;
+    private static int testField0099 = 99;
+    private static int testField0100 = 100;
+    private static int testField0101 = 101;
+    private static int testField0102 = 102;
+    private static int testField0103 = 103;
+    private static int testField0104 = 104;
+    private static int testField0105 = 105;
+    private static int testField0106 = 106;
+    private static int testField0107 = 107;
+    private static int testField0108 = 108;
+    private static int testField0109 = 109;
+    private static int testField0110 = 110;
+    private static int testField0111 = 111;
+    private static int testField0112 = 112;
+    private static int testField0113 = 113;
+    private static int testField0114 = 114;
+    private static int testField0115 = 115;
+    private static int testField0116 = 116;
+    private static int testField0117 = 117;
+    private static int testField0118 = 118;
+    private static int testField0119 = 119;
+    private static int testField0120 = 120;
+    private static int testField0121 = 121;
+    private static int testField0122 = 122;
+    private static int testField0123 = 123;
+    private static int testField0124 = 124;
+    private static int testField0125 = 125;
+    private static int testField0126 = 126;
+    private static int testField0127 = 127;
+    private static int testField0128 = 128;
+    private static int testField0129 = 129;
+    private static int testField0130 = 130;
+    private static int testField0131 = 131;
+    private static int testField0132 = 132;
+    private static int testField0133 = 133;
+    private static int testField0134 = 134;
+    private static int testField0135 = 135;
+    private static int testField0136 = 136;
+    private static int testField0137 = 137;
+    private static int testField0138 = 138;
+    private static int testField0139 = 139;
+    private static int testField0140 = 140;
+    private static int testField0141 = 141;
+    private static int testField0142 = 142;
+    private static int testField0143 = 143;
+    private static int testField0144 = 144;
+    private static int testField0145 = 145;
+    private static int testField0146 = 146;
+    private static int testField0147 = 147;
+    private static int testField0148 = 148;
+    private static int testField0149 = 149;
+    private static int testField0150 = 150;
+    private static int testField0151 = 151;
+    private static int testField0152 = 152;
+    private static int testField0153 = 153;
+    private static int testField0154 = 154;
+    private static int testField0155 = 155;
+    private static int testField0156 = 156;
+    private static int testField0157 = 157;
+    private static int testField0158 = 158;
+    private static int testField0159 = 159;
+    private static int testField0160 = 160;
+    private static int testField0161 = 161;
+    private static int testField0162 = 162;
+    private static int testField0163 = 163;
+    private static int testField0164 = 164;
+    private static int testField0165 = 165;
+    private static int testField0166 = 166;
+    private static int testField0167 = 167;
+    private static int testField0168 = 168;
+    private static int testField0169 = 169;
+    private static int testField0170 = 170;
+    private static int testField0171 = 171;
+    private static int testField0172 = 172;
+    private static int testField0173 = 173;
+    private static int testField0174 = 174;
+    private static int testField0175 = 175;
+    private static int testField0176 = 176;
+    private static int testField0177 = 177;
+    private static int testField0178 = 178;
+    private static int testField0179 = 179;
+    private static int testField0180 = 180;
+    private static int testField0181 = 181;
+    private static int testField0182 = 182;
+    private static int testField0183 = 183;
+    private static int testField0184 = 184;
+    private static int testField0185 = 185;
+    private static int testField0186 = 186;
+    private static int testField0187 = 187;
+    private static int testField0188 = 188;
+    private static int testField0189 = 189;
+    private static int testField0190 = 190;
+    private static int testField0191 = 191;
+    private static int testField0192 = 192;
+    private static int testField0193 = 193;
+    private static int testField0194 = 194;
+    private static int testField0195 = 195;
+    private static int testField0196 = 196;
+    private static int testField0197 = 197;
+    private static int testField0198 = 198;
+    private static int testField0199 = 199;
+    private static int testField0200 = 200;
+    private static int testField0201 = 201;
+    private static int testField0202 = 202;
+    private static int testField0203 = 203;
+    private static int testField0204 = 204;
+    private static int testField0205 = 205;
+    private static int testField0206 = 206;
+    private static int testField0207 = 207;
+    private static int testField0208 = 208;
+    private static int testField0209 = 209;
+    private static int testField0210 = 210;
+    private static int testField0211 = 211;
+    private static int testField0212 = 212;
+    private static int testField0213 = 213;
+    private static int testField0214 = 214;
+    private static int testField0215 = 215;
+    private static int testField0216 = 216;
+    private static int testField0217 = 217;
+    private static int testField0218 = 218;
+    private static int testField0219 = 219;
+    private static int testField0220 = 220;
+    private static int testField0221 = 221;
+    private static int testField0222 = 222;
+    private static int testField0223 = 223;
+    private static int testField0224 = 224;
+    private static int testField0225 = 225;
+    private static int testField0226 = 226;
+    private static int testField0227 = 227;
+    private static int testField0228 = 228;
+    private static int testField0229 = 229;
+    private static int testField0230 = 230;
+    private static int testField0231 = 231;
+    private static int testField0232 = 232;
+    private static int testField0233 = 233;
+    private static int testField0234 = 234;
+    private static int testField0235 = 235;
+    private static int testField0236 = 236;
+    private static int testField0237 = 237;
+    private static int testField0238 = 238;
+    private static int testField0239 = 239;
+    private static int testField0240 = 240;
+    private static int testField0241 = 241;
+    private static int testField0242 = 242;
+    private static int testField0243 = 243;
+    private static int testField0244 = 244;
+    private static int testField0245 = 245;
+    private static int testField0246 = 246;
+    private static int testField0247 = 247;
+    private static int testField0248 = 248;
+    private static int testField0249 = 249;
+    private static int testField0250 = 250;
+    private static int testField0251 = 251;
+    private static int testField0252 = 252;
+    private static int testField0253 = 253;
+    private static int testField0254 = 254;
+    private static int testField0255 = 255;
+    private static int testField0256 = 256;
+    private static int testField0257 = 257;
+    private static int testField0258 = 258;
+    private static int testField0259 = 259;
+    private static int testField0260 = 260;
+    private static int testField0261 = 261;
+    private static int testField0262 = 262;
+    private static int testField0263 = 263;
+    private static int testField0264 = 264;
+    private static int testField0265 = 265;
+    private static int testField0266 = 266;
+    private static int testField0267 = 267;
+    private static int testField0268 = 268;
+    private static int testField0269 = 269;
+    private static int testField0270 = 270;
+    private static int testField0271 = 271;
+    private static int testField0272 = 272;
+    private static int testField0273 = 273;
+    private static int testField0274 = 274;
+    private static int testField0275 = 275;
+    private static int testField0276 = 276;
+    private static int testField0277 = 277;
+    private static int testField0278 = 278;
+    private static int testField0279 = 279;
+    private static int testField0280 = 280;
+    private static int testField0281 = 281;
+    private static int testField0282 = 282;
+    private static int testField0283 = 283;
+    private static int testField0284 = 284;
+    private static int testField0285 = 285;
+    private static int testField0286 = 286;
+    private static int testField0287 = 287;
+    private static int testField0288 = 288;
+    private static int testField0289 = 289;
+    private static int testField0290 = 290;
+    private static int testField0291 = 291;
+    private static int testField0292 = 292;
+    private static int testField0293 = 293;
+    private static int testField0294 = 294;
+    private static int testField0295 = 295;
+    private static int testField0296 = 296;
+    private static int testField0297 = 297;
+    private static int testField0298 = 298;
+    private static int testField0299 = 299;
+    private static int testField0300 = 300;
+    private static int testField0301 = 301;
+    private static int testField0302 = 302;
+    private static int testField0303 = 303;
+    private static int testField0304 = 304;
+    private static int testField0305 = 305;
+    private static int testField0306 = 306;
+    private static int testField0307 = 307;
+    private static int testField0308 = 308;
+    private static int testField0309 = 309;
+    private static int testField0310 = 310;
+    private static int testField0311 = 311;
+    private static int testField0312 = 312;
+    private static int testField0313 = 313;
+    private static int testField0314 = 314;
+    private static int testField0315 = 315;
+    private static int testField0316 = 316;
+    private static int testField0317 = 317;
+    private static int testField0318 = 318;
+    private static int testField0319 = 319;
+    private static int testField0320 = 320;
+    private static int testField0321 = 321;
+    private static int testField0322 = 322;
+    private static int testField0323 = 323;
+    private static int testField0324 = 324;
+    private static int testField0325 = 325;
+    private static int testField0326 = 326;
+    private static int testField0327 = 327;
+    private static int testField0328 = 328;
+    private static int testField0329 = 329;
+    private static int testField0330 = 330;
+    private static int testField0331 = 331;
+    private static int testField0332 = 332;
+    private static int testField0333 = 333;
+    private static int testField0334 = 334;
+    private static int testField0335 = 335;
+    private static int testField0336 = 336;
+    private static int testField0337 = 337;
+    private static int testField0338 = 338;
+    private static int testField0339 = 339;
+    private static int testField0340 = 340;
+    private static int testField0341 = 341;
+    private static int testField0342 = 342;
+    private static int testField0343 = 343;
+    private static int testField0344 = 344;
+    private static int testField0345 = 345;
+    private static int testField0346 = 346;
+    private static int testField0347 = 347;
+    private static int testField0348 = 348;
+    private static int testField0349 = 349;
+    private static int testField0350 = 350;
+    private static int testField0351 = 351;
+    private static int testField0352 = 352;
+    private static int testField0353 = 353;
+    private static int testField0354 = 354;
+    private static int testField0355 = 355;
+    private static int testField0356 = 356;
+    private static int testField0357 = 357;
+    private static int testField0358 = 358;
+    private static int testField0359 = 359;
+    private static int testField0360 = 360;
+    private static int testField0361 = 361;
+    private static int testField0362 = 362;
+    private static int testField0363 = 363;
+    private static int testField0364 = 364;
+    private static int testField0365 = 365;
+    private static int testField0366 = 366;
+    private static int testField0367 = 367;
+    private static int testField0368 = 368;
+    private static int testField0369 = 369;
+    private static int testField0370 = 370;
+    private static int testField0371 = 371;
+    private static int testField0372 = 372;
+    private static int testField0373 = 373;
+    private static int testField0374 = 374;
+    private static int testField0375 = 375;
+    private static int testField0376 = 376;
+    private static int testField0377 = 377;
+    private static int testField0378 = 378;
+    private static int testField0379 = 379;
+    private static int testField0380 = 380;
+    private static int testField0381 = 381;
+    private static int testField0382 = 382;
+    private static int testField0383 = 383;
+    private static int testField0384 = 384;
+    private static int testField0385 = 385;
+    private static int testField0386 = 386;
+    private static int testField0387 = 387;
+    private static int testField0388 = 388;
+    private static int testField0389 = 389;
+    private static int testField0390 = 390;
+    private static int testField0391 = 391;
+    private static int testField0392 = 392;
+    private static int testField0393 = 393;
+    private static int testField0394 = 394;
+    private static int testField0395 = 395;
+    private static int testField0396 = 396;
+    private static int testField0397 = 397;
+    private static int testField0398 = 398;
+    private static int testField0399 = 399;
+    private static int testField0400 = 400;
+    private static int testField0401 = 401;
+    private static int testField0402 = 402;
+    private static int testField0403 = 403;
+    private static int testField0404 = 404;
+    private static int testField0405 = 405;
+    private static int testField0406 = 406;
+    private static int testField0407 = 407;
+    private static int testField0408 = 408;
+    private static int testField0409 = 409;
+    private static int testField0410 = 410;
+    private static int testField0411 = 411;
+    private static int testField0412 = 412;
+    private static int testField0413 = 413;
+    private static int testField0414 = 414;
+    private static int testField0415 = 415;
+    private static int testField0416 = 416;
+    private static int testField0417 = 417;
+    private static int testField0418 = 418;
+    private static int testField0419 = 419;
+    private static int testField0420 = 420;
+    private static int testField0421 = 421;
+    private static int testField0422 = 422;
+    private static int testField0423 = 423;
+    private static int testField0424 = 424;
+    private static int testField0425 = 425;
+    private static int testField0426 = 426;
+    private static int testField0427 = 427;
+    private static int testField0428 = 428;
+    private static int testField0429 = 429;
+    private static int testField0430 = 430;
+    private static int testField0431 = 431;
+    private static int testField0432 = 432;
+    private static int testField0433 = 433;
+    private static int testField0434 = 434;
+    private static int testField0435 = 435;
+    private static int testField0436 = 436;
+    private static int testField0437 = 437;
+    private static int testField0438 = 438;
+    private static int testField0439 = 439;
+    private static int testField0440 = 440;
+    private static int testField0441 = 441;
+    private static int testField0442 = 442;
+    private static int testField0443 = 443;
+    private static int testField0444 = 444;
+    private static int testField0445 = 445;
+    private static int testField0446 = 446;
+    private static int testField0447 = 447;
+    private static int testField0448 = 448;
+    private static int testField0449 = 449;
+    private static int testField0450 = 450;
+    private static int testField0451 = 451;
+    private static int testField0452 = 452;
+    private static int testField0453 = 453;
+    private static int testField0454 = 454;
+    private static int testField0455 = 455;
+    private static int testField0456 = 456;
+    private static int testField0457 = 457;
+    private static int testField0458 = 458;
+    private static int testField0459 = 459;
+    private static int testField0460 = 460;
+    private static int testField0461 = 461;
+    private static int testField0462 = 462;
+    private static int testField0463 = 463;
+    private static int testField0464 = 464;
+    private static int testField0465 = 465;
+    private static int testField0466 = 466;
+    private static int testField0467 = 467;
+    private static int testField0468 = 468;
+    private static int testField0469 = 469;
+    private static int testField0470 = 470;
+    private static int testField0471 = 471;
+    private static int testField0472 = 472;
+    private static int testField0473 = 473;
+    private static int testField0474 = 474;
+    private static int testField0475 = 475;
+    private static int testField0476 = 476;
+    private static int testField0477 = 477;
+    private static int testField0478 = 478;
+    private static int testField0479 = 479;
+    private static int testField0480 = 480;
+    private static int testField0481 = 481;
+    private static int testField0482 = 482;
+    private static int testField0483 = 483;
+    private static int testField0484 = 484;
+    private static int testField0485 = 485;
+    private static int testField0486 = 486;
+    private static int testField0487 = 487;
+    private static int testField0488 = 488;
+    private static int testField0489 = 489;
+    private static int testField0490 = 490;
+    private static int testField0491 = 491;
+    private static int testField0492 = 492;
+    private static int testField0493 = 493;
+    private static int testField0494 = 494;
+    private static int testField0495 = 495;
+    private static int testField0496 = 496;
+    private static int testField0497 = 497;
+    private static int testField0498 = 498;
+    private static int testField0499 = 499;
+    private static int testField0500 = 500;
+    private static int testField0501 = 501;
+    private static int testField0502 = 502;
+    private static int testField0503 = 503;
+    private static int testField0504 = 504;
+    private static int testField0505 = 505;
+    private static int testField0506 = 506;
+    private static int testField0507 = 507;
+    private static int testField0508 = 508;
+    private static int testField0509 = 509;
+    private static int testField0510 = 510;
+    private static int testField0511 = 511;
+    private static int testField0512 = 512;
+    private static int testField0513 = 513;
+    private static int testField0514 = 514;
+    private static int testField0515 = 515;
+    private static int testField0516 = 516;
+    private static int testField0517 = 517;
+    private static int testField0518 = 518;
+    private static int testField0519 = 519;
+    private static int testField0520 = 520;
+    private static int testField0521 = 521;
+    private static int testField0522 = 522;
+    private static int testField0523 = 523;
+    private static int testField0524 = 524;
+    private static int testField0525 = 525;
+    private static int testField0526 = 526;
+    private static int testField0527 = 527;
+    private static int testField0528 = 528;
+    private static int testField0529 = 529;
+    private static int testField0530 = 530;
+    private static int testField0531 = 531;
+    private static int testField0532 = 532;
+    private static int testField0533 = 533;
+    private static int testField0534 = 534;
+    private static int testField0535 = 535;
+    private static int testField0536 = 536;
+    private static int testField0537 = 537;
+    private static int testField0538 = 538;
+    private static int testField0539 = 539;
+    private static int testField0540 = 540;
+    private static int testField0541 = 541;
+    private static int testField0542 = 542;
+    private static int testField0543 = 543;
+    private static int testField0544 = 544;
+    private static int testField0545 = 545;
+    private static int testField0546 = 546;
+    private static int testField0547 = 547;
+    private static int testField0548 = 548;
+    private static int testField0549 = 549;
+    private static int testField0550 = 550;
+    private static int testField0551 = 551;
+    private static int testField0552 = 552;
+    private static int testField0553 = 553;
+    private static int testField0554 = 554;
+    private static int testField0555 = 555;
+    private static int testField0556 = 556;
+    private static int testField0557 = 557;
+    private static int testField0558 = 558;
+    private static int testField0559 = 559;
+    private static int testField0560 = 560;
+    private static int testField0561 = 561;
+    private static int testField0562 = 562;
+    private static int testField0563 = 563;
+    private static int testField0564 = 564;
+    private static int testField0565 = 565;
+    private static int testField0566 = 566;
+    private static int testField0567 = 567;
+    private static int testField0568 = 568;
+    private static int testField0569 = 569;
+    private static int testField0570 = 570;
+    private static int testField0571 = 571;
+    private static int testField0572 = 572;
+    private static int testField0573 = 573;
+    private static int testField0574 = 574;
+    private static int testField0575 = 575;
+    private static int testField0576 = 576;
+    private static int testField0577 = 577;
+    private static int testField0578 = 578;
+    private static int testField0579 = 579;
+    private static int testField0580 = 580;
+    private static int testField0581 = 581;
+    private static int testField0582 = 582;
+    private static int testField0583 = 583;
+    private static int testField0584 = 584;
+    private static int testField0585 = 585;
+    private static int testField0586 = 586;
+    private static int testField0587 = 587;
+    private static int testField0588 = 588;
+    private static int testField0589 = 589;
+    private static int testField0590 = 590;
+    private static int testField0591 = 591;
+    private static int testField0592 = 592;
+    private static int testField0593 = 593;
+    private static int testField0594 = 594;
+    private static int testField0595 = 595;
+    private static int testField0596 = 596;
+    private static int testField0597 = 597;
+    private static int testField0598 = 598;
+    private static int testField0599 = 599;
+    private static int testField0600 = 600;
+    private static int testField0601 = 601;
+    private static int testField0602 = 602;
+    private static int testField0603 = 603;
+    private static int testField0604 = 604;
+    private static int testField0605 = 605;
+    private static int testField0606 = 606;
+    private static int testField0607 = 607;
+    private static int testField0608 = 608;
+    private static int testField0609 = 609;
+    private static int testField0610 = 610;
+    private static int testField0611 = 611;
+    private static int testField0612 = 612;
+    private static int testField0613 = 613;
+    private static int testField0614 = 614;
+    private static int testField0615 = 615;
+    private static int testField0616 = 616;
+    private static int testField0617 = 617;
+    private static int testField0618 = 618;
+    private static int testField0619 = 619;
+    private static int testField0620 = 620;
+    private static int testField0621 = 621;
+    private static int testField0622 = 622;
+    private static int testField0623 = 623;
+    private static int testField0624 = 624;
+    private static int testField0625 = 625;
+    private static int testField0626 = 626;
+    private static int testField0627 = 627;
+    private static int testField0628 = 628;
+    private static int testField0629 = 629;
+    private static int testField0630 = 630;
+    private static int testField0631 = 631;
+    private static int testField0632 = 632;
+    private static int testField0633 = 633;
+    private static int testField0634 = 634;
+    private static int testField0635 = 635;
+    private static int testField0636 = 636;
+    private static int testField0637 = 637;
+    private static int testField0638 = 638;
+    private static int testField0639 = 639;
+    private static int testField0640 = 640;
+    private static int testField0641 = 641;
+    private static int testField0642 = 642;
+    private static int testField0643 = 643;
+    private static int testField0644 = 644;
+    private static int testField0645 = 645;
+    private static int testField0646 = 646;
+    private static int testField0647 = 647;
+    private static int testField0648 = 648;
+    private static int testField0649 = 649;
+    private static int testField0650 = 650;
+    private static int testField0651 = 651;
+    private static int testField0652 = 652;
+    private static int testField0653 = 653;
+    private static int testField0654 = 654;
+    private static int testField0655 = 655;
+    private static int testField0656 = 656;
+    private static int testField0657 = 657;
+    private static int testField0658 = 658;
+    private static int testField0659 = 659;
+    private static int testField0660 = 660;
+    private static int testField0661 = 661;
+    private static int testField0662 = 662;
+    private static int testField0663 = 663;
+    private static int testField0664 = 664;
+    private static int testField0665 = 665;
+    private static int testField0666 = 666;
+    private static int testField0667 = 667;
+    private static int testField0668 = 668;
+    private static int testField0669 = 669;
+    private static int testField0670 = 670;
+    private static int testField0671 = 671;
+    private static int testField0672 = 672;
+    private static int testField0673 = 673;
+    private static int testField0674 = 674;
+    private static int testField0675 = 675;
+    private static int testField0676 = 676;
+    private static int testField0677 = 677;
+    private static int testField0678 = 678;
+    private static int testField0679 = 679;
+    private static int testField0680 = 680;
+    private static int testField0681 = 681;
+    private static int testField0682 = 682;
+    private static int testField0683 = 683;
+    private static int testField0684 = 684;
+    private static int testField0685 = 685;
+    private static int testField0686 = 686;
+    private static int testField0687 = 687;
+    private static int testField0688 = 688;
+    private static int testField0689 = 689;
+    private static int testField0690 = 690;
+    private static int testField0691 = 691;
+    private static int testField0692 = 692;
+    private static int testField0693 = 693;
+    private static int testField0694 = 694;
+    private static int testField0695 = 695;
+    private static int testField0696 = 696;
+    private static int testField0697 = 697;
+    private static int testField0698 = 698;
+    private static int testField0699 = 699;
+    private static int testField0700 = 700;
+    private static int testField0701 = 701;
+    private static int testField0702 = 702;
+    private static int testField0703 = 703;
+    private static int testField0704 = 704;
+    private static int testField0705 = 705;
+    private static int testField0706 = 706;
+    private static int testField0707 = 707;
+    private static int testField0708 = 708;
+    private static int testField0709 = 709;
+    private static int testField0710 = 710;
+    private static int testField0711 = 711;
+    private static int testField0712 = 712;
+    private static int testField0713 = 713;
+    private static int testField0714 = 714;
+    private static int testField0715 = 715;
+    private static int testField0716 = 716;
+    private static int testField0717 = 717;
+    private static int testField0718 = 718;
+    private static int testField0719 = 719;
+    private static int testField0720 = 720;
+    private static int testField0721 = 721;
+    private static int testField0722 = 722;
+    private static int testField0723 = 723;
+    private static int testField0724 = 724;
+    private static int testField0725 = 725;
+    private static int testField0726 = 726;
+    private static int testField0727 = 727;
+    private static int testField0728 = 728;
+    private static int testField0729 = 729;
+    private static int testField0730 = 730;
+    private static int testField0731 = 731;
+    private static int testField0732 = 732;
+    private static int testField0733 = 733;
+    private static int testField0734 = 734;
+    private static int testField0735 = 735;
+    private static int testField0736 = 736;
+    private static int testField0737 = 737;
+    private static int testField0738 = 738;
+    private static int testField0739 = 739;
+    private static int testField0740 = 740;
+    private static int testField0741 = 741;
+    private static int testField0742 = 742;
+    private static int testField0743 = 743;
+    private static int testField0744 = 744;
+    private static int testField0745 = 745;
+    private static int testField0746 = 746;
+    private static int testField0747 = 747;
+    private static int testField0748 = 748;
+    private static int testField0749 = 749;
+    private static int testField0750 = 750;
+    private static int testField0751 = 751;
+    private static int testField0752 = 752;
+    private static int testField0753 = 753;
+    private static int testField0754 = 754;
+    private static int testField0755 = 755;
+    private static int testField0756 = 756;
+    private static int testField0757 = 757;
+    private static int testField0758 = 758;
+    private static int testField0759 = 759;
+    private static int testField0760 = 760;
+    private static int testField0761 = 761;
+    private static int testField0762 = 762;
+    private static int testField0763 = 763;
+    private static int testField0764 = 764;
+    private static int testField0765 = 765;
+    private static int testField0766 = 766;
+    private static int testField0767 = 767;
+    private static int testField0768 = 768;
+    private static int testField0769 = 769;
+    private static int testField0770 = 770;
+    private static int testField0771 = 771;
+    private static int testField0772 = 772;
+    private static int testField0773 = 773;
+    private static int testField0774 = 774;
+    private static int testField0775 = 775;
+    private static int testField0776 = 776;
+    private static int testField0777 = 777;
+    private static int testField0778 = 778;
+    private static int testField0779 = 779;
+    private static int testField0780 = 780;
+    private static int testField0781 = 781;
+    private static int testField0782 = 782;
+    private static int testField0783 = 783;
+    private static int testField0784 = 784;
+    private static int testField0785 = 785;
+    private static int testField0786 = 786;
+    private static int testField0787 = 787;
+    private static int testField0788 = 788;
+    private static int testField0789 = 789;
+    private static int testField0790 = 790;
+    private static int testField0791 = 791;
+    private static int testField0792 = 792;
+    private static int testField0793 = 793;
+    private static int testField0794 = 794;
+    private static int testField0795 = 795;
+    private static int testField0796 = 796;
+    private static int testField0797 = 797;
+    private static int testField0798 = 798;
+    private static int testField0799 = 799;
+    private static int testField0800 = 800;
+    private static int testField0801 = 801;
+    private static int testField0802 = 802;
+    private static int testField0803 = 803;
+    private static int testField0804 = 804;
+    private static int testField0805 = 805;
+    private static int testField0806 = 806;
+    private static int testField0807 = 807;
+    private static int testField0808 = 808;
+    private static int testField0809 = 809;
+    private static int testField0810 = 810;
+    private static int testField0811 = 811;
+    private static int testField0812 = 812;
+    private static int testField0813 = 813;
+    private static int testField0814 = 814;
+    private static int testField0815 = 815;
+    private static int testField0816 = 816;
+    private static int testField0817 = 817;
+    private static int testField0818 = 818;
+    private static int testField0819 = 819;
+    private static int testField0820 = 820;
+    private static int testField0821 = 821;
+    private static int testField0822 = 822;
+    private static int testField0823 = 823;
+    private static int testField0824 = 824;
+    private static int testField0825 = 825;
+    private static int testField0826 = 826;
+    private static int testField0827 = 827;
+    private static int testField0828 = 828;
+    private static int testField0829 = 829;
+    private static int testField0830 = 830;
+    private static int testField0831 = 831;
+    private static int testField0832 = 832;
+    private static int testField0833 = 833;
+    private static int testField0834 = 834;
+    private static int testField0835 = 835;
+    private static int testField0836 = 836;
+    private static int testField0837 = 837;
+    private static int testField0838 = 838;
+    private static int testField0839 = 839;
+    private static int testField0840 = 840;
+    private static int testField0841 = 841;
+    private static int testField0842 = 842;
+    private static int testField0843 = 843;
+    private static int testField0844 = 844;
+    private static int testField0845 = 845;
+    private static int testField0846 = 846;
+    private static int testField0847 = 847;
+    private static int testField0848 = 848;
+    private static int testField0849 = 849;
+    private static int testField0850 = 850;
+    private static int testField0851 = 851;
+    private static int testField0852 = 852;
+    private static int testField0853 = 853;
+    private static int testField0854 = 854;
+    private static int testField0855 = 855;
+    private static int testField0856 = 856;
+    private static int testField0857 = 857;
+    private static int testField0858 = 858;
+    private static int testField0859 = 859;
+    private static int testField0860 = 860;
+    private static int testField0861 = 861;
+    private static int testField0862 = 862;
+    private static int testField0863 = 863;
+    private static int testField0864 = 864;
+    private static int testField0865 = 865;
+    private static int testField0866 = 866;
+    private static int testField0867 = 867;
+    private static int testField0868 = 868;
+    private static int testField0869 = 869;
+    private static int testField0870 = 870;
+    private static int testField0871 = 871;
+    private static int testField0872 = 872;
+    private static int testField0873 = 873;
+    private static int testField0874 = 874;
+    private static int testField0875 = 875;
+    private static int testField0876 = 876;
+    private static int testField0877 = 877;
+    private static int testField0878 = 878;
+    private static int testField0879 = 879;
+    private static int testField0880 = 880;
+    private static int testField0881 = 881;
+    private static int testField0882 = 882;
+    private static int testField0883 = 883;
+    private static int testField0884 = 884;
+    private static int testField0885 = 885;
+    private static int testField0886 = 886;
+    private static int testField0887 = 887;
+    private static int testField0888 = 888;
+    private static int testField0889 = 889;
+    private static int testField0890 = 890;
+    private static int testField0891 = 891;
+    private static int testField0892 = 892;
+    private static int testField0893 = 893;
+    private static int testField0894 = 894;
+    private static int testField0895 = 895;
+    private static int testField0896 = 896;
+    private static int testField0897 = 897;
+    private static int testField0898 = 898;
+    private static int testField0899 = 899;
+    private static int testField0900 = 900;
+    private static int testField0901 = 901;
+    private static int testField0902 = 902;
+    private static int testField0903 = 903;
+    private static int testField0904 = 904;
+    private static int testField0905 = 905;
+    private static int testField0906 = 906;
+    private static int testField0907 = 907;
+    private static int testField0908 = 908;
+    private static int testField0909 = 909;
+    private static int testField0910 = 910;
+    private static int testField0911 = 911;
+    private static int testField0912 = 912;
+    private static int testField0913 = 913;
+    private static int testField0914 = 914;
+    private static int testField0915 = 915;
+    private static int testField0916 = 916;
+    private static int testField0917 = 917;
+    private static int testField0918 = 918;
+    private static int testField0919 = 919;
+    private static int testField0920 = 920;
+    private static int testField0921 = 921;
+    private static int testField0922 = 922;
+    private static int testField0923 = 923;
+    private static int testField0924 = 924;
+    private static int testField0925 = 925;
+    private static int testField0926 = 926;
+    private static int testField0927 = 927;
+    private static int testField0928 = 928;
+    private static int testField0929 = 929;
+    private static int testField0930 = 930;
+    private static int testField0931 = 931;
+    private static int testField0932 = 932;
+    private static int testField0933 = 933;
+    private static int testField0934 = 934;
+    private static int testField0935 = 935;
+    private static int testField0936 = 936;
+    private static int testField0937 = 937;
+    private static int testField0938 = 938;
+    private static int testField0939 = 939;
+    private static int testField0940 = 940;
+    private static int testField0941 = 941;
+    private static int testField0942 = 942;
+    private static int testField0943 = 943;
+    private static int testField0944 = 944;
+    private static int testField0945 = 945;
+    private static int testField0946 = 946;
+    private static int testField0947 = 947;
+    private static int testField0948 = 948;
+    private static int testField0949 = 949;
+    private static int testField0950 = 950;
+    private static int testField0951 = 951;
+    private static int testField0952 = 952;
+    private static int testField0953 = 953;
+    private static int testField0954 = 954;
+    private static int testField0955 = 955;
+    private static int testField0956 = 956;
+    private static int testField0957 = 957;
+    private static int testField0958 = 958;
+    private static int testField0959 = 959;
+    private static int testField0960 = 960;
+    private static int testField0961 = 961;
+    private static int testField0962 = 962;
+    private static int testField0963 = 963;
+    private static int testField0964 = 964;
+    private static int testField0965 = 965;
+    private static int testField0966 = 966;
+    private static int testField0967 = 967;
+    private static int testField0968 = 968;
+    private static int testField0969 = 969;
+    private static int testField0970 = 970;
+    private static int testField0971 = 971;
+    private static int testField0972 = 972;
+    private static int testField0973 = 973;
+    private static int testField0974 = 974;
+    private static int testField0975 = 975;
+    private static int testField0976 = 976;
+    private static int testField0977 = 977;
+    private static int testField0978 = 978;
+    private static int testField0979 = 979;
+    private static int testField0980 = 980;
+    private static int testField0981 = 981;
+    private static int testField0982 = 982;
+    private static int testField0983 = 983;
+    private static int testField0984 = 984;
+    private static int testField0985 = 985;
+    private static int testField0986 = 986;
+    private static int testField0987 = 987;
+    private static int testField0988 = 988;
+    private static int testField0989 = 989;
+    private static int testField0990 = 990;
+    private static int testField0991 = 991;
+    private static int testField0992 = 992;
+    private static int testField0993 = 993;
+    private static int testField0994 = 994;
+    private static int testField0995 = 995;
+    private static int testField0996 = 996;
+    private static int testField0997 = 997;
+    private static int testField0998 = 998;
+    private static int testField0999 = 999;
+    private static int testField1000 = 1000;
+    private static int testField1001 = 1001;
+    private static int testField1002 = 1002;
+    private static int testField1003 = 1003;
+    private static int testField1004 = 1004;
+    private static int testField1005 = 1005;
+    private static int testField1006 = 1006;
+    private static int testField1007 = 1007;
+    private static int testField1008 = 1008;
+    private static int testField1009 = 1009;
+    private static int testField1010 = 1010;
+    private static int testField1011 = 1011;
+    private static int testField1012 = 1012;
+    private static int testField1013 = 1013;
+    private static int testField1014 = 1014;
+    private static int testField1015 = 1015;
+    private static int testField1016 = 1016;
+    private static int testField1017 = 1017;
+    private static int testField1018 = 1018;
+    private static int testField1019 = 1019;
+    private static int testField1020 = 1020;
+    private static int testField1021 = 1021;
+    private static int testField1022 = 1022;
+    private static int testField1023 = 1023;
+}
diff --git a/test/527-checker-array-access-split/src/Main.java b/test/527-checker-array-access-split/src/Main.java
index 3de900a..a5caa7b 100644
--- a/test/527-checker-array-access-split/src/Main.java
+++ b/test/527-checker-array-access-split/src/Main.java
@@ -327,17 +327,17 @@
   // check.
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (before)
-  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Const7:i\d+>>        IntConstant 7
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+  /// CHECK:             <<Div:i\d+>>           Div [<<ArrayGet>>,<<Const7>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Div>>]
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() instruction_simplifier_arm64 (after)
-  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<Const7:i\d+>>        IntConstant 7
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
@@ -345,12 +345,12 @@
   //  -------------- Loop
   /// CHECK:             <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Div:i\d+>>           Div [<<ArrayGet>>,<<Const7>>]
   /// CHECK:             <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
-  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Div>>]
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE1() GVN$after_arch (after)
-  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<Const7:i\d+>>        IntConstant 7
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
@@ -358,23 +358,23 @@
   //  -------------- Loop
   /// CHECK:             <<Address:i\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Div:i\d+>>           Div [<<ArrayGet>>,<<Const7>>]
   /// CHECK-NOT:                                IntermediateAddress
-  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Div>>]
 
 
   /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (before)
-  /// CHECK:             <<Const1:i\d+>>        IntConstant 1
+  /// CHECK:             <<Const7:i\d+>>        IntConstant 7
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
   /// CHECK:                                    If
   //  -------------- Loop
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
-  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Add>>]
+  /// CHECK:             <<Div:i\d+>>           Div [<<ArrayGet>>,<<Const7>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index>>,<<Div>>]
 
   /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() instruction_simplifier_arm (after)
-  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<Const7:i\d+>>        IntConstant 7
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
@@ -382,12 +382,12 @@
   //  -------------- Loop
   /// CHECK:             <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-NEXT:        <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Div:i\d+>>           Div [<<ArrayGet>>,<<Const7>>]
   /// CHECK:             <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
-  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+  /// CHECK-NEXT:                               ArraySet [<<Address2>>,<<Index>>,<<Div>>]
 
   /// CHECK-START-ARM:   int Main.canMergeAfterBCE1() GVN$after_arch (after)
-  /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
+  /// CHECK-DAG:         <<Const7:i\d+>>        IntConstant 7
   /// CHECK-DAG:         <<DataOffset:i\d+>>    IntConstant 12
   /// CHECK:             <<Array:l\d+>>         NewArray
   /// CHECK:             <<Index:i\d+>>         Phi
@@ -395,14 +395,14 @@
   //  -------------- Loop
   /// CHECK:             <<Address:i\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK:             <<ArrayGet:i\d+>>      ArrayGet [<<Address>>,<<Index>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGet>>,<<Const1>>]
+  /// CHECK:             <<Div:i\d+>>           Div [<<ArrayGet>>,<<Const7>>]
   /// CHECK-NOT:                                IntermediateAddress
-  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Add>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index>>,<<Div>>]
 
   public static int canMergeAfterBCE1() {
-    int[] array = {0, 1, 2, 3};
+    int[] array = {0, 7, 14, 21};
     for (int i = 0; i < array.length; i++) {
-      array[i] = array[i] + 1;
+      array[i] = array[i] / 7;
     }
     return array[array.length - 1];
   }
@@ -421,8 +421,8 @@
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Array>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Array>>,<<Index1>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+  /// CHECK:             <<Shl:i\d+>>           Shl [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Shl>>]
 
   // Note that we do not care that the `DataOffset` is `12`. But if we do not
   // specify it and any other `IntConstant` appears before that instruction,
@@ -441,9 +441,9 @@
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK-DAG:         <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:             <<Shl:i\d+>>           Shl [<<ArrayGetI>>,<<ArrayGetI1>>]
   /// CHECK:             <<Address3:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
-  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Shl>>]
 
   /// CHECK-START-ARM64: int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
@@ -456,8 +456,8 @@
   /// CHECK-DAG:         <<Address:i\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+  /// CHECK:             <<Shl:i\d+>>           Shl [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Shl>>]
 
   // There should be only one intermediate address computation in the loop.
 
@@ -475,8 +475,8 @@
   /// CHECK-DAG:         <<Index1:i\d+>>        Add [<<Index>>,<<Const1>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Array>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Array>>,<<Index1>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Add>>]
+  /// CHECK:             <<Shl:i\d+>>           Shl [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Array>>,<<Index1>>,<<Shl>>]
 
   /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() instruction_simplifier_arm (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
@@ -490,9 +490,9 @@
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address1>>,<<Index>>]
   /// CHECK-DAG:         <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address2>>,<<Index1>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:             <<Shl:i\d+>>           Shl [<<ArrayGetI>>,<<ArrayGetI1>>]
   /// CHECK:             <<Address3:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
-  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Add>>]
+  /// CHECK:                                    ArraySet [<<Address3>>,<<Index1>>,<<Shl>>]
 
   /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK-DAG:         <<Const1:i\d+>>        IntConstant 1
@@ -505,17 +505,17 @@
   /// CHECK-DAG:         <<Address:i\d+>>       IntermediateAddress [<<Array>>,<<DataOffset>>]
   /// CHECK-DAG:         <<ArrayGetI:i\d+>>     ArrayGet [<<Address>>,<<Index>>]
   /// CHECK-DAG:         <<ArrayGetI1:i\d+>>    ArrayGet [<<Address>>,<<Index1>>]
-  /// CHECK:             <<Add:i\d+>>           Add [<<ArrayGetI>>,<<ArrayGetI1>>]
-  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Add>>]
+  /// CHECK:             <<Shl:i\d+>>           Shl [<<ArrayGetI>>,<<ArrayGetI1>>]
+  /// CHECK:                                    ArraySet [<<Address>>,<<Index1>>,<<Shl>>]
 
   /// CHECK-START-ARM:   int Main.canMergeAfterBCE2() GVN$after_arch (after)
   /// CHECK:                                    IntermediateAddress
   /// CHECK-NOT:                                IntermediateAddress
 
   public static int canMergeAfterBCE2() {
-    int[] array = {0, 1, 2, 3};
+    int[] array = {64, 8, 4, 2};
     for (int i = 0; i < array.length - 1; i++) {
-      array[i + 1] = array[i] + array[i + 1];
+      array[i + 1] = array[i] << array[i + 1];
     }
     return array[array.length - 1];
   }
@@ -571,8 +571,8 @@
     accrossGC(array, 0);
     assertIntEquals(125, array[0]);
 
-    assertIntEquals(4, canMergeAfterBCE1());
-    assertIntEquals(6, canMergeAfterBCE2());
+    assertIntEquals(3, canMergeAfterBCE1());
+    assertIntEquals(1048576, canMergeAfterBCE2());
 
     assertIntEquals(18, checkLongFloatDouble());
   }
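
Reviewer note: the updated assertions follow directly from the rewritten loop bodies, and the switch from Add to Div/Shl presumably keeps the loop's arithmetic from being confused with the Add instructions the checker already matches for index and address computation. A standalone sketch (the class name is illustrative, not part of the patch) reproduces the new expected values:

    public class MergeAfterBceCheck {
      public static void main(String[] args) {
        int[] a = {0, 7, 14, 21};
        for (int i = 0; i < a.length; i++) {
          a[i] = a[i] / 7;                    // a becomes {0, 1, 2, 3}
        }
        System.out.println(a[a.length - 1]);  // 3, the new canMergeAfterBCE1() expectation

        int[] b = {64, 8, 4, 2};
        for (int i = 0; i < b.length - 1; i++) {
          b[i + 1] = b[i] << b[i + 1];        // 64<<8=16384; 16384<<4=262144; 262144<<2=1048576
        }
        System.out.println(b[b.length - 1]);  // 1048576, the new canMergeAfterBCE2() expectation
      }
    }
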
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 9f4be6c..6632503 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -747,6 +747,69 @@
     return 1.0f;
   }
 
+  /// CHECK-START: TestClass2 Main.testStoreStore() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+
+  /// CHECK-START: TestClass2 Main.testStoreStore() load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
+
+  private static TestClass2 testStoreStore() {
+    TestClass2 obj = new TestClass2();
+    obj.i = 41;
+    obj.j = 42;
+    obj.i = 41;
+    obj.j = 43;
+    return obj;
+  }
+
+  /// CHECK-START: int Main.testStoreStoreWithDeoptimize(int[]) load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK: Deoptimize
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testStoreStoreWithDeoptimize(int[]) load_store_elimination (after)
+  /// CHECK: NewInstance
+  /// CHECK: InstanceFieldSet
+  /// CHECK: InstanceFieldSet
+  /// CHECK-NOT: InstanceFieldSet
+  /// CHECK: Deoptimize
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK-NOT: ArrayGet
+
+  private static int testStoreStoreWithDeoptimize(int[] arr) {
+    TestClass2 obj = new TestClass2();
+    obj.i = 41;
+    obj.j = 42;
+    obj.i = 41;
+    obj.j = 43;
+    arr[0] = 1;  // One HDeoptimize here.
+    arr[1] = 1;
+    arr[2] = 1;
+    arr[3] = 1;
+    return arr[0] + arr[1] + arr[2] + arr[3];
+  }
+
   /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (before)
   /// CHECK: NewInstance
 
@@ -785,6 +848,86 @@
     return new Circle(Math.PI).getArea();
   }
 
+  /// CHECK-START: int Main.testAllocationEliminationOfArray1() load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray1() load_store_elimination (after)
+  /// CHECK-NOT: NewArray
+  /// CHECK-NOT: ArraySet
+  /// CHECK-NOT: ArrayGet
+  private static int testAllocationEliminationOfArray1() {
+    int[] array = new int[4];
+    array[2] = 4;
+    array[3] = 7;
+    return array[0] + array[1] + array[2] + array[3];
+  }
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray2() load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray2() load_store_elimination (after)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  private static int testAllocationEliminationOfArray2() {
+    // Cannot eliminate array allocation since array is accessed with non-constant
+    // index.
+    int[] array = new int[4];
+    array[2] = 4;
+    array[3] = 7;
+    int sum = 0;
+    for (int e : array) {
+      sum += e;
+    }
+    return sum;
+  }
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray3(int) load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray3(int) load_store_elimination (after)
+  /// CHECK-NOT: NewArray
+  /// CHECK-NOT: ArraySet
+  /// CHECK-NOT: ArrayGet
+  private static int testAllocationEliminationOfArray3(int i) {
+    int[] array = new int[4];
+    array[i] = 4;
+    return array[i];
+  }
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray4(int) load_store_elimination (before)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK: ArrayGet
+
+  /// CHECK-START: int Main.testAllocationEliminationOfArray4(int) load_store_elimination (after)
+  /// CHECK: NewArray
+  /// CHECK: ArraySet
+  /// CHECK: ArraySet
+  /// CHECK: ArrayGet
+  /// CHECK-NOT: ArrayGet
+  private static int testAllocationEliminationOfArray4(int i) {
+    // Cannot eliminate array allocation due to index aliasing between 1 and i.
+    int[] array = new int[4];
+    array[1] = 2;
+    array[i] = 4;
+    return array[1] + array[i];
+  }
+
   static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -865,6 +1008,15 @@
     assertDoubleEquals(darray[0], Math.PI);
     assertDoubleEquals(darray[1], Math.PI);
     assertDoubleEquals(darray[2], Math.PI);
+
+    assertIntEquals(testAllocationEliminationOfArray1(), 11);
+    assertIntEquals(testAllocationEliminationOfArray2(), 11);
+    assertIntEquals(testAllocationEliminationOfArray3(2), 4);
+    assertIntEquals(testAllocationEliminationOfArray4(2), 6);
+
+    assertIntEquals(testStoreStore().i, 41);
+    assertIntEquals(testStoreStore().j, 43);
+    assertIntEquals(testStoreStoreWithDeoptimize(new int[4]), 4);
   }
 
   static boolean sFlag;
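
Reviewer note: the new LSE tests pin down two behaviors — store-store elimination keeps only the last store to each field of a non-escaping object, and an allocation whose every access uses a constant index can disappear entirely. Conceptually the optimized methods reduce to the hand-simplified forms below (an illustration of the expected effect, not the compiler's actual output; TestClass2 is stubbed with the two int fields the test uses):

    class LseSketch {
      static class TestClass2 { int i; int j; }  // stand-in for the test's TestClass2

      // What testStoreStore() amounts to after store-store elimination:
      static TestClass2 testStoreStoreReduced() {
        TestClass2 obj = new TestClass2();
        obj.i = 41;  // the earlier obj.i = 41 and obj.j = 42 stores are dead
        obj.j = 43;
        return obj;
      }

      // What testAllocationEliminationOfArray1() amounts to: the array never
      // escapes and every index is constant, so allocation, sets and gets fold.
      static int testAllocationEliminationOfArray1Reduced() {
        return 0 + 0 + 4 + 7;  // 11
      }
    }
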
diff --git a/test/532-checker-nonnull-arrayset/src/Main.java b/test/532-checker-nonnull-arrayset/src/Main.java
index 2c701bb..61c9e88 100644
--- a/test/532-checker-nonnull-arrayset/src/Main.java
+++ b/test/532-checker-nonnull-arrayset/src/Main.java
@@ -30,10 +30,14 @@
   /// CHECK:          ReturnVoid
   public static void test() {
     Object[] array = new Object[2];
+    // Store the array to a static field so that it escapes and LSE cannot eliminate the accesses.
+    sArray = array;
     Object nonNull = array[0];
     nonNull.getClass(); // Ensure nonNull has an implicit null check.
     array[1] = nonNull;
   }
 
   public static void main(String[] args) {}
+
+  static Object[] sArray;
 }
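
Reviewer note: the store to sArray is what keeps this test meaningful. Once LSE can remove stores into non-escaping allocations (as exercised above), a purely local Object[] could lose the very ArraySet the checker lines assert on; publishing the array through a static field makes it escape, so the store must stay. A sketch of the distinction (illustrative, not the test code itself):

    class EscapeSketch {
      static Object[] sArray;

      static void localOnly() {
        Object[] array = new Object[2];  // never escapes: LSE may drop the set
        array[1] = array[0];
      }

      static void escaping() {
        Object[] array = new Object[2];
        sArray = array;                  // escapes via the static: the set must stay
        array[1] = array[0];
      }
    }
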
diff --git a/test/536-checker-intrinsic-optimization/src/Main.java b/test/536-checker-intrinsic-optimization/src/Main.java
index 52f3f84..e395e28 100644
--- a/test/536-checker-intrinsic-optimization/src/Main.java
+++ b/test/536-checker-intrinsic-optimization/src/Main.java
@@ -330,6 +330,21 @@
   // Terminate the scope for the CHECK-NOT search at the reference or length comparison,
   // whichever comes first.
   /// CHECK:          cmp {{w.*,}} {{w.*|#.*}}
+
+  /// CHECK-START-MIPS: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      beq r0,
+  /// CHECK-NOT:      beqz
+  /// CHECK-NOT:      beqzc
+  // Terminate the scope for the CHECK-NOT search at the class field or length comparison,
+  // whichever comes first.
+  /// CHECK:          lw
+
+  /// CHECK-START-MIPS64: boolean Main.stringArgumentNotNull(java.lang.Object) disassembly (after)
+  /// CHECK:          InvokeVirtual {{.*\.equals.*}} intrinsic:StringEquals
+  /// CHECK-NOT:      beqzc
+  // Terminate the scope for the CHECK-NOT search at the reference comparison.
+  /// CHECK:          beqc
   public static boolean stringArgumentNotNull(Object obj) {
     obj.getClass();
     return "foo".equals(obj);
@@ -384,6 +399,22 @@
   /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}]
   /// CHECK-NOT:      ldr {{w\d+}}, [{{x\d+}}, #0]
   /// CHECK:          cmp {{w\d+}}, {{w\d+|#.*}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-MIPS: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          beq{{(zc)?}}
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      lw {{r\d+}}, +0({{r\d+}})
+  /// CHECK:          bne{{c?}}
+
+  // Test is brittle as it depends on the class offset being 0.
+  /// CHECK-START-MIPS64: boolean Main.stringArgumentIsString() disassembly (after)
+  /// CHECK:          InvokeVirtual intrinsic:StringEquals
+  /// CHECK:          beqzc
+  // Check that we don't try to compare the classes.
+  /// CHECK-NOT:      lw {{r\d+}}, +0({{r\d+}})
+  /// CHECK:          bnec
   public static boolean stringArgumentIsString() {
     return "foo".equals(myString);
   }
diff --git a/test/538-checker-embed-constants/src/Main.java b/test/538-checker-embed-constants/src/Main.java
index 4f34ec9..94aad9d 100644
--- a/test/538-checker-embed-constants/src/Main.java
+++ b/test/538-checker-embed-constants/src/Main.java
@@ -37,13 +37,20 @@
   }
 
   /// CHECK-START-ARM: int Main.and511(int) disassembly (after)
-  /// CHECK:                mov {{r\d+}}, #511
-  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK:                ubfx {{r\d+}}, {{r\d+}}, #0, #9
 
   public static int and511(int arg) {
     return arg & 511;
   }
 
+  /// CHECK-START-ARM: int Main.andF00D(int) disassembly (after)
+  /// CHECK:                mov {{r\d+}}, #61453
+  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+
+  public static int andF00D(int arg) {
+    return arg & 0xF00D;
+  }
+
   /// CHECK-START-ARM: int Main.andNot15(int) disassembly (after)
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK:                bic {{r\d+}}, {{r\d+}}, #0xf
@@ -114,19 +121,31 @@
   }
 
   /// CHECK-START-ARM: long Main.and511(long) disassembly (after)
-  /// CHECK:                mov {{r\d+}}, #511
+  /// CHECK:                ubfx {{r\d+}}, {{r\d+}}, #0, #9
   /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
   /// CHECK-NOT:            and{{(\.w)?}}
   /// CHECK-NOT:            bic{{(\.w)?}}
-  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
-  /// CHECK-NEXT:           and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
-  /// CHECK-NOT:            and{{(\.w)?}}
-  /// CHECK-NOT:            bic{{(\.w)?}}
 
   public static long and511(long arg) {
     return arg & 511L;
   }
 
+  /// CHECK-START-ARM: long Main.andF00D(long) disassembly (after)
+  /// CHECK:                mov {{r\d+}}, #61453
+  /// CHECK-NEXT:           mov{{s?}} {{r\d+}}, #0
+  /// CHECK-NOT:            and{{(\.w)?}}
+  /// CHECK-NOT:            bic{{(\.w)?}}
+  /// CHECK-NOT:            ubfx
+  /// CHECK:                and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NEXT:           and{{(\.w)?}} {{r\d+}}, {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:            and{{(\.w)?}}
+  /// CHECK-NOT:            bic{{(\.w)?}}
+  /// CHECK-NOT:            ubfx
+
+  public static long andF00D(long arg) {
+    return arg & 0xF00DL;
+  }
+
   /// CHECK-START-ARM: long Main.andNot15(long) disassembly (after)
   /// CHECK-NOT:            mvn {{r\d+}}, #15
   /// CHECK-NOT:            and{{(\.w)?}}
@@ -631,6 +650,7 @@
     int arg = 0x87654321;
     assertIntEquals(and255(arg), 0x21);
     assertIntEquals(and511(arg), 0x121);
+    assertIntEquals(andF00D(arg), 0x4001);
     assertIntEquals(andNot15(arg), 0x87654320);
     assertIntEquals(or255(arg), 0x876543ff);
     assertIntEquals(or511(arg), 0x876543ff);
@@ -642,6 +662,7 @@
     long longArg = 0x1234567887654321L;
     assertLongEquals(and255(longArg), 0x21L);
     assertLongEquals(and511(longArg), 0x121L);
+    assertLongEquals(andF00D(longArg), 0x4001L);
     assertLongEquals(andNot15(longArg), 0x1234567887654320L);
     assertLongEquals(and0xfffffff00000000f(longArg), 0x1234567000000001L);
     assertLongEquals(or255(longArg), 0x12345678876543ffL);
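
Reviewer note on why 511 now becomes ubfx while 0xF00D does not: 511 = 0x1FF is a contiguous run of nine low bits, so `arg & 511` is exactly an unsigned bitfield extract (`ubfx dst, src, #0, #9`) and no constant needs to be materialized; 0xF00D has scattered bits, so it is still loaded into a register (61453 decimal) and ANDed. The new expected values check out (standalone sketch, class name illustrative):

    public class MaskCheck {
      public static void main(String[] args) {
        int arg = 0x87654321;
        System.out.println(Integer.toHexString(arg & 511));     // 121  (low 9 bits of 0x321)
        System.out.println(Integer.toHexString(arg & 0xF00D));  // 4001 (0x4321 & 0xF00D)

        long longArg = 0x1234567887654321L;
        System.out.println(Long.toHexString(longArg & 0xF00DL)); // 4001, high word masked to 0
      }
    }
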
diff --git a/test/570-checker-select/src/Main.java b/test/570-checker-select/src/Main.java
index e0a76ca..3ac6f89 100644
--- a/test/570-checker-select/src/Main.java
+++ b/test/570-checker-select/src/Main.java
@@ -371,6 +371,49 @@
     return a > b ? x : y;
   }
 
+  /// CHECK-START-ARM: long Main.$noinline$LongEqNonmatCond_LongVarVar(long, long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NEXT:            it eq
+  /// CHECK-NEXT:            cmpeq {{r\d+}}, {{r\d+}}
+  /// CHECK-NEXT:            it eq
+
+  public static long $noinline$LongEqNonmatCond_LongVarVar(long a, long b, long x, long y) {
+    return a == b ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            mov ip, #52720
+  /// CHECK-NEXT:            movt ip, #35243
+  /// CHECK-NEXT:            cmp {{r\d+}}, ip
+  /// CHECK-NEXT:            sbcs ip, {{r\d+}}, #{{\d+}}
+  /// CHECK-NEXT:            it ge
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar(long a, long x, long y) {
+    return a > 0x89ABCDEFL ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar2(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            mov ip, #{{\d+}}
+  /// CHECK-NEXT:            movt ip, #{{\d+}}
+  /// CHECK-NEXT:            cmp {{r\d+}}, ip
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar2(long a, long x, long y) {
+    return a > 0x0123456789ABCDEFL ? x : y;
+  }
+
+  /// CHECK-START-ARM: long Main.$noinline$LongNonmatCondCst_LongVarVar3(long, long, long) disassembly (after)
+  /// CHECK:               Select
+  /// CHECK-NEXT:            cmp {{r\d+}}, {{r\d+}}
+  /// CHECK-NOT:             sbcs
+  /// CHECK-NOT:             cmp
+
+  public static long $noinline$LongNonmatCondCst_LongVarVar3(long a, long x, long y) {
+    return a > 0x7FFFFFFFFFFFFFFFL ? x : y;
+  }
+
   /// CHECK-START: long Main.LongMatCond_LongVarVar(long, long, long, long) register (after)
   /// CHECK:            <<Cond:z\d+>> LessThanOrEqual [{{j\d+}},{{j\d+}}]
   /// CHECK:            <<Sel1:j\d+>> Select [{{j\d+}},{{j\d+}},<<Cond>>]
@@ -612,6 +655,39 @@
     assertEqual(5, IntMatCond_IntVarVar(3, 2, 5, 7));
     assertEqual(8, IntMatCond_IntVarVar(2, 3, 5, 7));
 
+    assertEqual(0xAAAAAAAA55555555L,
+                LongNonmatCond_LongVarVar(3L, 2L, 0xAAAAAAAA55555555L, 0x8888888877777777L));
+    assertEqual(0x8888888877777777L,
+                LongNonmatCond_LongVarVar(2L, 2L, 0xAAAAAAAA55555555L, 0x8888888877777777L));
+    assertEqual(0x8888888877777777L,
+                LongNonmatCond_LongVarVar(2L, 3L, 0xAAAAAAAA55555555L, 0x8888888877777777L));
+    assertEqual(0xAAAAAAAA55555555L, LongNonmatCond_LongVarVar(0x0000000100000000L,
+                                                               0x00000000FFFFFFFFL,
+                                                               0xAAAAAAAA55555555L,
+                                                               0x8888888877777777L));
+    assertEqual(0x8888888877777777L, LongNonmatCond_LongVarVar(0x00000000FFFFFFFFL,
+                                                               0x0000000100000000L,
+                                                               0xAAAAAAAA55555555L,
+                                                               0x8888888877777777L));
+
+    assertEqual(0x8888888877777777L, $noinline$LongEqNonmatCond_LongVarVar(2L,
+                                                                           3L,
+                                                                           0xAAAAAAAA55555555L,
+                                                                           0x8888888877777777L));
+    assertEqual(0xAAAAAAAA55555555L, $noinline$LongEqNonmatCond_LongVarVar(2L,
+                                                                           2L,
+                                                                           0xAAAAAAAA55555555L,
+                                                                           0x8888888877777777L));
+    assertEqual(0x8888888877777777L, $noinline$LongEqNonmatCond_LongVarVar(0x10000000000L,
+                                                                           0L,
+                                                                           0xAAAAAAAA55555555L,
+                                                                           0x8888888877777777L));
+
+    assertEqual(5L, $noinline$LongNonmatCondCst_LongVarVar2(0x7FFFFFFFFFFFFFFFL, 5L, 7L));
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar2(2L, 5L, 7L));
+
+    assertEqual(7L, $noinline$LongNonmatCondCst_LongVarVar3(2L, 5L, 7L));
+
     assertEqual(5, FloatLtNonmatCond_IntVarVar(3, 2, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(2, 3, 5, 7));
     assertEqual(7, FloatLtNonmatCond_IntVarVar(Float.NaN, 2, 5, 7));
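
Reviewer note: the ARM expectations above encode how 32-bit code compares longs. Equality compares the low words, then (predicated on eq) the high words, so a final eq means both halves matched; ordered compares use cmp on one pair of words and sbcs on the other so the borrow threads the 64-bit subtraction through the flags. The equality half is easy to model (illustrative Java, not the generated code):

    public class LongEqModel {
      // Mirrors "cmp lo,lo; it eq; cmpeq hi,hi": the high-word compare only
      // executes when the low words were equal, so eq == full 64-bit equality.
      static boolean longEq(long a, long b) {
        return (int) a == (int) b && (int) (a >>> 32) == (int) (b >>> 32);
      }

      public static void main(String[] args) {
        System.out.println(longEq(2L, 2L));              // true
        System.out.println(longEq(0x10000000000L, 0L));  // false (high words differ)
      }
    }
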
diff --git a/test/577-profile-foreign-dex/info.txt b/test/577-profile-foreign-dex/info.txt
deleted file mode 100644
index 090db3f..0000000
--- a/test/577-profile-foreign-dex/info.txt
+++ /dev/null
@@ -1 +0,0 @@
-Check that we record the use of foreign dex files when profiles are enabled.
diff --git a/test/577-profile-foreign-dex/src/Main.java b/test/577-profile-foreign-dex/src/Main.java
deleted file mode 100644
index ed7a625..0000000
--- a/test/577-profile-foreign-dex/src/Main.java
+++ /dev/null
@@ -1,175 +0,0 @@
-/*
- * Copyright (C) 2016 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.lang.reflect.Method;
-import java.lang.reflect.Constructor;
-import java.util.HashMap;
-
-public class Main {
-
-  private static final String PROFILE_NAME = "primary.prof";
-  private static final String APP_DIR_PREFIX = "app_dir_";
-  private static final String FOREIGN_DEX_PROFILE_DIR = "foreign-dex";
-  private static final String TEMP_FILE_NAME_PREFIX = "dummy";
-  private static final String TEMP_FILE_NAME_SUFFIX = "-file";
-
-  public static void main(String[] args) throws Exception {
-    File tmpFile = null;
-    File appDir = null;
-    File profileFile = null;
-    File foreignDexProfileDir = null;
-
-    try {
-      // Create the necessary files layout.
-      tmpFile = createTempFile();
-      appDir = new File(tmpFile.getParent(), APP_DIR_PREFIX + tmpFile.getName());
-      appDir.mkdir();
-      foreignDexProfileDir = new File(tmpFile.getParent(), FOREIGN_DEX_PROFILE_DIR);
-      foreignDexProfileDir.mkdir();
-      profileFile = createTempFile();
-
-      String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
-
-      // Register the app with the runtime
-      VMRuntime.registerAppInfo(profileFile.getPath(), appDir.getPath(),
-             new String[] { codePath }, foreignDexProfileDir.getPath());
-
-      testMarkerForForeignDex(foreignDexProfileDir);
-      testMarkerForCodePath(foreignDexProfileDir);
-      testMarkerForApplicationDexFile(foreignDexProfileDir, appDir);
-    } finally {
-      if (tmpFile != null) {
-        tmpFile.delete();
-      }
-      if (profileFile != null) {
-        profileFile.delete();
-      }
-      if (foreignDexProfileDir != null) {
-        foreignDexProfileDir.delete();
-      }
-      if (appDir != null) {
-        appDir.delete();
-      }
-    }
-  }
-
-  // Verify we actually create a marker on disk for foreign dex files.
-  private static void testMarkerForForeignDex(File foreignDexProfileDir) throws Exception {
-    String foreignDex = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar";
-    loadDexFile(foreignDex);
-    checkMarker(foreignDexProfileDir, foreignDex, /* exists */ true);
-  }
-
-  // Verify we do not create a marker on disk for dex files path of the code path.
-  private static void testMarkerForCodePath(File foreignDexProfileDir) throws Exception {
-    String codePath = System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex.jar";
-    loadDexFile(codePath);
-    checkMarker(foreignDexProfileDir, codePath, /* exists */ false);
-  }
-
-  private static void testMarkerForApplicationDexFile(File foreignDexProfileDir, File appDir)
-      throws Exception {
-    // Copy the -ex jar to the application directory and load it from there.
-    // This will record duplicate class conflicts but we don't care for this use case.
-    File foreignDex = new File(System.getenv("DEX_LOCATION") + "/577-profile-foreign-dex-ex.jar");
-    File appDex = new File(appDir, "appDex.jar");
-    try {
-      copyFile(foreignDex, appDex);
-
-      loadDexFile(appDex.getAbsolutePath());
-      checkMarker(foreignDexProfileDir, appDex.getAbsolutePath(), /* exists */ false);
-    } finally {
-      if (appDex != null) {
-        appDex.delete();
-      }
-    }
-  }
-
-  private static void checkMarker(File foreignDexProfileDir, String dexFile, boolean exists) {
-    File marker = new File(foreignDexProfileDir, dexFile.replace('/', '@'));
-    boolean result_ok = exists ? marker.exists() : !marker.exists();
-    if (!result_ok) {
-      throw new RuntimeException("Marker test failed for:" + marker.getPath());
-    }
-  }
-
-  private static void loadDexFile(String dexFile) throws Exception {
-    Class<?> pathClassLoader = Class.forName("dalvik.system.PathClassLoader");
-    if (pathClassLoader == null) {
-        throw new RuntimeException("Couldn't find path class loader class");
-    }
-    Constructor<?> constructor =
-        pathClassLoader.getDeclaredConstructor(String.class, ClassLoader.class);
-    constructor.newInstance(
-            dexFile, ClassLoader.getSystemClassLoader());
-  }
-
-  private static class VMRuntime {
-    private static final Method registerAppInfoMethod;
-    static {
-      try {
-        Class<?> c = Class.forName("dalvik.system.VMRuntime");
-        registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
-            String.class, String.class, String[].class, String.class);
-      } catch (Exception e) {
-        throw new RuntimeException(e);
-      }
-    }
-
-    public static void registerAppInfo(String pkgName, String appDir,
-        String[] codePath, String foreignDexProfileDir) throws Exception {
-      registerAppInfoMethod.invoke(null, pkgName, appDir, codePath, foreignDexProfileDir);
-    }
-  }
-
-  private static void copyFile(File fromFile, File toFile) throws Exception {
-    FileInputStream in = new FileInputStream(fromFile);
-    FileOutputStream out = new FileOutputStream(toFile);
-    try {
-      byte[] buffer = new byte[4096];
-      int bytesRead;
-      while ((bytesRead = in.read(buffer)) >= 0) {
-          out.write(buffer, 0, bytesRead);
-      }
-    } finally {
-      out.flush();
-      try {
-          out.getFD().sync();
-      } catch (IOException e) {
-      }
-      out.close();
-      in.close();
-    }
-  }
-
-  private static File createTempFile() throws Exception {
-    try {
-      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-    } catch (IOException e) {
-      System.setProperty("java.io.tmpdir", "/data/local/tmp");
-      try {
-        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-      } catch (IOException e2) {
-        System.setProperty("java.io.tmpdir", "/sdcard");
-        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
-      }
-    }
-  }
-}
diff --git a/test/595-profile-saving/src/Main.java b/test/595-profile-saving/src/Main.java
index 039503f..faf94c4 100644
--- a/test/595-profile-saving/src/Main.java
+++ b/test/595-profile-saving/src/Main.java
@@ -29,9 +29,7 @@
       // String codePath = getDexBaseLocation();
       String codePath = System.getenv("DEX_LOCATION") + "/595-profile-saving.jar";
       VMRuntime.registerAppInfo(file.getPath(),
-                                System.getenv("DEX_LOCATION"),
-                                new String[] {codePath},
-                                /* foreignProfileDir */ null);
+                                new String[] {codePath});
 
       int methodIdx = $opt$noinline$testProfile();
       ensureProfileProcessing();
@@ -85,15 +83,15 @@
       try {
         Class<? extends Object> c = Class.forName("dalvik.system.VMRuntime");
         registerAppInfoMethod = c.getDeclaredMethod("registerAppInfo",
-            String.class, String.class, String[].class, String.class);
+            String.class, String[].class);
       } catch (Exception e) {
         throw new RuntimeException(e);
       }
     }
 
-    public static void registerAppInfo(String profile, String appDir,
-                                       String[] codePaths, String foreignDir) throws Exception {
-      registerAppInfoMethod.invoke(null, profile, appDir, codePaths, foreignDir);
+    public static void registerAppInfo(String profile, String[] codePaths)
+        throws Exception {
+      registerAppInfoMethod.invoke(null, profile, codePaths);
     }
   }
 }
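
Reviewer note: the test now mirrors the slimmed-down runtime hook — registerAppInfo takes just the profile path and the code paths, with the app directory and foreign-dex profile directory arguments gone, consistent with deleting the foreign-dex tracking exercised by the 577 test above. The reflective lookup has to match that shape exactly; a minimal sketch of the call (paths below are hypothetical placeholders):

    import java.lang.reflect.Method;

    class RegisterAppInfoSketch {
      static void register() throws Exception {
        Class<?> c = Class.forName("dalvik.system.VMRuntime");
        Method m = c.getDeclaredMethod("registerAppInfo", String.class, String[].class);
        m.invoke(null, "/data/local/tmp/primary.prof",
                 new String[] { "/data/local/tmp/app.jar" });
      }
    }
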
diff --git a/test/616-cha-abstract/src/Main.java b/test/616-cha-abstract/src/Main.java
index e1d7db1..b33f575 100644
--- a/test/616-cha-abstract/src/Main.java
+++ b/test/616-cha-abstract/src/Main.java
@@ -39,8 +39,8 @@
 }
 
 public class Main {
-  static Main1 sMain1;
-  static Main1 sMain2;
+  static Base sMain1;
+  static Base sMain2;
 
   static boolean sIsOptimizing = true;
   static boolean sHasJIT = true;
diff --git a/test/616-cha-interface-default/expected.txt b/test/616-cha-interface-default/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/616-cha-interface-default/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha-interface-default/info.txt b/test/616-cha-interface-default/info.txt
new file mode 100644
index 0000000..11baa1f
--- /dev/null
+++ b/test/616-cha-interface-default/info.txt
@@ -0,0 +1,2 @@
+Test for Class Hierarchy Analysis (CHA) on interface method.
+Test it under multidex configuration to check cross-dex inlining.
diff --git a/test/616-cha-interface-default/multidex.jpp b/test/616-cha-interface-default/multidex.jpp
new file mode 100644
index 0000000..b0d200e
--- /dev/null
+++ b/test/616-cha-interface-default/multidex.jpp
@@ -0,0 +1,3 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
diff --git a/test/577-profile-foreign-dex/run b/test/616-cha-interface-default/run
similarity index 70%
copy from test/577-profile-foreign-dex/run
copy to test/616-cha-interface-default/run
index ad57d14..d8b4f0d 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/616-cha-interface-default/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+# Run without an app image to prevent the classes from being loaded at startup.
+exec ${RUN} "${@}" --no-app-image
diff --git a/test/616-cha-interface-default/src-multidex/Base.java b/test/616-cha-interface-default/src-multidex/Base.java
new file mode 100644
index 0000000..2cbcb50
--- /dev/null
+++ b/test/616-cha-interface-default/src-multidex/Base.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Base {
+  default public int foo(int i) {
+    if (i != 1) {
+      return -2;
+    }
+    return i + 10;
+  }
+
+  // Test default method that's not inlined.
+  default public int $noinline$bar() {
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    return -1;
+  }
+
+  default void printError(String msg) {
+    System.out.println(msg);
+  }
+}
diff --git a/test/616-cha-interface-default/src/Main.java b/test/616-cha-interface-default/src/Main.java
new file mode 100644
index 0000000..951607d
--- /dev/null
+++ b/test/616-cha-interface-default/src/Main.java
@@ -0,0 +1,176 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Main1 implements Base {
+}
+
+class Main2 extends Main1 {
+  public void foobar() {}
+}
+
+class Main3 implements Base {
+  public int foo(int i) {
+    if (i != 3) {
+      printError("error3");
+    }
+    return -(i + 10);
+  }
+}
+
+public class Main {
+  static Base sMain1;
+  static Base sMain2;
+  static Base sMain3;
+
+  static boolean sIsOptimizing = true;
+  static boolean sHasJIT = true;
+  static volatile boolean sOtherThreadStarted;
+
+  private static void assertSingleImplementation(Class<?> clazz, String method_name, boolean b) {
+    if (hasSingleImplementation(clazz, method_name) != b) {
+      System.out.println(clazz + "." + method_name +
+          " doesn't have single implementation value of " + b);
+    }
+  }
+
+  static int getValue(Class<?> cls) {
+    if (cls == Main1.class || cls == Main2.class) {
+      return 1;
+    }
+    return 3;
+  }
+
+  // sMain1.foo()/sMain2.foo() will always be Base.foo() before Main3 is loaded/linked.
+  // So sMain1.foo() can be devirtualized to Base.foo() and inlined.
+  // After Dummy.createMain3() links in Main3, live testImplement() frames on the
+  // stack should be deoptimized.
+  static void testImplement(boolean createMain3, boolean wait, boolean setHasJIT) {
+    if (setHasJIT) {
+      if (isInterpreted()) {
+        sHasJIT = false;
+      }
+      return;
+    }
+
+    if (createMain3 && (sIsOptimizing || sHasJIT)) {
+      assertIsManaged();
+    }
+
+    if (sMain1.foo(getValue(sMain1.getClass())) != 11) {
+      System.out.println("11 expected.");
+    }
+    if (sMain1.$noinline$bar() != -1) {
+      System.out.println("-1 expected.");
+    }
+    if (sMain2.foo(getValue(sMain2.getClass())) != 11) {
+      System.out.println("11 expected.");
+    }
+
+    if (createMain3) {
+      // Wait for the other thread to start.
+      while (!sOtherThreadStarted);
+      // Create a Main3 instance and assign it to sMain3.
+      // sMain1 and sMain2 are kept the same.
+      sMain3 = Dummy.createMain3();
+      // Wake up the other thread.
+      synchronized(Main.class) {
+        Main.class.notify();
+      }
+    } else if (wait) {
+      // This is the other thread.
+      synchronized(Main.class) {
+        sOtherThreadStarted = true;
+        // Wait until Main3 is linked and deoptimization is triggered.
+        try {
+          Main.class.wait();
+        } catch (Exception e) {
+        }
+      }
+    }
+
+    // There should be a deoptimization here right after Main3 is linked by
+    // calling Dummy.createMain3(), even though sMain1 didn't change.
+    // The behavior here would be different if an inline cache were used, which
+    // doesn't deoptimize since sMain1 still hits the type cache.
+    if (sMain1.foo(getValue(sMain1.getClass())) != 11) {
+      System.out.println("11 expected.");
+    }
+    if ((createMain3 || wait) && sHasJIT && !sIsOptimizing) {
+      // This method should be deoptimized right after Main3 is created.
+      assertIsInterpreted();
+    }
+
+    if (sMain3 != null) {
+      if (sMain3.foo(getValue(sMain3.getClass())) != -13) {
+        System.out.println("-13 expected.");
+      }
+    }
+  }
+
+  // Test scenarios under which CHA-based devirtualization happens,
+  // and how loading a class that implements a method can invalidate compiled code.
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    if (isInterpreted()) {
+      sIsOptimizing = false;
+    }
+
+    // sMain1 is an instance of Main1.
+    // sMain2 is an instance of Main2.
+    // Neither Main1 nor Main2 overrides the default method Base.foo().
+    // Main3 hasn't been loaded yet.
+    sMain1 = new Main1();
+    sMain2 = new Main2();
+
+    ensureJitCompiled(Main.class, "testImplement");
+    testImplement(false, false, true);
+
+    if (sHasJIT && !sIsOptimizing) {
+      assertSingleImplementation(Base.class, "foo", true);
+      assertSingleImplementation(Main1.class, "foo", true);
+    } else {
+      // Main3 is verified ahead-of-time so it's linked in already.
+    }
+
+    // Create another thread that also calls sMain1.foo().
+    // This exercises suspending and deoptimizing a thread other than the current one.
+    new Thread() {
+      public void run() {
+        testImplement(false, true, false);
+      }
+    }.start();
+
+    // This will create a Main3 instance in the middle of testImplement().
+    testImplement(true, false, false);
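+    // Base.foo() no longer has a single implementation (the default vs.
+    // Main3's override), while Main1 and Main3 each still expose exactly one.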
+    assertSingleImplementation(Base.class, "foo", false);
+    assertSingleImplementation(Main1.class, "foo", true);
+    assertSingleImplementation(sMain3.getClass(), "foo", true);
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+  private static native void assertIsInterpreted();
+  private static native void assertIsManaged();
+  private static native boolean isInterpreted();
+  private static native boolean hasSingleImplementation(Class<?> clazz, String method_name);
+}
+
+// Put createMain3() in another class so that verifying Main does not trigger loading Main3 early.
+class Dummy {
+  static Base createMain3() {
+    return new Main3();
+  }
+}
diff --git a/test/616-cha-interface/expected.txt b/test/616-cha-interface/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/616-cha-interface/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha-interface/info.txt b/test/616-cha-interface/info.txt
new file mode 100644
index 0000000..1fd330a
--- /dev/null
+++ b/test/616-cha-interface/info.txt
@@ -0,0 +1 @@
+Test for Class Hierarchy Analysis (CHA) on interface method.
diff --git a/test/577-profile-foreign-dex/run b/test/616-cha-interface/run
similarity index 70%
copy from test/577-profile-foreign-dex/run
copy to test/616-cha-interface/run
index ad57d14..d8b4f0d 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/616-cha-interface/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+# Run without an app image to prevent the classes from being loaded at startup.
+exec ${RUN} "${@}" --no-app-image
diff --git a/test/616-cha-interface/src/Main.java b/test/616-cha-interface/src/Main.java
new file mode 100644
index 0000000..3c93496
--- /dev/null
+++ b/test/616-cha-interface/src/Main.java
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Base {
+  void foo(int i);
+  void $noinline$bar();
+}
+
+class Main1 implements Base {
+  public void foo(int i) {
+    if (i != 1) {
+      printError("error1");
+    }
+  }
+
+  // Test rewriting invoke-interface into invoke-virtual when inlining fails.
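+  // The repeated print calls make the method too large to inline.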
+  public void $noinline$bar() {
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+    System.out.print("");
+  }
+
+  void printError(String msg) {
+    System.out.println(msg);
+  }
+}
+
+class Main2 extends Main1 {
+  public void foo(int i) {
+    if (i != 2) {
+      printError("error2");
+    }
+  }
+}
+
+public class Main {
+  static Base sMain1;
+  static Base sMain2;
+
+  static boolean sIsOptimizing = true;
+  static boolean sHasJIT = true;
+  static volatile boolean sOtherThreadStarted;
+
+  private static void assertSingleImplementation(Class<?> clazz, String method_name, boolean b) {
+    if (hasSingleImplementation(clazz, method_name) != b) {
+      System.out.println(clazz + "." + method_name +
+          " doesn't have the expected single-implementation value " + b);
+    }
+  }
+
+  // sMain1.foo() will always be Main1.foo() before Main2 is loaded/linked.
+  // So sMain1.foo() can be devirtualized to Main1.foo() and inlined.
+  // After Dummy.createMain2() links in Main2, any live testImplement() frame
+  // on the stack should be deoptimized.
+  static void testImplement(boolean createMain2, boolean wait, boolean setHasJIT) {
+    if (setHasJIT) {
+      if (isInterpreted()) {
+        sHasJIT = false;
+      }
+      return;
+    }
+
+    if (createMain2 && (sIsOptimizing || sHasJIT)) {
+      assertIsManaged();
+    }
+
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+    sMain1.$noinline$bar();
+
+    if (createMain2) {
+      // Wait for the other thread to start.
+      while (!sOtherThreadStarted);
+      // Create a Main2 instance and assign it to sMain2.
+      // sMain1 is kept the same.
+      sMain2 = Dummy.createMain2();
+      // Wake up the other thread.
+      synchronized(Main.class) {
+        Main.class.notify();
+      }
+    } else if (wait) {
+      // This is the other thread.
+      synchronized(Main.class) {
+        sOtherThreadStarted = true;
+        // Wait for Main2 to be linked and deoptimization to be triggered.
+        try {
+          Main.class.wait();
+        } catch (Exception e) {
+        }
+      }
+    }
+
+    // There should be a deoptimization here right after Main2 is linked by
+    // calling Dummy.createMain2(), even though sMain1 didn't change.
+    // The behavior here would be different if an inline cache were used, which
+    // doesn't deoptimize since sMain1 still hits the type cache.
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+    if ((createMain2 || wait) && sHasJIT && !sIsOptimizing) {
+      // This method should be deoptimized right after Main2 is created.
+      assertIsInterpreted();
+    }
+
+    if (sMain2 != null) {
+      sMain2.foo(sMain2.getClass() == Main1.class ? 1 : 2);
+    }
+  }
+
+  // Test scenarios under which CHA-based devirtualization happens,
+  // and how loading a class that overrides a method can invalidate compiled code.
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    if (isInterpreted()) {
+      sIsOptimizing = false;
+    }
+
+    // sMain1 is an instance of Main1. Main2 hasn't been loaded yet.
+    sMain1 = new Main1();
+
+    ensureJitCompiled(Main.class, "testImplement");
+    testImplement(false, false, true);
+
+    if (sHasJIT && !sIsOptimizing) {
+      assertSingleImplementation(Base.class, "foo", true);
+      assertSingleImplementation(Main1.class, "foo", true);
+    } else {
+      // Main2 is verified ahead-of-time so it's linked in already.
+    }
+
+    // Create another thread that also calls sMain1.foo().
+    // This exercises suspending and deoptimizing a thread other than the current one.
+    new Thread() {
+      public void run() {
+        testImplement(false, true, false);
+      }
+    }.start();
+
+    // This will create a Main2 instance in the middle of testImplement().
+    testImplement(true, false, false);
+    assertSingleImplementation(Base.class, "foo", false);
+    assertSingleImplementation(Main1.class, "foo", false);
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+  private static native void assertIsInterpreted();
+  private static native void assertIsManaged();
+  private static native boolean isInterpreted();
+  private static native boolean hasSingleImplementation(Class<?> clazz, String method_name);
+}
+
+// Put createMain2() in another class so that verifying Main does not trigger loading Main2 early.
+class Dummy {
+  static Main1 createMain2() {
+    return new Main2();
+  }
+}
diff --git a/test/616-cha-miranda/expected.txt b/test/616-cha-miranda/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/616-cha-miranda/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha-miranda/info.txt b/test/616-cha-miranda/info.txt
new file mode 100644
index 0000000..c46f33f
--- /dev/null
+++ b/test/616-cha-miranda/info.txt
@@ -0,0 +1 @@
+Test for Class Hierarchy Analysis (CHA) on miranda method.
diff --git a/test/577-profile-foreign-dex/run b/test/616-cha-miranda/run
similarity index 70%
copy from test/577-profile-foreign-dex/run
copy to test/616-cha-miranda/run
index ad57d14..d8b4f0d 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/616-cha-miranda/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+# Run without an app image to prevent the classes from being loaded at startup.
+exec ${RUN} "${@}" --no-app-image
diff --git a/test/616-cha-miranda/src/Main.java b/test/616-cha-miranda/src/Main.java
new file mode 100644
index 0000000..e548482
--- /dev/null
+++ b/test/616-cha-miranda/src/Main.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+interface Iface {
+  public void foo(int i);
+}
+
+abstract class Base implements Iface {
+  // Iface.foo(int) will be added as a miranda method.
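+  // (A miranda method is the synthetic placeholder the class linker adds to
+  // an abstract class for an interface method the class does not implement.)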
+
+  void printError(String msg) {
+    System.out.println(msg);
+  }
+}
+
+class Main1 extends Base {
+  public void foo(int i) {
+    if (i != 1) {
+      printError("error1");
+    }
+  }
+}
+
+class Main2 extends Main1 {
+  public void foo(int i) {
+    if (i != 2) {
+      printError("error2");
+    }
+  }
+}
+
+public class Main {
+  static Base sMain1;
+  static Base sMain2;
+
+  static boolean sIsOptimizing = true;
+  static boolean sHasJIT = true;
+  static volatile boolean sOtherThreadStarted;
+
+  private static void assertSingleImplementation(Class<?> clazz, String method_name, boolean b) {
+    if (hasSingleImplementation(clazz, method_name) != b) {
+      System.out.println(clazz + "." + method_name +
+          " doesn't have the expected single-implementation value " + b);
+    }
+  }
+
+  // sMain1.foo() will always be Main1.foo() before Main2 is loaded/linked.
+  // So sMain1.foo() can be devirtualized to Main1.foo() and inlined.
+  // After Dummy.createMain2() links in Main2, any live testOverride() frame
+  // on the stack should be deoptimized.
+  static void testOverride(boolean createMain2, boolean wait, boolean setHasJIT) {
+    if (setHasJIT) {
+      if (isInterpreted()) {
+        sHasJIT = false;
+      }
+      return;
+    }
+
+    if (createMain2 && (sIsOptimizing || sHasJIT)) {
+      assertIsManaged();
+    }
+
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+
+    if (createMain2) {
+      // Wait for the other thread to start.
+      while (!sOtherThreadStarted);
+      // Create a Main2 instance and assign it to sMain2.
+      // sMain1 is kept the same.
+      sMain2 = Dummy.createMain2();
+      // Wake up the other thread.
+      synchronized(Main.class) {
+        Main.class.notify();
+      }
+    } else if (wait) {
+      // This is the other thread.
+      synchronized(Main.class) {
+        sOtherThreadStarted = true;
+        // Wait for Main2 to be linked and deoptimization to be triggered.
+        try {
+          Main.class.wait();
+        } catch (Exception e) {
+        }
+      }
+    }
+
+    // There should be a deoptimization here right after Main2 is linked by
+    // calling Dummy.createMain2(), even though sMain1 didn't change.
+    // The behavior here would be different if an inline cache were used, which
+    // doesn't deoptimize since sMain1 still hits the type cache.
+    sMain1.foo(sMain1.getClass() == Main1.class ? 1 : 2);
+    if ((createMain2 || wait) && sHasJIT && !sIsOptimizing) {
+      // This method should be deoptimized right after Main2 is created.
+      assertIsInterpreted();
+    }
+
+    if (sMain2 != null) {
+      sMain2.foo(sMain2.getClass() == Main1.class ? 1 : 2);
+    }
+  }
+
+  // Test scenarios under which CHA-based devirtualization happens,
+  // and how loading a class that overrides a method can invalidate compiled code.
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+
+    if (isInterpreted()) {
+      sIsOptimizing = false;
+    }
+
+    // sMain1 is an instance of Main1. Main2 hasn't been loaded yet.
+    sMain1 = new Main1();
+
+    ensureJitCompiled(Main.class, "testOverride");
+    testOverride(false, false, true);
+
+    if (sHasJIT && !sIsOptimizing) {
+      assertSingleImplementation(Base.class, "foo", true);
+      assertSingleImplementation(Main1.class, "foo", true);
+    } else {
+      // Main2 is verified ahead-of-time so it's linked in already.
+    }
+
+    // Create another thread that also calls sMain1.foo().
+    // This exercises suspending and deoptimizing a thread other than the current one.
+    new Thread() {
+      public void run() {
+        testOverride(false, true, false);
+      }
+    }.start();
+
+    // This will create a Main2 instance in the middle of testOverride().
+    testOverride(true, false, false);
+    assertSingleImplementation(Base.class, "foo", false);
+    assertSingleImplementation(Main1.class, "foo", false);
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+  private static native void assertIsInterpreted();
+  private static native void assertIsManaged();
+  private static native boolean isInterpreted();
+  private static native boolean hasSingleImplementation(Class<?> clazz, String method_name);
+}
+
+// Put createMain2() in another class so that verifying Main does not trigger loading Main2 early.
+class Dummy {
+  static Main1 createMain2() {
+    return new Main2();
+  }
+}
diff --git a/test/616-cha-proxy-method-inline/expected.txt b/test/616-cha-proxy-method-inline/expected.txt
new file mode 100644
index 0000000..6a5618e
--- /dev/null
+++ b/test/616-cha-proxy-method-inline/expected.txt
@@ -0,0 +1 @@
+JNI_OnLoad called
diff --git a/test/616-cha-proxy-method-inline/info.txt b/test/616-cha-proxy-method-inline/info.txt
new file mode 100644
index 0000000..0126855
--- /dev/null
+++ b/test/616-cha-proxy-method-inline/info.txt
@@ -0,0 +1 @@
+Test for Class Hierarchy Analysis (CHA) on inlining a cross-dex proxy method.
diff --git a/test/616-cha-proxy-method-inline/multidex.jpp b/test/616-cha-proxy-method-inline/multidex.jpp
new file mode 100644
index 0000000..b0d200e
--- /dev/null
+++ b/test/616-cha-proxy-method-inline/multidex.jpp
@@ -0,0 +1,3 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
diff --git a/test/577-profile-foreign-dex/run b/test/616-cha-proxy-method-inline/run
similarity index 70%
copy from test/577-profile-foreign-dex/run
copy to test/616-cha-proxy-method-inline/run
index ad57d14..d8b4f0d 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/616-cha-proxy-method-inline/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+# Run without an app image to prevent the classes from being loaded at startup.
+exec ${RUN} "${@}" --no-app-image
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/616-cha-proxy-method-inline/src-multidex/Foo.java
similarity index 83%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/616-cha-proxy-method-inline/src-multidex/Foo.java
index cba73b3..9deca3e 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/616-cha-proxy-method-inline/src-multidex/Foo.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+interface Foo {
+  public Object bar(Object obj);
 }
diff --git a/test/616-cha-proxy-method-inline/src/Main.java b/test/616-cha-proxy-method-inline/src/Main.java
new file mode 100644
index 0000000..be7bc82
--- /dev/null
+++ b/test/616-cha-proxy-method-inline/src/Main.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+class DebugProxy implements java.lang.reflect.InvocationHandler {
+  private Object obj;
+  static Class<?>[] interfaces = {Foo.class};
+
+  public static Object newInstance(Object obj) {
+    return java.lang.reflect.Proxy.newProxyInstance(
+      Foo.class.getClassLoader(),
+      interfaces,
+      new DebugProxy(obj));
+  }
+
+  private DebugProxy(Object obj) {
+    this.obj = obj;
+  }
+
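+  // Every method invoked on the proxy (here Foo.bar()) is dispatched here.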
+  public Object invoke(Object proxy, Method m, Object[] args) throws Throwable {
+    Object result;
+    if (obj == null) {
+      return null;
+    }
+    try {
+      System.out.println("before invoking method " + m.getName());
+      result = m.invoke(obj, args);
+    } catch (InvocationTargetException e) {
+      throw e.getTargetException();
+    } catch (Exception e) {
+      throw new RuntimeException("unexpected invocation exception: " + e.getMessage());
+    } finally {
+      System.out.println("after invoking method " + m.getName());
+    }
+    return result;
+  }
+}
+
+public class Main {
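+  // call() below is JIT-compiled and invokes bar() on a proxy receiver,
+  // exercising inlining of a proxy method whose interface (Foo) lives in a
+  // different dex file (see src-multidex/Foo.java).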
+  public static void call(Foo foo) {
+    if (foo == null) {
+      return;
+    }
+    foo.bar(null);
+  }
+
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    Foo foo = (Foo)DebugProxy.newInstance(null);
+    ensureJitCompiled(Main.class, "call");
+    call(foo);
+  }
+
+  private static native void ensureJitCompiled(Class<?> itf, String method_name);
+}
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index ad3ff44..2d9daf1 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -21,6 +21,8 @@
 
   static int[] a = new int[10];
 
+  static int[] novec = new int[20];  // to prevent vectorization
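+  // (The loops below read novec[2 * i], keeping an ArrayGet in each body.)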
+
   /// CHECK-START: void Main.deadSingleLoop() loop_optimization (before)
   /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
   //
@@ -132,16 +134,18 @@
   /// CHECK-START: void Main.deadInduction() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.deadInduction() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   static void deadInduction() {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
-      a[i] = 1;
+      a[i] = novec[2 * i] + 1;
       dead += 5;
     }
   }
@@ -151,17 +155,19 @@
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.deadManyInduction() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   static void deadManyInduction() {
     int dead1 = 0, dead2 = 1, dead3 = 3;
     for (int i = 0; i < a.length; i++) {
       dead1 += 5;
-      a[i] = 2;
+      a[i] = novec[2 * i] + 2;
       dead2 += 10;
       dead3 += 100;
     }
@@ -170,16 +176,18 @@
   /// CHECK-START: void Main.deadSequence() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   //
   /// CHECK-START: void Main.deadSequence() loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   static void deadSequence() {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
-      a[i] = 3;
+      a[i] = novec[2 * i] + 3;
       // Increment value defined inside loop,
       // but sequence itself not used anywhere.
       dead += i;
@@ -191,17 +199,19 @@
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-NOT: BoundsCheck
   //
   /// CHECK-START: void Main.deadCycleWithException(int) loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
   /// CHECK-NOT: ArrayGet loop:<<Loop>>      outer_loop:none
   static void deadCycleWithException(int k) {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
-      a[i] = 4;
+      a[i] = novec[2 * i] + 4;
       // Increment value of dead cycle may throw exception. Dynamic
       // BCE takes care of the bounds check though, which enables
       // removing the ArrayGet after removing the dead cycle.
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
index 7509d9b..eee90ab 100644
--- a/test/623-checker-loop-regressions/src/Main.java
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -213,6 +213,8 @@
   /// CHECK-START: long Main.geoLongDivLastValue(long) instruction_simplifier$after_bce (after)
   /// CHECK-DAG: <<Long:j\d+>> LongConstant 0    loop:none
   /// CHECK-DAG:               Return [<<Long>>] loop:none
+  //
+  // Tests overflow in the divisor (while updating intermediate result).
   static long geoLongDivLastValue(long x) {
     for (int i = 0; i < 10; i++) {
       x /= 1081788608;
@@ -220,6 +222,26 @@
     return x;
   }
 
+  /// CHECK-START: long Main.geoLongDivLastValue() loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: long Main.geoLongDivLastValue() loop_optimization (after)
+  /// CHECK-NOT: Phi
+  //
+  /// CHECK-START: long Main.geoLongDivLastValue() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Long:j\d+>> LongConstant 0    loop:none
+  /// CHECK-DAG:               Return [<<Long>>] loop:none
+  //
+  // Tests overflow in the divisor (while updating base).
+  static long geoLongDivLastValue() {
+    long x = -1;
+    for (int i2 = 0; i2 < 2; i2++) {
+      x /= (Long.MAX_VALUE);
+    }
+    return x;
+  }
+
   /// CHECK-START: long Main.geoLongMulLastValue(long) loop_optimization (before)
   /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
@@ -239,6 +261,15 @@
     return x;
   }
 
+  // If vectorized, the narrowing subscript should not cause
+  // type inconsistencies in the synthesized code.
+  static void narrowingSubscript(float[] a) {
+    float val = 2.0f;
+    for (long i = 0; i < a.length; i++) {
+      a[(int) i] += val;
+    }
+  }
+
   public static void main(String[] args) {
     expectEquals(10, earlyExitFirst(-1));
     for (int i = 0; i <= 10; i++) {
@@ -286,6 +317,8 @@
     expectEquals(0L, geoLongDivLastValue(9223372036854775807L));
     expectEquals(0L, geoLongDivLastValue(-9223372036854775808L));
 
+    expectEquals(0L, geoLongDivLastValue());
+
     expectEquals(                   0L, geoLongMulLastValue(0L));
     expectEquals(-8070450532247928832L, geoLongMulLastValue(1L));
     expectEquals( 2305843009213693952L, geoLongMulLastValue(2L));
@@ -296,6 +329,12 @@
     expectEquals( 8070450532247928832L, geoLongMulLastValue(9223372036854775807L));
     expectEquals(                   0L, geoLongMulLastValue(-9223372036854775808L));
 
+    float[] a = new float[16];
+    narrowingSubscript(a);
+    for (int i = 0; i < 16; i++) {
+      expectEquals(2.0f, a[i]);
+    }
+
     System.out.println("passed");
   }
 
@@ -310,4 +349,10 @@
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
+
+  private static void expectEquals(float expected, float result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
 }
diff --git a/test/626-checker-arm64-scratch-register/src/Main.java b/test/626-checker-arm64-scratch-register/src/Main.java
index 6dd4374..1394917 100644
--- a/test/626-checker-arm64-scratch-register/src/Main.java
+++ b/test/626-checker-arm64-scratch-register/src/Main.java
@@ -70,7 +70,7 @@
   /// CHECK:  end_block
   /// CHECK: begin_block
   /// CHECK:   name "<<ElseBlock>>"
-  /// CHECK:                      ParallelMove moves:[#100->d17,32(sp)->d1,36(sp)->d2,d17->d3,d3->d4,d4->d5,d5->d6,d6->d7,d7->d18,d18->d19,d19->d20,d20->d21,d21->d22,d22->d23,d23->d10,d10->d11,d11->d12,24(sp)->d13,28(sp)->d14,d14->16(sp),d12->20(sp),d13->24(sp),d1->28(sp),d2->32(sp),16(sp)->36(sp),20(sp)->40(sp)]
+  /// CHECK:                      ParallelMove moves:[40(sp)->d0,24(sp)->32(sp),28(sp)->36(sp),d0->d3,d3->d4,d2->d5,d4->d6,d5->d7,d6->d18,d7->d19,d18->d20,d19->d21,d20->d22,d21->d23,d22->d10,d23->d11,16(sp)->24(sp),20(sp)->28(sp),d10->d14,d11->d12,d12->d13,d13->d1,d14->d2,32(sp)->16(sp),36(sp)->20(sp)]
   /// CHECK: end_block
 
   /// CHECK-START-ARM64: void Main.test() disassembly (after)
@@ -85,7 +85,7 @@
   /// CHECK:  end_block
   /// CHECK: begin_block
   /// CHECK:   name "<<ElseBlock>>"
-  /// CHECK:                      ParallelMove moves:[invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid]
+  /// CHECK:                      ParallelMove moves:[invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid,invalid->invalid]
   /// CHECK:                        fmov d31, d2
   /// CHECK:                        ldr s2, [sp, #36]
   /// CHECK:                        ldr w16, [sp, #16]
@@ -111,11 +111,10 @@
   /// CHECK:                        fmov d6, d5
   /// CHECK:                        fmov d5, d4
   /// CHECK:                        fmov d4, d3
-  /// CHECK:                        fmov d3, d17
-  /// CHECK:                        fmov d17, d13
+  /// CHECK:                        fmov d3, d13
   /// CHECK:                        ldr s13, [sp, #24]
-  /// CHECK:                        str s17, [sp, #24]
-  /// CHECK:                        ldr s17, pc+{{\d+}} (addr {{0x[0-9a-f]+}}) (100)
+  /// CHECK:                        str s3, [sp, #24]
+  /// CHECK:                        ldr s3, pc+{{\d+}} (addr {{0x[0-9a-f]+}}) (100)
   /// CHECK: end_block
 
   public void test() {
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/638-checker-inline-caches/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/638-checker-inline-caches/expected.txt
diff --git a/test/638-checker-inline-caches/info.txt b/test/638-checker-inline-caches/info.txt
new file mode 100644
index 0000000..1fac628
--- /dev/null
+++ b/test/638-checker-inline-caches/info.txt
@@ -0,0 +1 @@
+Verify the use of inline caches in AOT mode.
diff --git a/test/638-checker-inline-caches/multidex.jpp b/test/638-checker-inline-caches/multidex.jpp
new file mode 100644
index 0000000..69a2cc1
--- /dev/null
+++ b/test/638-checker-inline-caches/multidex.jpp
@@ -0,0 +1,12 @@
+Main:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Main
+Super:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Super
+SubA:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubA
+SubB:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubB
diff --git a/test/638-checker-inline-caches/profile b/test/638-checker-inline-caches/profile
new file mode 100644
index 0000000..1ca6d7b
--- /dev/null
+++ b/test/638-checker-inline-caches/profile
@@ -0,0 +1,6 @@
+LMain;->inlineMonomorphicSubA(LSuper;)I+LSubA;
+LMain;->inlinePolymophicSubASubB(LSuper;)I+LSubA;,LSubB;
+LMain;->inlinePolymophicCrossDexSubASubC(LSuper;)I+LSubA;,LSubC;
+LMain;->inlineMegamorphic(LSuper;)I+LSubA;,LSubB;,LSubC;,LSubD;,LSubE;
+LMain;->inlineMissingTypes(LSuper;)I+missing_types
+LMain;->noInlineCache(LSuper;)I
diff --git a/test/577-profile-foreign-dex/run b/test/638-checker-inline-caches/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/638-checker-inline-caches/run
index ad57d14..146e180 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/638-checker-inline-caches/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} "${@}" --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/638-checker-inline-caches/src-multidex/SubC.java
similarity index 80%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/638-checker-inline-caches/src-multidex/SubC.java
index cba73b3..f7e3c08 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/638-checker-inline-caches/src-multidex/SubC.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public class SubC extends Super {
+  public int getValue() { return 24; }
 }
diff --git a/test/638-checker-inline-caches/src/Main.java b/test/638-checker-inline-caches/src/Main.java
new file mode 100644
index 0000000..680bd14
--- /dev/null
+++ b/test/638-checker-inline-caches/src/Main.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class SubA extends Super {
+  int getValue() { return 42; }
+}
+
+class SubB extends Super {
+  int getValue() { return 38; }
+}
+
+class SubD extends Super {
+  int getValue() { return 10; }
+}
+
+class SubE extends Super {
+  int getValue() { return -4; }
+}
+
+public class Main {
+
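+  // The run script compiles this test with --compiler-filter=speed-profile,
+  // so the inliner consults the inline caches recorded in the profile file:
+  // monomorphic and polymorphic sites are inlined behind class-check guards,
+  // while megamorphic and missing-type sites remain virtual calls.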
+  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphicSubA(Super) inliner (after)
+  /// CHECK:  <<SubARet:i\d+>>      IntConstant 42
+  /// CHECK:  <<Obj:l\d+>>          NullCheck
+  /// CHECK:  <<ObjClass:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:  <<InlineClass:l\d+>>  LoadClass class_name:SubA
+  /// CHECK:  <<Test:z\d+>>         NotEqual [<<InlineClass>>,<<ObjClass>>]
+  /// CHECK:                        Deoptimize [<<Test>>,<<Obj>>]
+  /// CHECK:                        Return [<<SubARet>>]
+  public static int inlineMonomorphicSubA(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
+
+  // Note that the order in which the types are added to the inline cache in the profile matters.
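+  // Every cached type except the last is tested with a branching class check
+  // (If); only the last one is guarded by a Deoptimize.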
+
+  /// CHECK-START: int Main.inlinePolymophicSubASubB(Super) inliner (after)
+  /// CHECK-DAG:  <<SubARet:i\d+>>          IntConstant 42
+  /// CHECK-DAG:  <<SubBRet:i\d+>>          IntConstant 38
+  /// CHECK:      <<Obj:l\d+>>              NullCheck
+  /// CHECK:      <<ObjClassSubA:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
+  /// CHECK:      <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
+  /// CHECK:                                If [<<TestSubA>>]
+
+  /// CHECK:      <<ObjClassSubB:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubB:l\d+>>  LoadClass class_name:SubB
+  /// CHECK:      <<TestSubB:z\d+>>         NotEqual [<<InlineClassSubB>>,<<ObjClassSubB>>]
+  /// CHECK:                                Deoptimize [<<TestSubB>>,<<Obj>>]
+
+  /// CHECK:      <<Ret:i\d+>>              Phi [<<SubARet>>,<<SubBRet>>]
+  /// CHECK:                                Return [<<Ret>>]
+  public static int inlinePolymophicSubASubB(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Super.getValue
+
+  // Note that the order in which the types are added to the inline cache in the profile matters.
+
+  /// CHECK-START: int Main.inlinePolymophicCrossDexSubASubC(Super) inliner (after)
+  /// CHECK-DAG:  <<SubARet:i\d+>>          IntConstant 42
+  /// CHECK-DAG:  <<SubCRet:i\d+>>          IntConstant 24
+  /// CHECK:      <<Obj:l\d+>>              NullCheck
+  /// CHECK:      <<ObjClassSubA:l\d+>>     InstanceFieldGet [<<Obj>>] field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubA:l\d+>>  LoadClass class_name:SubA
+  /// CHECK:      <<TestSubA:z\d+>>         NotEqual [<<InlineClassSubA>>,<<ObjClassSubA>>]
+  /// CHECK:                                If [<<TestSubA>>]
+
+  /// CHECK:      <<ObjClassSubC:l\d+>>     InstanceFieldGet field_name:java.lang.Object.shadow$_klass_
+  /// CHECK:      <<InlineClassSubC:l\d+>>  LoadClass class_name:SubC
+  /// CHECK:      <<TestSubC:z\d+>>         NotEqual [<<InlineClassSubC>>,<<ObjClassSubC>>]
+  /// CHECK:                                Deoptimize [<<TestSubC>>,<<Obj>>]
+
+  /// CHECK:      <<Ret:i\d+>>              Phi [<<SubARet>>,<<SubCRet>>]
+  /// CHECK:                                Return [<<Ret>>]
+  public static int inlinePolymophicCrossDexSubASubC(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlineMegamorphic(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMegamorphic(Super) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+  public static int inlineMegamorphic(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlineMissingTypes(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.inlineMissingTypes(Super) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+  public static int inlineMissingTypes(Super a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.noInlineCache(Super) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+
+  /// CHECK-START: int Main.noInlineCache(Super) inliner (after)
+  /// CHECK:       InvokeVirtual method_name:Super.getValue
+  public static int noInlineCache(Super a) {
+    return a.getValue();
+  }
+
+  public static void testInlineMonomorphic() {
+    if (inlineMonomorphicSubA(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    // Call with a different type than the one from the inline cache.
+    if (inlineMonomorphicSubA(new SubB()) != 38) {
+      throw new Error("Expected 38");
+    }
+  }
+
+  public static void testInlinePolymorphic() {
+    if (inlinePolymophicSubASubB(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    if (inlinePolymophicSubASubB(new SubB()) != 38) {
+      throw new Error("Expected 38");
+    }
+
+    // Call with a different type than the one from the inline cache.
+    if (inlinePolymophicSubASubB(new SubC()) != 24) {
+      throw new Error("Expected 25");
+    }
+
+    if (inlinePolymophicCrossDexSubASubC(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+
+    if (inlinePolymophicCrossDexSubASubC(new SubC()) != 24) {
+      throw new Error("Expected 24");
+    }
+
+    // Call with a different type than the one from the inline cache.
+    if (inlinePolymophicCrossDexSubASubC(new SubB()) != 38) {
+      throw new Error("Expected 38");
+    }
+  }
+
+  public static void testInlineMegamorphic() {
+    if (inlineMegamorphic(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+  }
+
+  public static void testNoInlineCache() {
+    if (noInlineCache(new SubA()) != 42) {
+      throw new Error("Expected 42");
+    }
+  }
+
+  public static void main(String[] args) {
+    testInlineMonomorphic();
+    testInlinePolymorphic();
+    testInlineMegamorphic();
+    testNoInlineCache();
+  }
+
+}
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/638-checker-inline-caches/src/Super.java
similarity index 82%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/638-checker-inline-caches/src/Super.java
index cba73b3..30cdf30 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/638-checker-inline-caches/src/Super.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+public abstract class Super {
+  abstract int getValue();
 }
diff --git a/test/639-checker-code-sinking/expected.txt b/test/639-checker-code-sinking/expected.txt
new file mode 100644
index 0000000..52e756c
--- /dev/null
+++ b/test/639-checker-code-sinking/expected.txt
@@ -0,0 +1,3 @@
+0
+class java.lang.Object
+43
diff --git a/test/639-checker-code-sinking/info.txt b/test/639-checker-code-sinking/info.txt
new file mode 100644
index 0000000..9722bdf
--- /dev/null
+++ b/test/639-checker-code-sinking/info.txt
@@ -0,0 +1 @@
+Checker tests for the code sinking optimization pass.
diff --git a/test/639-checker-code-sinking/src/Main.java b/test/639-checker-code-sinking/src/Main.java
new file mode 100644
index 0000000..1da19b6
--- /dev/null
+++ b/test/639-checker-code-sinking/src/Main.java
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    testSimpleUse();
+    testTwoUses();
+    testFieldStores(doThrow);
+    testFieldStoreCycle();
+    testArrayStores();
+    testOnlyStoreUses();
+    testNoUse();
+    testPhiInput();
+    testVolatileStore();
+    doThrow = true;
+    try {
+      testInstanceSideEffects();
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+    try {
+      testStaticSideEffects();
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+
+    try {
+      testStoreStore(doThrow);
+    } catch (Error e) {
+      // expected
+      System.out.println(e.getMessage());
+    }
+  }
+
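+  // Code sinking moves instructions whose results are used only on a slow
+  // path (here, the blocks that construct and throw an Error) down into that
+  // path; the CHECK lines below verify where each NewInstance ends up.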
+  /// CHECK-START: void Main.testSimpleUse() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK:                    Throw
+
+  /// CHECK-START: void Main.testSimpleUse() code_sinking (after)
+  /// CHECK-NOT:                NewInstance
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK: <<Error:l\d+>>     LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<Error>>]
+  /// CHECK:                    Throw
+  public static void testSimpleUse() {
+    Object o = new Object();
+    if (doThrow) {
+      throw new Error(o.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testTwoUses() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK:                    Throw
+
+  /// CHECK-START: void Main.testTwoUses() code_sinking (after)
+  /// CHECK-NOT:                NewInstance
+  /// CHECK:                    If
+  /// CHECK:                    begin_block
+  /// CHECK: <<Error:l\d+>>     LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>> LoadClass class_name:java.lang.Object
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                begin_block
+  /// CHECK:                    NewInstance [<<Error>>]
+  /// CHECK:                    Throw
+  public static void testTwoUses() {
+    Object o = new Object();
+    if (doThrow) {
+      throw new Error(o.toString() + o.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testFieldStores(boolean) code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testFieldStores(boolean) code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testFieldStores(boolean doThrow) {
+    Main m = new Main();
+    m.intField = 42;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testFieldStoreCycle() code_sinking (before)
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance1:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK: <<NewInstance2:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance1>>,<<NewInstance2>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance2>>,<<NewInstance1>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+
+  // TODO(ngeoffray): Handle allocation/store cycles.
+  /// CHECK-START: void Main.testFieldStoreCycle() code_sinking (after)
+  /// CHECK: begin_block
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance1:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK: <<NewInstance2:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance1>>,<<NewInstance2>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance2>>,<<NewInstance1>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+  public static void testFieldStoreCycle() {
+    Main m1 = new Main();
+    Main m2 = new Main();
+    m1.objectField = m2;
+    m2.objectField = m1;
+    if (doThrow) {
+      throw new Error(m1.toString() + m2.toString());
+    }
+  }
+
+  /// CHECK-START: void Main.testArrayStores() code_sinking (before)
+  /// CHECK: <<Int1:i\d+>>        IntConstant 1
+  /// CHECK: <<Int0:i\d+>>        IntConstant 0
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object[]
+  /// CHECK: <<NewArray:l\d+>>    NewArray [<<LoadClass>>,<<Int1>>]
+  /// CHECK:                      ArraySet [<<NewArray>>,<<Int0>>,<<NewArray>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testArrayStores() code_sinking (after)
+  /// CHECK: <<Int1:i\d+>>        IntConstant 1
+  /// CHECK: <<Int0:i\d+>>        IntConstant 0
+  /// CHECK-NOT:                  NewArray
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object[]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewArray:l\d+>>    NewArray [<<LoadClass>>,<<Int1>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      ArraySet [<<NewArray>>,<<Int0>>,<<NewArray>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testArrayStores() {
+    Object[] o = new Object[1];
+    o[0] = o;
+    if (doThrow) {
+      throw new Error(o.toString());
+    }
+  }
+
+  // Make sure code sinking does not crash on dead allocations.
+  public static void testOnlyStoreUses() {
+    Main m = new Main();
+    Object[] o = new Object[1];  // dead allocation, should eventually be removed b/35634932.
+    o[0] = m;
+    o = null;  // Avoid environment uses for the array allocation.
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  // Make sure code sinking does not crash on dead code.
+  public static void testNoUse() {
+    Main m = new Main();
+    boolean load = Main.doLoop;  // dead code, not removed because of environment use.
+    // Ensure one environment use for the static field.
+    $opt$noinline$foo();
+    load = false;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  // Make sure we can move code only used by a phi.
+  /// CHECK-START: void Main.testPhiInput() code_sinking (before)
+  /// CHECK: <<Null:l\d+>>        NullConstant
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Phi [<<Null>>,<<NewInstance>>]
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testPhiInput() code_sinking (after)
+  /// CHECK: <<Null:l\d+>>        NullConstant
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:java.lang.Object
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      begin_block
+  /// CHECK:                      Phi [<<Null>>,<<NewInstance>>]
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testPhiInput() {
+    Object f = new Object();
+    if (doThrow) {
+      Object o = null;
+      int i = 2;
+      if (doLoop) {
+        o = f;
+        i = 42;
+      }
+      throw new Error(o.toString() + i);
+    }
+  }
+
+  static void $opt$noinline$foo() {}
+
+  // Check that we do not move volatile stores.
+  /// CHECK-START: void Main.testVolatileStore() code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>        IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>>  NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+
+  /// CHECK-START: void Main.testVolatileStore() code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>        IntConstant 42
+  /// CHECK: <<LoadClass:l\d+>>    LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>>  NewInstance [<<LoadClass>>]
+  /// CHECK:                       InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                       If
+  /// CHECK:                       begin_block
+  /// CHECK:                       Throw
+  public static void testVolatileStore() {
+    Main m = new Main();
+    m.volatileField = 42;
+    if (doThrow) {
+      throw new Error(m.toString());
+    }
+  }
+
+  public static void testInstanceSideEffects() {
+    int a = mainField.intField;
+    $noinline$changeIntField();
+    if (doThrow) {
+      throw new Error("" + a);
+    }
+  }
+
+  static void $noinline$changeIntField() {
+    mainField.intField = 42;
+  }
+
+  public static void testStaticSideEffects() {
+    Object o = obj;
+    $noinline$changeStaticObjectField();
+    if (doThrow) {
+      throw new Error(o.getClass().toString());
+    }
+  }
+
+  static void $noinline$changeStaticObjectField() {
+    obj = new Main();
+  }
+
+  // Test that we preserve the order of stores.
+  /// CHECK-START: void Main.testStoreStore(boolean) code_sinking (before)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<Int43:i\d+>>       IntConstant 43
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int43>>]
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK:                      Throw
+
+  /// CHECK-START: void Main.testStoreStore(boolean) code_sinking (after)
+  /// CHECK: <<Int42:i\d+>>       IntConstant 42
+  /// CHECK: <<Int43:i\d+>>       IntConstant 43
+  /// CHECK-NOT:                  NewInstance
+  /// CHECK:                      If
+  /// CHECK:                      begin_block
+  /// CHECK: <<Error:l\d+>>       LoadClass class_name:java.lang.Error
+  /// CHECK: <<LoadClass:l\d+>>   LoadClass class_name:Main
+  /// CHECK-NOT:                  begin_block
+  /// CHECK: <<NewInstance:l\d+>> NewInstance [<<LoadClass>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int42>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      InstanceFieldSet [<<NewInstance>>,<<Int43>>]
+  /// CHECK-NOT:                  begin_block
+  /// CHECK:                      NewInstance [<<Error>>]
+  /// CHECK:                      Throw
+  public static void testStoreStore(boolean doThrow) {
+    Main m = new Main();
+    m.intField = 42;
+    m.intField = 43;
+    if (doThrow) {
+      throw new Error(m.$opt$noinline$toString());
+    }
+  }
+
+  public String $opt$noinline$toString() {
+    return "" + intField;
+  }
+
+  volatile int volatileField;
+  int intField;
+  Object objectField;
+  static boolean doThrow;
+  static boolean doLoop;
+  static Main mainField = new Main();
+  static Object obj = new Object();
+}
diff --git a/test/640-checker-boolean-simd/expected.txt b/test/640-checker-boolean-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-boolean-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-boolean-simd/info.txt b/test/640-checker-boolean-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-boolean-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-boolean-simd/src/Main.java b/test/640-checker-boolean-simd/src/Main.java
new file mode 100644
index 0000000..f8239fa
--- /dev/null
+++ b/test/640-checker-boolean-simd/src/Main.java
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization.
+ */
+public class Main {
+
+  static boolean[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.and(boolean) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.and(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAnd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void and(boolean x) {
+    for (int i = 0; i < 128; i++)
+      a[i] &= x;  // NOTE: bitwise and, not the common &&
+  }
+
+  /// CHECK-START: void Main.or(boolean) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.or(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecOr    loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void or(boolean x) {
+    for (int i = 0; i < 128; i++)
+      a[i] |= x;  // NOTE: bitwise or, not the common ||
+  }
+
+  /// CHECK-START: void Main.xor(boolean) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.xor(boolean) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecXor   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void xor(boolean x) {
+    for (int i = 0; i < 128; i++)
+      a[i] ^= x;  // NOTE: bitwise xor
+  }
+
+  /// CHECK-START: void Main.not() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void not() {
+    for (int i = 0; i < 128; i++)
+      a[i] = !a[i];
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new boolean[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = (i & 1) == 0;
+    }
+    // Arithmetic operations.
+    and(true);
+    for (int i = 0; i < 128; i++) {
+      expectEquals((i & 1) == 0, a[i], "and-true");
+    }
+    xor(true);
+    for (int i = 0; i < 128; i++) {
+      expectEquals((i & 1) != 0, a[i], "xor-true");
+    }
+    xor(false);
+    for (int i = 0; i < 128; i++) {
+      expectEquals((i & 1) != 0, a[i], "xor-false");
+    }
+    not();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((i & 1) == 0, a[i], "not");
+    }
+    or(true);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(true, a[i], "or-true");
+    }
+    and(false);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(false, a[i], "and-false");
+    }
+    or(false);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(false, a[i], "or-false");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(boolean expected, boolean result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
diff --git a/test/640-checker-byte-simd/expected.txt b/test/640-checker-byte-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-byte-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-byte-simd/info.txt b/test/640-checker-byte-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-byte-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-byte-simd/src/Main.java b/test/640-checker-byte-simd/src/Main.java
new file mode 100644
index 0000000..0f7452b
--- /dev/null
+++ b/test/640-checker-byte-simd/src/Main.java
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization.
+ */
+public class Main {
+
+  static byte[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void add(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void sub(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void mul(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.div(int) loop_optimization (after)
+  //
+  //  Not supported on any architecture.
+  //
+  static void div(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = (byte) -a[i];
+  }
+
+  /// CHECK-START: void Main.not() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void not() {
+    for (int i = 0; i < 128; i++)
+      a[i] = (byte) ~a[i];
+  }
+
+  /// CHECK-START: void Main.shl4() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void shl4() {
+    for (int i = 0; i < 128; i++)
+      a[i] <<= 4;
+  }
+
+  /// CHECK-START: void Main.sar2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sar2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 2;
+  }
+
+  /// CHECK-START: void Main.shr2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void shr2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 2;
+  }
+
+  //
+  // Shift sanity.
+  //
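+  // Note: the byte operands below are promoted to int, and Java masks int shift
+  // distances with & 31 (JLS 15.19), so shifting by 32 is a no-op and 33 shifts by 1.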
+
+  static void sar31() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 31;
+  }
+
+  static void shr31() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 31;
+  }
+
+  static void shr32() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 32;  // 0, since & 31
+  }
+
+  static void shr33() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 33;  // 1, since & 31
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void bounds() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new byte[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = (byte) i;
+    }
+    // Arithmetic operations.
+    add(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte)(i + 2), a[i], "add");
+    }
+    sub(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte)(i + i), a[i], "mul");
+    }
+    div(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(((byte)(i + i)) >> 1, a[i], "div");
+      a[i] = (byte) i;  // undo arithmetic wrap-around effects
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(-i, a[i], "neg");
+    }
+    // Loop bounds.
+    bounds();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals(11 - i, a[i], "bounds");
+    }
+    expectEquals(-127, a[127], "bounds127");
+    // Shifts.
+    for (int i = 0; i < 128; i++) {
+      a[i] = (byte) 0xff;
+    }
+    shl4();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0xf0, a[i], "shl4");
+    }
+    sar2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0xfc, a[i], "sar2");
+    }
+    shr2();
+    for (int i = 0; i < 128; i++) {
+      // sic! the byte is sign-extended to int before >>>; the cast back keeps only the low 8 bits.
+      expectEquals((byte) 0xff, a[i], "shr2");
+    }
+    sar31();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0xff, a[i], "sar31");
+    }
+    shr31();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x01, a[i], "shr31");
+      a[i] = (byte) 0x12;  // reset
+    }
+    shr32();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0x12, a[i], "shr32");
+    }
+    shr33();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0x09, a[i], "shr33");
+      a[i] = (byte) 0xf0;  // reset
+    }
+    not();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((byte) 0x0f, a[i], "not");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
diff --git a/test/640-checker-char-simd/expected.txt b/test/640-checker-char-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-char-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-char-simd/info.txt b/test/640-checker-char-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-char-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-char-simd/src/Main.java b/test/640-checker-char-simd/src/Main.java
new file mode 100644
index 0000000..0628b36
--- /dev/null
+++ b/test/640-checker-char-simd/src/Main.java
@@ -0,0 +1,280 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization.
+ */
+public class Main {
+
+  static char[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void add(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void sub(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void mul(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.div(int) loop_optimization (after)
+  //
+  //  Not supported on any architecture.
+  //
+  static void div(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = (char) -a[i];
+  }
+
+  /// CHECK-START: void Main.not() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void not() {
+    for (int i = 0; i < 128; i++)
+      a[i] = (char) ~a[i];
+  }
+
+  /// CHECK-START: void Main.shl4() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void shl4() {
+    for (int i = 0; i < 128; i++)
+      a[i] <<= 4;
+  }
+
+  /// CHECK-START: void Main.sar2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sar2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 2;
+  }
+
+  /// CHECK-START: void Main.shr2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void shr2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 2;
+  }
+
+  //
+  // Shift sanity.
+  //
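+  // Note: char values are zero-extended when promoted to int, so >> and >>> agree
+  // here, and int shift distances are masked with & 31 (JLS 15.19).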
+
+  static void sar31() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 31;
+  }
+
+  static void shr31() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 31;
+  }
+
+  static void shr32() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 32;  // 0, since & 31
+  }
+
+  static void shr33() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 33;  // 1, since & 31
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void bounds() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new char[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = (char) i;
+    }
+    // Arithmetic operations.
+    add(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + 2, a[i], "add");
+    }
+    sub(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + i, a[i], "mul");
+    }
+    div(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "div");
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char)-i, a[i], "neg");
+    }
+    // Loop bounds.
+    bounds();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals((char)(11 - i), a[i], "bounds");
+    }
+    expectEquals((char)-127, a[127], "bounds127");
+    // Shifts.
+    for (int i = 0; i < 128; i++) {
+      a[i] = (char) 0xffff;
+    }
+    shl4();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char) 0xfff0, a[i], "shl4");
+    }
+    sar2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char) 0x3ffc, a[i], "sar2");
+    }
+    shr2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char) 0x0fff, a[i], "shr2");
+      a[i] = (char) 0xffff;  // reset
+    }
+    sar31();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0, a[i], "sar31");
+      a[i] = (char) 0xffff;  // reset
+    }
+    shr31();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0, a[i], "shr31");
+      a[i] = (char) 0x1200;  // reset
+    }
+    shr32();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char) 0x1200, a[i], "shr32");
+    }
+    shr33();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char) 0x0900, a[i], "shr33");
+      a[i] = (char) 0xf1f0;  // reset
+    }
+    not();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((char) 0x0e0f, a[i], "not");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
diff --git a/test/640-checker-double-simd/expected.txt b/test/640-checker-double-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-double-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-double-simd/info.txt b/test/640-checker-double-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-double-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-double-simd/src/Main.java b/test/640-checker-double-simd/src/Main.java
new file mode 100644
index 0000000..43f65f1
--- /dev/null
+++ b/test/640-checker-double-simd/src/Main.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization. Note that this class provides a mere
+ * functional test, not a precise numerical verifier.
+ */
+public class Main {
+
+  static double[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(double) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(double) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void add(double x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(double) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(double) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sub(double x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(double) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(double) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void mul(double x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(double) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.div(double) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void div(double x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = -a[i];
+  }
+
+  /// CHECK-START: void Main.abs() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.abs() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void abs() {
+    for (int i = 0; i < 128; i++)
+      a[i] = Math.abs(a[i]);
+  }
+
+  /// CHECK-START: void Main.conv(long[]) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.conv(long[]) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void conv(long[] b) {
+    for (int i = 0; i < 128; i++)
+      a[i] = b[i];
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void bounds() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new double[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = i;
+    }
+    // Arithmetic operations.
+    add(2.0);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + 2, a[i], "add");
+    }
+    sub(2.0);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2.0);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + i, a[i], "mul");
+    }
+    div(2.0);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "div");
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(-i, a[i], "neg");
+    }
+    // Loop bounds.
+    bounds();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals(11 - i, a[i], "bounds");
+    }
+    expectEquals(-127, a[127], "bounds127");
+    // Abs.
+    abs();
+    expectEquals(0, a[0], "abs0");
+    for (int i = 1; i <= 11; i++) {
+      expectEquals(11 - i, a[i], "abs_lo");
+    }
+    for (int i = 12; i < 127; i++) {
+      expectEquals(i - 11, a[i], "abs_hi");
+    }
+    expectEquals(127, a[127], "abs127");
+    // Conversion.
+    long[] b = new long[128];
+    for (int i = 0; i < 128; i++) {
+      b[i] = 1000 * i;
+    }
+    conv(b);
+    for (int i = 1; i < 127; i++) {
+      expectEquals(1000.0 * i, a[i], "conv");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(double expected, double result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
diff --git a/test/640-checker-float-simd/expected.txt b/test/640-checker-float-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-float-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-float-simd/info.txt b/test/640-checker-float-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-float-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-float-simd/src/Main.java b/test/640-checker-float-simd/src/Main.java
new file mode 100644
index 0000000..4bcb7e2
--- /dev/null
+++ b/test/640-checker-float-simd/src/Main.java
@@ -0,0 +1,209 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization. Note that this class provides a mere
+ * functional test, not a precise numerical verifier.
+ */
+public class Main {
+
+  static float[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(float) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void add(float x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(float) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void sub(float x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(float) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void mul(float x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(float) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.div(float) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecDiv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void div(float x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = -a[i];
+  }
+
+  /// CHECK-START: void Main.abs() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.abs() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAbs   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void abs() {
+    for (int i = 0; i < 128; i++)
+      a[i] = Math.abs(a[i]);
+  }
+
+  /// CHECK-START: void Main.conv(int[]) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.conv(int[]) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecCnv   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void conv(int[] b) {
+    for (int i = 0; i < 128; i++)
+      a[i] = b[i];
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void bounds() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new float[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = i;
+    }
+    // Arithmetic operations.
+    add(2.0f);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + 2, a[i], "add");
+    }
+    sub(2.0f);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2.0f);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + i, a[i], "mul");
+    }
+    div(2.0f);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "div");
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(-i, a[i], "neg");
+    }
+    // Loop bounds.
+    bounds();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals(11 - i, a[i], "bounds");
+    }
+    expectEquals(-127, a[127], "bounds127");
+    // Abs.
+    abs();
+    expectEquals(0, a[0], "abs0");
+    for (int i = 1; i <= 11; i++) {
+      expectEquals(11 - i, a[i], "abs_lo");
+    }
+    for (int i = 12; i < 127; i++) {
+      expectEquals(i - 11, a[i], "abs_hi");
+    }
+    expectEquals(127, a[127], "abs127");
+    // Conversion.
+    int[] b = new int[128];
+    for (int i = 0; i < 128; i++) {
+      b[i] = 1000 * i;
+    }
+    conv(b);
+    for (int i = 1; i < 127; i++) {
+      expectEquals(1000.0f * i, a[i], "conv");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(float expected, float result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
diff --git a/test/640-checker-int-simd/expected.txt b/test/640-checker-int-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-int-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-int-simd/info.txt b/test/640-checker-int-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-int-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-int-simd/src/Main.java b/test/640-checker-int-simd/src/Main.java
new file mode 100644
index 0000000..ba1e142
--- /dev/null
+++ b/test/640-checker-int-simd/src/Main.java
@@ -0,0 +1,258 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization.
+ */
+public class Main {
+
+  static int[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void add(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void sub(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void mul(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.div(int) loop_optimization (after)
+  //
+  //  Not supported on any architecture.
+  //
+  static void div(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = -a[i];
+  }
+
+  /// CHECK-START: void Main.not() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void not() {
+    for (int i = 0; i < 128; i++)
+      a[i] = ~a[i];
+  }
+
+  /// CHECK-START: void Main.shl4() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void shl4() {
+    for (int i = 0; i < 128; i++)
+      a[i] <<= 4;
+  }
+
+  /// CHECK-START: void Main.sar2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sar2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 2;
+  }
+
+  /// CHECK-START: void Main.shr2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void shr2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 2;
+  }
+
+  //
+  // Shift sanity.
+  //
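+  // Note: Java masks int shift distances with & 31 (JLS 15.19), so shifting by 32
+  // is a no-op and shifting by 33 shifts by one bit.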
+
+  static void shr32() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 32;  // 0, since & 31
+  }
+
+  static void shr33() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 33;  // 1, since & 31
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void bounds() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new int[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = i;
+    }
+    // Arithmetic operations.
+    add(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + 2, a[i], "add");
+    }
+    sub(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + i, a[i], "mul");
+    }
+    div(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "div");
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(-i, a[i], "neg");
+    }
+    // Loop bounds.
+    bounds();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals(11 - i, a[i], "bounds");
+    }
+    expectEquals(-127, a[127], "bounds127");
+    // Shifts.
+    for (int i = 0; i < 128; i++) {
+      a[i] = 0xffffffff;
+    }
+    shl4();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0xfffffff0, a[i], "shl4");
+    }
+    sar2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0xfffffffc, a[i], "sar2");
+    }
+    shr2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x3fffffff, a[i], "shr2");
+    }
+    shr32();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x3fffffff, a[i], "shr32");
+    }
+    shr33();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x1fffffff, a[i], "shr33");
+    }
+    not();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0xe0000000, a[i], "not");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/640-checker-integer-valueof/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/640-checker-integer-valueof/expected.txt
diff --git a/test/640-checker-integer-valueof/info.txt b/test/640-checker-integer-valueof/info.txt
new file mode 100644
index 0000000..51021a4
--- /dev/null
+++ b/test/640-checker-integer-valueof/info.txt
@@ -0,0 +1 @@
+Test for Integer.valueOf.
diff --git a/test/640-checker-integer-valueof/src/Main.java b/test/640-checker-integer-valueof/src/Main.java
new file mode 100644
index 0000000..0837fd1
--- /dev/null
+++ b/test/640-checker-integer-valueof/src/Main.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: java.lang.Integer Main.foo(int) disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK:                      pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo(int a) {
+    return Integer.valueOf(a);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo2() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK-NOT:                  pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo2() {
+    return Integer.valueOf(-42);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo3() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK-NOT:                  pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo3() {
+    return Integer.valueOf(42);
+  }
+
+  /// CHECK-START: java.lang.Integer Main.foo4() disassembly (after)
+  /// CHECK: <<Integer:l\d+>>     InvokeStaticOrDirect method_name:java.lang.Integer.valueOf intrinsic:IntegerValueOf
+  /// CHECK:                      pAllocObjectInitialized
+  /// CHECK:                      Return [<<Integer>>]
+  public static Integer foo4() {
+    return Integer.valueOf(55555);
+  }
+
+  public static void main(String[] args) {
+    assertEqual("42", foo(intField));
+    assertEqual(foo(intField), foo(intField2));
+    assertEqual("-42", foo2());
+    assertEqual("42", foo3());
+    assertEqual("55555", foo4());
+    assertEqual("55555", foo(intField3));
+    assertEqual("-129", foo(intFieldMinus129));
+    assertEqual("-128", foo(intFieldMinus128));
+    assertEqual(foo(intFieldMinus128), foo(intFieldMinus128));
+    assertEqual("-127", foo(intFieldMinus127));
+    assertEqual(foo(intFieldMinus127), foo(intFieldMinus127));
+    assertEqual("126", foo(intField126));
+    assertEqual(foo(intField126), foo(intField126));
+    assertEqual("127", foo(intField127));
+    assertEqual(foo(intField127), foo(intField127));
+    assertEqual("128", foo(intField128));
+  }
+
+  static void assertEqual(String a, Integer b) {
+    if (!a.equals(b.toString())) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
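+  // Reference comparison is intentional: boxed values that hit the Integer cache
+  // must be the very same object.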
+  static void assertEqual(Integer a, Integer b) {
+    if (a != b) {
+      throw new Error("Expected " + a + ", got " + b);
+    }
+  }
+
+  static int intField = 42;
+  static int intField2 = 42;
+  static int intField3 = 55555;
+
+  // Edge cases.
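+  // Integer.valueOf guarantees caching only for [-128, 127]; -129 and 128 sit just outside that range.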
+  static int intFieldMinus129 = -129;
+  static int intFieldMinus128 = -128;
+  static int intFieldMinus127 = -127;
+  static int intField126 = 126;
+  static int intField127 = 127;
+  static int intField128 = 128;
+}
diff --git a/test/640-checker-long-simd/expected.txt b/test/640-checker-long-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-long-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-long-simd/info.txt b/test/640-checker-long-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-long-simd/info.txt
@@ -0,0 +1 @@
+Functional tests for SIMD vectorization.
diff --git a/test/640-checker-long-simd/src/Main.java b/test/640-checker-long-simd/src/Main.java
new file mode 100644
index 0000000..90a2e76
--- /dev/null
+++ b/test/640-checker-long-simd/src/Main.java
@@ -0,0 +1,244 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization.
+ */
+public class Main {
+
+  static long[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(long) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(long) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void add(long x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(long) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(long) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sub(long x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(long) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(long) loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void mul(long x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(long) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.div(long) loop_optimization (after)
+  //
+  //  Not supported on any architecture.
+  //
+  static void div(long x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = -a[i];
+  }
+
+  /// CHECK-START: void Main.not() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void not() {
+    for (int i = 0; i < 128; i++)
+      a[i] = ~a[i];
+  }
+
+  /// CHECK-START: void Main.shl4() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void shl4() {
+    for (int i = 0; i < 128; i++)
+      a[i] <<= 4;
+  }
+
+  /// CHECK-START: void Main.sar2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sar2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 2;
+  }
+
+  /// CHECK-START: void Main.shr2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void shr2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 2;
+  }
+
+  //
+  // Shift sanity.
+  //
+
+  static void shr64() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 64;  // shifts by 0, since the count is masked with & 63
+  }
+
+  static void shr65() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 65;  // shifts by 1, since the count is masked with & 63
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void bounds() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new long[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = i;
+    }
+    // Arithmetic operations.
+    add(2L);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + 2, a[i], "add");
+    }
+    sub(2L);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2L);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + i, a[i], "mul");
+    }
+    div(2L);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "div");
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(-i, a[i], "neg");
+    }
+    // Loop bounds.
+    bounds();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals(11 - i, a[i], "bounds");
+    }
+    expectEquals(-127, a[127], "bounds127");
+    // Shifts.
+    for (int i = 0; i < 128; i++) {
+      a[i] = 0xffffffffffffffffL;
+    }
+    shl4();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0xfffffffffffffff0L, a[i], "shl4");
+    }
+    sar2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0xfffffffffffffffcL, a[i], "sar2");
+    }
+    shr2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x3fffffffffffffffL, a[i], "shr2");
+    }
+    shr64();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x3fffffffffffffffL, a[i], "shr64");
+    }
+    shr65();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x1fffffffffffffffL, a[i], "shr65");
+    }
+    not();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0xe000000000000000L, a[i], "not");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(long expected, long result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
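The shr64/shr65 sanity methods depend on the JLS rule that shifting a long uses only the low six bits of the shift count, so >>>= 64 is a no-op and >>>= 65 shifts by one. A standalone illustration:

    public class LongShiftDemo {
      public static void main(String[] args) {
        long v = 0x3fffffffffffffffL;
        System.out.println((v >>> 64) == v);          // true: 64 & 63 == 0
        System.out.println((v >>> 65) == (v >>> 1));  // true: 65 & 63 == 1
      }
    }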
diff --git a/test/640-checker-short-simd/expected.txt b/test/640-checker-short-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/640-checker-short-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/640-checker-short-simd/info.txt b/test/640-checker-short-simd/info.txt
new file mode 100644
index 0000000..c9c6d5e
--- /dev/null
+++ b/test/640-checker-short-simd/info.txt
@@ -0,0 +1 @@
+Functional tests on SIMD vectorization.
diff --git a/test/640-checker-short-simd/src/Main.java b/test/640-checker-short-simd/src/Main.java
new file mode 100644
index 0000000..241f8e6
--- /dev/null
+++ b/test/640-checker-short-simd/src/Main.java
@@ -0,0 +1,277 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Functional tests for SIMD vectorization.
+ */
+public class Main {
+
+  static short[] a;
+
+  //
+  // Arithmetic operations.
+  //
+
+  /// CHECK-START: void Main.add(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.add(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecAdd   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void add(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] += x;
+  }
+
+  /// CHECK-START: void Main.sub(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sub(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecSub   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void sub(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] -= x;
+  }
+
+  /// CHECK-START: void Main.mul(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.mul(int) loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecMul   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void mul(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] *= x;
+  }
+
+  /// CHECK-START: void Main.div(int) loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.div(int) loop_optimization (after)
+  //
+  //  Not supported on any architecture.
+  //
+  static void div(int x) {
+    for (int i = 0; i < 128; i++)
+      a[i] /= x;
+  }
+
+  /// CHECK-START: void Main.neg() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.neg() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNeg   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void neg() {
+    for (int i = 0; i < 128; i++)
+      a[i] = (short) -a[i];
+  }
+
+  /// CHECK-START: void Main.not() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.not() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecNot   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void not() {
+    for (int i = 0; i < 128; i++)
+      a[i] = (short) ~a[i];
+  }
+
+  /// CHECK-START: void Main.shl4() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shl4() loop_optimization (after)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: VecLoad  loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecShl   loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: VecStore loop:<<Loop>>      outer_loop:none
+  static void shl4() {
+    for (int i = 0; i < 128; i++)
+      a[i] <<= 4;
+  }
+
+  /// CHECK-START: void Main.sar2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.sar2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void sar2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 2;
+  }
+
+  /// CHECK-START: void Main.shr2() loop_optimization (before)
+  /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START-ARM64: void Main.shr2() loop_optimization (after)
+  //
+  // TODO: fill in when supported
+  static void shr2() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 2;
+  }
+
+  //
+  // Shift sanity.
+  //
+
+  static void sar31() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>= 31;
+  }
+
+  static void shr31() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 31;
+  }
+
+  static void shr32() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 32;  // 0, since & 31
+  }
+
+  static void shr33() {
+    for (int i = 0; i < 128; i++)
+      a[i] >>>= 33;  // 1, since & 31
+  }
+
+  //
+  // Loop bounds.
+  //
+
+  static void add() {
+    for (int i = 1; i < 127; i++)
+      a[i] += 11;
+  }
+
+  //
+  // Test Driver.
+  //
+
+  public static void main(String[] args) {
+    // Set up.
+    a = new short[128];
+    for (int i = 0; i < 128; i++) {
+      a[i] = (short) i;
+    }
+    // Arithmetic operations.
+    add(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + 2, a[i], "add");
+    }
+    sub(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "sub");
+    }
+    mul(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i + i, a[i], "mul");
+    }
+    div(2);
+    for (int i = 0; i < 128; i++) {
+      expectEquals(i, a[i], "div");
+    }
+    neg();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(-i, a[i], "neg");
+    }
+    // Loop bounds.
+    add();
+    expectEquals(0, a[0], "bounds0");
+    for (int i = 1; i < 127; i++) {
+      expectEquals(11 - i, a[i], "bounds");
+    }
+    expectEquals(-127, a[127], "bounds127");
+    // Shifts.
+    for (int i = 0; i < 128; i++) {
+      a[i] = (short) 0xffff;
+    }
+    shl4();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0xfff0, a[i], "shl4");
+    }
+    sar2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0xfffc, a[i], "sar2");
+    }
+    shr2();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0xffff, a[i], "shr2");  // sic! int promotion, then truncation back to short
+    }
+    sar31();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0xffff, a[i], "sar31");
+    }
+    shr31();
+    for (int i = 0; i < 128; i++) {
+      expectEquals(0x0001, a[i], "shr31");
+      a[i] = (short) 0x1200;  // reset
+    }
+    shr32();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0x1200, a[i], "shr32");
+    }
+    shr33();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0x0900, a[i], "shr33");
+      a[i] = (short) 0xf0f1;  // reset
+    }
+    not();
+    for (int i = 0; i < 128; i++) {
+      expectEquals((short) 0x0f0e, a[i], "not");
+    }
+    // Done.
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result, String action) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result + " for " + action);
+    }
+  }
+}
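The "sic!" expectation and the & 31 masking both follow from unary numeric promotion: a short operand is widened to int before the shift (so the count is masked with & 31, not & 15), and the compound assignment truncates the int result back to 16 bits. That truncation is why an unsigned shift of (short) 0xffff by 2 still yields 0xffff:

    public class ShortShiftDemo {
      public static void main(String[] args) {
        short s = (short) 0xffff;  // all ones, i.e. -1
        s >>>= 2;                  // int 0xffffffff >>> 2 == 0x3fffffff, truncated to 0xffff
        System.out.println(s == (short) 0xffff);  // true
      }
    }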
diff --git a/test/577-profile-foreign-dex/run b/test/641-checker-arraycopy/build
similarity index 70%
copy from test/577-profile-foreign-dex/run
copy to test/641-checker-arraycopy/build
index ad57d14..9abc618 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/641-checker-arraycopy/build
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+# make us exit on a failure
+set -e
+
+# Don't use jack for this test, to ensure we don't use
+# the typed System.arraycopy versions directly.
+export USE_JACK=false
+
+./default-build
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/641-checker-arraycopy/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/641-checker-arraycopy/expected.txt
diff --git a/test/641-checker-arraycopy/info.txt b/test/641-checker-arraycopy/info.txt
new file mode 100644
index 0000000..1a1111e
--- /dev/null
+++ b/test/641-checker-arraycopy/info.txt
@@ -0,0 +1,2 @@
+Checker test for testing the arraycopy optimization in
+instruction simplifier.
diff --git a/test/641-checker-arraycopy/src/Main.java b/test/641-checker-arraycopy/src/Main.java
new file mode 100644
index 0000000..f0fcf28
--- /dev/null
+++ b/test/641-checker-arraycopy/src/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  // Note that this is testing that we haven't intrinsified the byte[] arraycopy version.
+  // Once we eventually do, this test will need to be re-adjusted.
+
+  /// CHECK-START-X86: void Main.typedCopy(java.lang.Object, byte[]) disassembly (after)
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK-NOT:    call
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK:        call
+  /// CHECK: ReturnVoid
+  public static void typedCopy(Object o, byte[] foo) {
+    System.arraycopy(o, 1, o, 0, 1);
+    System.arraycopy(foo, 1, foo, 0, 1);
+  }
+
+  public static void untypedCopy(Object o, Object foo) {
+    System.arraycopy(o, 1, o, 0, 1);
+    System.arraycopy(foo, 1, foo, 0, 1);
+  }
+
+  // Test that we still do the optimization after inlining.
+
+  /// CHECK-START-X86: void Main.untypedCopyCaller(java.lang.Object, byte[]) disassembly (after)
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK-NOT:    call
+  /// CHECK: InvokeStaticOrDirect method_name:java.lang.System.arraycopy intrinsic:SystemArrayCopy
+  /// CHECK:        call
+  /// CHECK: ReturnVoid
+  public static void untypedCopyCaller(Object o, byte[] array) {
+    untypedCopy(o, array);
+  }
+
+  public static void assertEquals(Object one, Object two) {
+    if (one != two) {
+      throw new Error("Expected " + one + ", got " + two);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Simple sanity checks.
+    byte[] a = new byte[2];
+    Object[] o = new Object[2];
+
+    o[0] = a;
+    o[1] = o;
+    a[0] = 1;
+    a[1] = 2;
+
+    untypedCopyCaller(o, a);
+    assertEquals(o[0], o);
+    assertEquals(o[1], o);
+    assertEquals(a[0], (byte)2);
+    assertEquals(a[1], (byte)2);
+
+    o[0] = a;
+    o[1] = o;
+    a[0] = 1;
+    a[1] = 2;
+
+    typedCopy(o, a);
+    assertEquals(o[0], o);
+    assertEquals(o[1], o);
+    assertEquals(a[0], (byte)2);
+    assertEquals(a[1], (byte)2);
+  }
+}
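The expected values in main rely on System.arraycopy having memmove-style semantics: an overlapping self-copy behaves as if the source range were first copied to a temporary. Copying one element from index 1 to index 0 of {1, 2} therefore leaves {2, 2}:

    public class ArrayCopyDemo {
      public static void main(String[] args) {
        byte[] a = {1, 2};
        System.arraycopy(a, 1, a, 0, 1);  // overlapping self-copy is well defined
        System.out.println(a[0] + " " + a[1]);  // prints: 2 2
      }
    }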
diff --git a/test/641-irreducible-inline/expected.txt b/test/641-irreducible-inline/expected.txt
new file mode 100644
index 0000000..d81cc07
--- /dev/null
+++ b/test/641-irreducible-inline/expected.txt
@@ -0,0 +1 @@
+42
diff --git a/test/641-irreducible-inline/info.txt b/test/641-irreducible-inline/info.txt
new file mode 100644
index 0000000..ec6d0d2
--- /dev/null
+++ b/test/641-irreducible-inline/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing in the presence of
+inlining a method that throws in an irreducible loop.
diff --git a/test/641-irreducible-inline/smali/IrreducibleLoop.smali b/test/641-irreducible-inline/smali/IrreducibleLoop.smali
new file mode 100644
index 0000000..3e6c1f1
--- /dev/null
+++ b/test/641-irreducible-inline/smali/IrreducibleLoop.smali
@@ -0,0 +1,54 @@
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LIrreducibleLoop;
+
+.super Ljava/lang/Object;
+
+.method public static simpleLoop(I)I
+   .registers 3
+   const/16 v0, 42
+   if-eqz p0, :loop_entry
+   goto :other_loop_pre_entry
+
+   # The then part: beginning of the irreducible loop.
+   :loop_entry
+   if-nez p0, :exit
+   invoke-static {v0},LIrreducibleLoop;->foo(I)V
+   :other_loop_entry
+   goto :loop_entry
+
+   # The else part.
+   :other_loop_pre_entry
+   if-eqz p0, :other_loop_entry
+   invoke-static {v0},LIrreducibleLoop;->foo(I)V
+   goto :other_loop_entry
+
+   :exit
+   return v0
+.end method
+
+.method public static foo(I)V
+   .registers 3
+   const/16 v0, 0
+   sget-boolean v1,LIrreducibleLoop;->doThrow:Z
+   if-eqz v1, :exit
+   # Inlining a method that throws requires re-computing loop information
+   # which is unsupported when the caller has an irreducible loop.
+   throw v0
+   :exit
+   return-void
+.end method
+
+.field public static doThrow:Z
diff --git a/test/641-irreducible-inline/src/Main.java b/test/641-irreducible-inline/src/Main.java
new file mode 100644
index 0000000..53244f7
--- /dev/null
+++ b/test/641-irreducible-inline/src/Main.java
@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+
+public class Main {
+  // Workaround for b/18051191.
+  class InnerClass {}
+
+  public static void main(String[] args) throws Exception {
+    Class<?> c = Class.forName("IrreducibleLoop");
+    Method m = c.getMethod("simpleLoop", int.class);
+    Object[] arguments = { 42 };
+    System.out.println(m.invoke(null, arguments));
+  }
+}
diff --git a/test/641-iterations/expected.txt b/test/641-iterations/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/641-iterations/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/641-iterations/info.txt b/test/641-iterations/info.txt
new file mode 100644
index 0000000..fd80595
--- /dev/null
+++ b/test/641-iterations/info.txt
@@ -0,0 +1 @@
+Tests on varying trip counts (to validate vector/cleanup loops).
diff --git a/test/641-iterations/src/Main.java b/test/641-iterations/src/Main.java
new file mode 100644
index 0000000..6a27f80
--- /dev/null
+++ b/test/641-iterations/src/Main.java
@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests of varying trip counts. Focused on testing
+ * core and cleanup loop after vectorization.
+ */
+public class Main {
+
+  static int[] sA;
+
+  static void init() {
+    for (int i = 0; i < sA.length; i++)
+      sA[i] = 100;
+  }
+
+  static void doitTo(int n) {
+    for (int i = 0; i < n; i++)
+      sA[i] += 1;
+  }
+
+  static void doitFrom(int n) {
+    for (int i = n; i < sA.length; i++)
+      sA[i] += 1;
+  }
+
+  static void verify(int n) {
+    for (int i = 0; i < n; i++)
+      if (sA[i] != 101)
+        throw new Error("failed inside loop");
+    for (int i = n; i < sA.length; i++)
+      if (sA[i] != 100)
+        throw new Error("failed outside loop");
+  }
+
+  static void verify() {
+    for (int i = 0; i < sA.length; i++)
+      if (sA[i] != 101)
+        throw new Error("failed inside loop");
+  }
+
+  static void driver() {
+    for (int n = 0; n <= sA.length; n++) {
+      init();
+      doitTo(n);
+      verify(n);
+      doitFrom(n);
+      verify();
+    }
+  }
+
+  public static void main(String[] args) {
+    sA = new int[17];
+    driver();
+    sA = new int[32];
+    driver();
+    System.out.println("passed");
+  }
+}
+
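The driver sweeps every trip count from 0 to sA.length because a vectorizer typically strip-mines a loop into a vector main loop plus a scalar cleanup loop for the leftover iterations, and each trip count exercises a different split between the two. A hand-written sketch of that decomposition (illustrative only, assuming a vector width of 4; the real transformation happens on compiler IR, not source):

    public class StripMineDemo {
      static void addOne(int[] a, int n) {
        int i = 0;
        for (; i + 4 <= n; i += 4) {  // vector main loop: four elements per step
          a[i] += 1; a[i + 1] += 1; a[i + 2] += 1; a[i + 3] += 1;
        }
        for (; i < n; i++) {          // scalar cleanup loop: 0 to 3 leftovers
          a[i] += 1;
        }
      }
    }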
diff --git a/test/642-fp-callees/expected.txt b/test/642-fp-callees/expected.txt
new file mode 100644
index 0000000..77a1486
--- /dev/null
+++ b/test/642-fp-callees/expected.txt
@@ -0,0 +1,2 @@
+JNI_OnLoad called
+Done
diff --git a/test/642-fp-callees/fp_callees.cc b/test/642-fp-callees/fp_callees.cc
new file mode 100644
index 0000000..600f969
--- /dev/null
+++ b/test/642-fp-callees/fp_callees.cc
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/casts.h"
+#include "base/logging.h"
+#include "jni.h"
+
+namespace art {
+
+// Make the array volatile, which apparently makes the C compiler
+// use FP registers in the method below.
+volatile double array[] = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0 };
+
+extern "C" JNIEXPORT void JNICALL Java_Main_holdFpTemporaries(JNIEnv* env, jclass cls) {
+  jmethodID mid = env->GetStaticMethodID(cls, "caller", "(IIJ)V");
+  CHECK(mid != nullptr);
+  // Load values from the arrays, which will be loaded in callee-save FP registers.
+  double a = array[0];
+  double b = array[1];
+  double c = array[2];
+  double d = array[3];
+  double e = array[4];
+  double f = array[5];
+  double g = array[6];
+  double h = array[7];
+  double i = array[8];
+  double j = array[9];
+  double k = array[10];
+  double l = array[11];
+  env->CallStaticVoidMethod(cls, mid, 1, 1, 1L);
+  // Load each element into a temporary to please the C compiler's bit_cast.
+  double temp = array[0];
+  CHECK_EQ(bit_cast<int64_t>(a), bit_cast<int64_t>(temp));
+  temp = array[1];
+  CHECK_EQ(bit_cast<int64_t>(b), bit_cast<int64_t>(temp));
+  temp = array[2];
+  CHECK_EQ(bit_cast<int64_t>(c), bit_cast<int64_t>(temp));
+  temp = array[3];
+  CHECK_EQ(bit_cast<int64_t>(d), bit_cast<int64_t>(temp));
+  temp = array[4];
+  CHECK_EQ(bit_cast<int64_t>(e), bit_cast<int64_t>(temp));
+  temp = array[5];
+  CHECK_EQ(bit_cast<int64_t>(f), bit_cast<int64_t>(temp));
+  temp = array[6];
+  CHECK_EQ(bit_cast<int64_t>(g), bit_cast<int64_t>(temp));
+  temp = array[7];
+  CHECK_EQ(bit_cast<int64_t>(h), bit_cast<int64_t>(temp));
+  temp = array[8];
+  CHECK_EQ(bit_cast<int64_t>(i), bit_cast<int64_t>(temp));
+  temp = array[9];
+  CHECK_EQ(bit_cast<int64_t>(j), bit_cast<int64_t>(temp));
+  temp = array[10];
+  CHECK_EQ(bit_cast<int64_t>(k), bit_cast<int64_t>(temp));
+  temp = array[11];
+  CHECK_EQ(bit_cast<int64_t>(l), bit_cast<int64_t>(temp));
+}
+
+}  // namespace art
diff --git a/test/642-fp-callees/info.txt b/test/642-fp-callees/info.txt
new file mode 100644
index 0000000..d3e4bda
--- /dev/null
+++ b/test/642-fp-callees/info.txt
@@ -0,0 +1,2 @@
+Regression test for the vixl32 backend, which used to incorrectly
+use D14 as a temporary register.
diff --git a/test/642-fp-callees/src/Main.java b/test/642-fp-callees/src/Main.java
new file mode 100644
index 0000000..fa57c93
--- /dev/null
+++ b/test/642-fp-callees/src/Main.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    holdFpTemporaries();
+    System.out.println("Done");
+  }
+
+  public static void caller(int a, int b, long c) {
+    $noinline$callee(a, b, c);
+  }
+
+  // This method is marked "no inline" in order to generate the
+  // problematic floating point use at the call site.
+  public static void $noinline$callee(int a, int b, long c) {
+  }
+
+  public native static void holdFpTemporaries();
+}
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/643-checker-bogus-ic/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/643-checker-bogus-ic/expected.txt
diff --git a/test/643-checker-bogus-ic/info.txt b/test/643-checker-bogus-ic/info.txt
new file mode 100644
index 0000000..d5dfff4
--- /dev/null
+++ b/test/643-checker-bogus-ic/info.txt
@@ -0,0 +1 @@
+Verify the compiler can handle a bogus inline cache in a profile.
diff --git a/test/643-checker-bogus-ic/profile b/test/643-checker-bogus-ic/profile
new file mode 100644
index 0000000..cbf7796
--- /dev/null
+++ b/test/643-checker-bogus-ic/profile
@@ -0,0 +1,2 @@
+LMain;->inlineMonomorphic(LMain;)I+LUnrelated;
+LMain;->inlinePolymorphic(LMain;)I+LUnrelated;,LMain;
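Reading the format informally, as the two lines suggest, each profile entry is a method descriptor followed by '+' and a comma-separated list of classes recorded in the inline cache for the call site in that method:

    LMain;->inlineMonomorphic(LMain;)I+LUnrelated;
    \------ method descriptor -------/\- cache --/

Both caches name Unrelated, a class with no getValue method, which is what makes them bogus; the CHECK lines in the test below verify the inliner leaves the InvokeVirtual in place rather than trusting the cache.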
diff --git a/test/577-profile-foreign-dex/run b/test/643-checker-bogus-ic/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/643-checker-bogus-ic/run
index ad57d14..146e180 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/643-checker-bogus-ic/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} $@ --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/643-checker-bogus-ic/src/Main.java b/test/643-checker-bogus-ic/src/Main.java
new file mode 100644
index 0000000..0aa8477
--- /dev/null
+++ b/test/643-checker-bogus-ic/src/Main.java
@@ -0,0 +1,49 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Unrelated {
+}
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (after)
+  /// CHECK:   InvokeVirtual method_name:Main.getValue
+
+  public static int inlineMonomorphic(Main a) {
+    return a.getValue();
+  }
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (after)
+  /// CHECK:   InvokeVirtual method_name:Main.getValue
+  public static int inlinePolymorphic(Main a) {
+    return a.getValue();
+  }
+
+  public int getValue() {
+    return 42;
+  }
+
+  public static void main(String[] args) {
+    inlineMonomorphic(new Main());
+  }
+
+}
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/644-checker-deopt/expected.txt
similarity index 100%
copy from test/577-profile-foreign-dex/expected.txt
copy to test/644-checker-deopt/expected.txt
diff --git a/test/644-checker-deopt/info.txt b/test/644-checker-deopt/info.txt
new file mode 100644
index 0000000..c5fb12c
--- /dev/null
+++ b/test/644-checker-deopt/info.txt
@@ -0,0 +1,2 @@
+Regression test making sure HDeoptimize is executed before
+the code it is meant to prevent from executing.
diff --git a/test/644-checker-deopt/profile b/test/644-checker-deopt/profile
new file mode 100644
index 0000000..cb261cc
--- /dev/null
+++ b/test/644-checker-deopt/profile
@@ -0,0 +1,2 @@
+LMain;->inlineMonomorphic(LMain;)I+LMain;
+LMain;->inlinePolymorphic(LMain;)I+LMain;,LSubMain;
diff --git a/test/577-profile-foreign-dex/run b/test/644-checker-deopt/run
similarity index 71%
copy from test/577-profile-foreign-dex/run
copy to test/644-checker-deopt/run
index ad57d14..146e180 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/644-checker-deopt/run
@@ -1,12 +1,12 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
-#      http://www.apache.org/licenses/LICENSE-2.0
+#     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+exec ${RUN} $@ --profile -Xcompiler-option --compiler-filter=speed-profile
diff --git a/test/644-checker-deopt/src/Main.java b/test/644-checker-deopt/src/Main.java
new file mode 100644
index 0000000..17c80a6
--- /dev/null
+++ b/test/644-checker-deopt/src/Main.java
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (before)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (after)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+
+  public static int inlineMonomorphic(Main a) {
+    if (a == null) {
+      return 42;
+    }
+    int i = 0;
+    while (i < 100) {
+      i += a.getValue();
+    }
+    return i;
+  }
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (before)
+  /// CHECK:       InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlinePolymorphic(Main) inliner (after)
+  /// CHECK-NOT:   InvokeVirtual method_name:Main.getValue
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (before)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+
+  /// CHECK-START: int Main.inlineMonomorphic(Main) licm (after)
+  /// CHECK:   <<Deopt:l\d+>> Deoptimize
+  /// CHECK:                  InstanceFieldGet [<<Deopt>>] field_name:Main.value
+  public static int inlinePolymorphic(Main a) {
+    return a.getValue();
+  }
+
+  public int getValue() {
+    return value;
+  }
+
+  public static void main(String[] args) {
+    inlineMonomorphic(new Main());
+  }
+
+  int value = 1;
+}
+
+// Add a subclass of 'Main' to write the polymorphic inline cache in the profile.
+class SubMain extends Main {
+}
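With the speed-profile filter and the monomorphic cache above, the inliner replaces the virtual call with the body of Main.getValue guarded by an HDeoptimize type check, and the CHECK lines pin down that the inlined field read stays data-dependent on that guard across LICM. A rough source-level picture of the guarded shape (illustrative only; the actual guard lives in compiler IR, and the sketch assumes it sits inside this test's Main class):

    static int inlineMonomorphicSketch(Main a) {
      if (a == null) return 42;
      int i = 0;
      while (i < 100) {
        if (a.getClass() != Main.class) {
          // HDeoptimize: bail out to the interpreter and redo the virtual call.
          throw new IllegalStateException("deoptimize");
        }
        i += a.value;  // inlined getValue(); must not be hoisted above the guard
      }
      return i;
    }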
diff --git a/test/645-checker-abs-simd/expected.txt b/test/645-checker-abs-simd/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/645-checker-abs-simd/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/645-checker-abs-simd/info.txt b/test/645-checker-abs-simd/info.txt
new file mode 100644
index 0000000..8fa4066
--- /dev/null
+++ b/test/645-checker-abs-simd/info.txt
@@ -0,0 +1 @@
+Functional tests on abs SIMD vectorization.
diff --git a/test/645-checker-abs-simd/src/Main.java b/test/645-checker-abs-simd/src/Main.java
new file mode 100644
index 0000000..3111350
--- /dev/null
+++ b/test/645-checker-abs-simd/src/Main.java
@@ -0,0 +1,216 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests for ABS vectorization.
+ */
+public class Main {
+
+  private static final int SPQUIET = 1 << 22;
+  private static final long DPQUIET = 1L << 51;
+
+  private static void doitInt(int[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = Math.abs(x[i]);
+    }
+  }
+
+  private static void doitLong(long[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = Math.abs(x[i]);
+    }
+  }
+
+  private static void doitFloat(float[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = Math.abs(x[i]);
+    }
+  }
+
+  private static void doitDouble(double[] x) {
+    for (int i = 0; i < x.length; i++) {
+      x[i] = Math.abs(x[i]);
+    }
+  }
+
+  public static void main(String[] args) {
+    // Set up minint32, maxint32 and some others.
+    int[] xi = new int[8];
+    xi[0] = 0x80000000;
+    xi[1] = 0x7fffffff;
+    xi[2] = 0x80000001;
+    xi[3] = -13;
+    xi[4] = -1;
+    xi[5] = 0;
+    xi[6] = 1;
+    xi[7] = 999;
+    doitInt(xi);
+    expectEquals32(0x80000000, xi[0]);
+    expectEquals32(0x7fffffff, xi[1]);
+    expectEquals32(0x7fffffff, xi[2]);
+    expectEquals32(13, xi[3]);
+    expectEquals32(1, xi[4]);
+    expectEquals32(0, xi[5]);
+    expectEquals32(1, xi[6]);
+    expectEquals32(999, xi[7]);
+
+    // Set up minint64, maxint64 and some others.
+    long[] xl = new long[8];
+    xl[0] = 0x8000000000000000L;
+    xl[1] = 0x7fffffffffffffffL;
+    xl[2] = 0x8000000000000001L;
+    xl[3] = -13;
+    xl[4] = -1;
+    xl[5] = 0;
+    xl[6] = 1;
+    xl[7] = 999;
+    doitLong(xl);
+    expectEquals64(0x8000000000000000L, xl[0]);
+    expectEquals64(0x7fffffffffffffffL, xl[1]);
+    expectEquals64(0x7fffffffffffffffL, xl[2]);
+    expectEquals64(13, xl[3]);
+    expectEquals64(1, xl[4]);
+    expectEquals64(0, xl[5]);
+    expectEquals64(1, xl[6]);
+    expectEquals64(999, xl[7]);
+
+    // Set up float NaN and some others.
+    float[] xf = new float[16];
+    xf[0] = Float.intBitsToFloat(0x7f800001);
+    xf[1] = Float.intBitsToFloat(0x7fa00000);
+    xf[2] = Float.intBitsToFloat(0x7fc00000);
+    xf[3] = Float.intBitsToFloat(0x7fffffff);
+    xf[4] = Float.intBitsToFloat(0xff800001);
+    xf[5] = Float.intBitsToFloat(0xffa00000);
+    xf[6] = Float.intBitsToFloat(0xffc00000);
+    xf[7] = Float.intBitsToFloat(0xffffffff);
+    xf[8] = Float.NEGATIVE_INFINITY;
+    xf[9] = -99.2f;
+    xf[10] = -1.0f;
+    xf[11] = -0.0f;
+    xf[12] = +0.0f;
+    xf[13] = +1.0f;
+    xf[14] = +99.2f;
+    xf[15] = Float.POSITIVE_INFINITY;
+    doitFloat(xf);
+    expectEqualsNaN32(0x7f800001, Float.floatToRawIntBits(xf[0]));
+    expectEqualsNaN32(0x7fa00000, Float.floatToRawIntBits(xf[1]));
+    expectEqualsNaN32(0x7fc00000, Float.floatToRawIntBits(xf[2]));
+    expectEqualsNaN32(0x7fffffff, Float.floatToRawIntBits(xf[3]));
+    expectEqualsNaN32(0x7f800001, Float.floatToRawIntBits(xf[4]));
+    expectEqualsNaN32(0x7fa00000, Float.floatToRawIntBits(xf[5]));
+    expectEqualsNaN32(0x7fc00000, Float.floatToRawIntBits(xf[6]));
+    expectEqualsNaN32(0x7fffffff, Float.floatToRawIntBits(xf[7]));
+    expectEquals32(
+        Float.floatToRawIntBits(Float.POSITIVE_INFINITY),
+        Float.floatToRawIntBits(xf[8]));
+    expectEquals32(
+        Float.floatToRawIntBits(99.2f),
+        Float.floatToRawIntBits(xf[9]));
+    expectEquals32(
+        Float.floatToRawIntBits(1.0f),
+        Float.floatToRawIntBits(xf[10]));
+    expectEquals32(0, Float.floatToRawIntBits(xf[11]));
+    expectEquals32(0, Float.floatToRawIntBits(xf[12]));
+    expectEquals32(
+        Float.floatToRawIntBits(1.0f),
+        Float.floatToRawIntBits(xf[13]));
+    expectEquals32(
+        Float.floatToRawIntBits(99.2f),
+        Float.floatToRawIntBits(xf[14]));
+    expectEquals32(
+        Float.floatToRawIntBits(Float.POSITIVE_INFINITY),
+        Float.floatToRawIntBits(xf[15]));
+
+    // Set up double NaN and some others.
+    double[] xd = new double[16];
+    xd[0] = Double.longBitsToDouble(0x7ff0000000000001L);
+    xd[1] = Double.longBitsToDouble(0x7ff4000000000000L);
+    xd[2] = Double.longBitsToDouble(0x7ff8000000000000L);
+    xd[3] = Double.longBitsToDouble(0x7fffffffffffffffL);
+    xd[4] = Double.longBitsToDouble(0xfff0000000000001L);
+    xd[5] = Double.longBitsToDouble(0xfff4000000000000L);
+    xd[6] = Double.longBitsToDouble(0xfff8000000000000L);
+    xd[7] = Double.longBitsToDouble(0xffffffffffffffffL);
+    xd[8] = Double.NEGATIVE_INFINITY;
+    xd[9] = -99.2f;
+    xd[10] = -1.0f;
+    xd[11] = -0.0f;
+    xd[12] = +0.0f;
+    xd[13] = +1.0f;
+    xd[14] = +99.2f;
+    xd[15] = Double.POSITIVE_INFINITY;
+    doitDouble(xd);
+    expectEqualsNaN64(0x7ff0000000000001L, Double.doubleToRawLongBits(xd[0]));
+    expectEqualsNaN64(0x7ff4000000000000L, Double.doubleToRawLongBits(xd[1]));
+    expectEqualsNaN64(0x7ff8000000000000L, Double.doubleToRawLongBits(xd[2]));
+    expectEqualsNaN64(0x7fffffffffffffffL, Double.doubleToRawLongBits(xd[3]));
+    expectEqualsNaN64(0x7ff0000000000001L, Double.doubleToRawLongBits(xd[4]));
+    expectEqualsNaN64(0x7ff4000000000000L, Double.doubleToRawLongBits(xd[5]));
+    expectEqualsNaN64(0x7ff8000000000000L, Double.doubleToRawLongBits(xd[6]));
+    expectEqualsNaN64(0x7fffffffffffffffL, Double.doubleToRawLongBits(xd[7]));
+    expectEquals64(
+        Double.doubleToRawLongBits(Double.POSITIVE_INFINITY),
+        Double.doubleToRawLongBits(xd[8]));
+    expectEquals64(
+        Double.doubleToRawLongBits(99.2f),
+        Double.doubleToRawLongBits(xd[9]));
+    expectEquals64(
+        Double.doubleToRawLongBits(1.0f),
+        Double.doubleToRawLongBits(xd[10]));
+    expectEquals64(0, Double.doubleToRawLongBits(xd[11]));
+    expectEquals64(0, Double.doubleToRawLongBits(xd[12]));
+    expectEquals64(
+        Double.doubleToRawLongBits(1.0f),
+        Double.doubleToRawLongBits(xd[13]));
+    expectEquals64(
+        Double.doubleToRawLongBits(99.2f),
+        Double.doubleToRawLongBits(xd[14]));
+    expectEquals64(
+        Double.doubleToRawLongBits(Double.POSITIVE_INFINITY),
+        Double.doubleToRawLongBits(xd[15]));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals32(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  private static void expectEquals64(long expected, long result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  // We allow that an expected NaN result has become quiet.
+  private static void expectEqualsNaN32(int expected, int result) {
+    if (expected != result && (expected | SPQUIET) != result) {
+      throw new Error("Expected: 0x" + Integer.toHexString(expected)
+          + ", found: 0x" + Integer.toHexString(result));
+    }
+  }
+
+  // We allow that an expected NaN result has become quiet.
+  private static void expectEqualsNaN64(long expected, long result) {
+    if (expected != result && (expected | DPQUIET) != result) {
+      throw new Error("Expected: 0x" + Long.toHexString(expected)
+          + ", found: 0x" + Long.toHexString(result));
+    }
+  }
+}
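SPQUIET and DPQUIET above are the quiet-NaN bits of the two encodings: bit 22 of the 23-bit float mantissa and bit 51 of the 52-bit double mantissa. Hardware abs implementations may turn a signaling NaN into its quiet form, so the comparators also accept expected | quiet-bit. A small standalone sketch of the accepted pair (values chosen to mirror expectEqualsNaN32):

    public class QuietNaNDemo {
      public static void main(String[] args) {
        int expected = 0x7f800001;           // signaling float NaN pattern
        int quieted = expected | (1 << 22);  // 0x7fc00001, its quieted form
        // Both patterns pass expectEqualsNaN32 against 'expected'.
        System.out.println(Integer.toHexString(expected) + " or "
            + Integer.toHexString(quieted) + " is accepted");
      }
    }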
diff --git a/test/701-easy-div-rem/build b/test/701-easy-div-rem/build
index 666fe89..d83ee82 100644
--- a/test/701-easy-div-rem/build
+++ b/test/701-easy-div-rem/build
@@ -21,12 +21,4 @@
 mkdir src
 python ./genMain.py
 
-# Increase the file size limitation for classes.lst as the machine generated
-# source file contains a lot of methods and is quite large.
-
-# Jack generates big temp files so only apply ulimit for dx.
-if [ ${USE_JACK} = "false" ]; then
-  ulimit -S 4096
-fi
-
 ./default-build
diff --git a/test/900-hello-plugin/expected.txt b/test/900-hello-plugin/expected.txt
index 43db31c..c160f65 100644
--- a/test/900-hello-plugin/expected.txt
+++ b/test/900-hello-plugin/expected.txt
@@ -3,6 +3,8 @@
 GetEnvHandler called in test 900
 GetEnvHandler called with version 0x900fffff
 GetEnv returned '900' environment!
+Agent_OnLoad called with options "test_900_round_2"
 Hello, world!
 Agent_OnUnload called
+Agent_OnUnload called
 ArtPlugin_Deinitialize called in test 900
diff --git a/test/900-hello-plugin/load_unload.cc b/test/900-hello-plugin/load_unload.cc
index a38cc3d..290997a 100644
--- a/test/900-hello-plugin/load_unload.cc
+++ b/test/900-hello-plugin/load_unload.cc
@@ -52,6 +52,9 @@
                                                char* options,
                                                void* reserved ATTRIBUTE_UNUSED) {
   printf("Agent_OnLoad called with options \"%s\"\n", options);
+  if (strcmp("test_900_round_2", options) == 0) {
+    return 0;
+  }
   uintptr_t env = 0;
   jint res = vm->GetEnv(reinterpret_cast<void**>(&env), TEST_900_ENV_VERSION_NUMBER);
   if (res != JNI_OK) {
diff --git a/test/900-hello-plugin/run b/test/900-hello-plugin/run
index 50835f8..c633f6d 100755
--- a/test/900-hello-plugin/run
+++ b/test/900-hello-plugin/run
@@ -19,4 +19,5 @@
   plugin=libartagent.so
 fi
 ./default-run "$@" --runtime-option -agentpath:${plugin}=test_900 \
+                   --runtime-option -agentpath:${plugin}=test_900_round_2 \
                    --android-runtime-option -Xplugin:${plugin}
diff --git a/test/901-hello-ti-agent/basics.cc b/test/901-hello-ti-agent/basics.cc
index 0b17656..20b227a 100644
--- a/test/901-hello-ti-agent/basics.cc
+++ b/test/901-hello-ti-agent/basics.cc
@@ -16,14 +16,17 @@
 
 #include "901-hello-ti-agent/basics.h"
 
+#include <thread>
+
 #include <jni.h>
 #include <stdio.h>
 #include <string.h>
-#include "base/macros.h"
-#include "openjdkjvmti/jvmti.h"
+#include "android-base/macros.h"
+#include "jvmti.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test901HelloTi {
@@ -146,18 +149,32 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jint iflag, jboolean val) {
   jvmtiVerboseFlag flag = static_cast<jvmtiVerboseFlag>(iflag);
   jvmtiError result = jvmti_env->SetVerboseFlag(flag, val);
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkLivePhase(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   jvmtiPhase current_phase;
   jvmtiError phase_result = jvmti_env->GetPhase(&current_phase);
-  if (JvmtiErrorToException(env, phase_result)) {
+  if (JvmtiErrorToException(env, jvmti_env, phase_result)) {
     return JNI_FALSE;
   }
   return (current_phase == JVMTI_PHASE_LIVE) ? JNI_TRUE : JNI_FALSE;
 }
 
+static void CallJvmtiFunction(jvmtiEnv* env, jclass klass, jvmtiError* err) {
+  jint n;
+  jmethodID* methods = nullptr;
+  *err = env->GetClassMethods(klass, &n, &methods);
+}
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_checkUnattached(
+    JNIEnv* env ATTRIBUTE_UNUSED, jclass Main_klass) {
+  jvmtiError res = JVMTI_ERROR_NONE;
+  std::thread t1(CallJvmtiFunction, jvmti_env, Main_klass, &res);
+  t1.join();
+  return res == JVMTI_ERROR_UNATTACHED_THREAD;
+}
+
 }  // namespace Test901HelloTi
 }  // namespace art
diff --git a/test/901-hello-ti-agent/expected.txt b/test/901-hello-ti-agent/expected.txt
index c4b24cb..eb5b6a2 100644
--- a/test/901-hello-ti-agent/expected.txt
+++ b/test/901-hello-ti-agent/expected.txt
@@ -3,6 +3,7 @@
 VMInit
 Hello, world!
 Agent in live phase.
+Received expected error for unattached JVMTI calls
 0
 1
 2
diff --git a/test/901-hello-ti-agent/src/Main.java b/test/901-hello-ti-agent/src/Main.java
index 4d62ed3..556e05b 100644
--- a/test/901-hello-ti-agent/src/Main.java
+++ b/test/901-hello-ti-agent/src/Main.java
@@ -21,6 +21,9 @@
     if (checkLivePhase()) {
       System.out.println("Agent in live phase.");
     }
+    if (checkUnattached()) {
+      System.out.println("Received expected error for unattached JVMTI calls");
+    }
 
     set(0);  // OTHER
     set(1);  // GC
@@ -41,4 +44,5 @@
 
   private static native boolean checkLivePhase();
   private static native void setVerboseFlag(int flag, boolean value);
+  private static native boolean checkUnattached();
 }
diff --git a/test/903-hello-tagging/tagging.cc b/test/903-hello-tagging/tagging.cc
index 6177263..701b0c3 100644
--- a/test/903-hello-tagging/tagging.cc
+++ b/test/903-hello-tagging/tagging.cc
@@ -19,43 +19,30 @@
 #include <stdio.h>
 #include <vector>
 
+#include "android-base/logging.h"
 #include "jni.h"
-#include "ScopedLocalRef.h"
-#include "ScopedPrimitiveArray.h"
+#include "scoped_local_ref.h"
+#include "scoped_primitive_array.h"
 
-#include "art_method-inl.h"
-#include "base/logging.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
-#include "utils.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test903HelloTagging {
 
-extern "C" JNIEXPORT void JNICALL Java_Main_setTag(JNIEnv* env ATTRIBUTE_UNUSED,
-                                                   jclass,
-                                                   jobject obj,
-                                                   jlong tag) {
+extern "C" JNIEXPORT void JNICALL Java_Main_setTag(JNIEnv* env, jclass, jobject obj, jlong tag) {
   jvmtiError ret = jvmti_env->SetTag(obj, tag);
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error setting tag: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-  }
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
-extern "C" JNIEXPORT jlong JNICALL Java_Main_getTag(JNIEnv* env ATTRIBUTE_UNUSED,
-                                                    jclass,
-                                                    jobject obj) {
+extern "C" JNIEXPORT jlong JNICALL Java_Main_getTag(JNIEnv* env, jclass, jobject obj) {
   jlong tag = 0;
   jvmtiError ret = jvmti_env->GetTag(obj, &tag);
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error getting tag: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
+    return 0;
   }
   return tag;
 }
@@ -86,11 +73,7 @@
                                                  &result_count,
                                                  result_object_array_ptr,
                                                  result_tag_array_ptr);
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Failure running GetLoadedClasses: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return nullptr;
   }
 
@@ -197,4 +180,3 @@
 
 }  // namespace Test903HelloTagging
 }  // namespace art
-
diff --git a/test/904-object-allocation/tracking.cc b/test/904-object-allocation/tracking.cc
index 95eab0c..c829496 100644
--- a/test/904-object-allocation/tracking.cc
+++ b/test/904-object-allocation/tracking.cc
@@ -19,14 +19,15 @@
 #include <stdio.h>
 #include <vector>
 
-#include "base/logging.h"
+#include "android-base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
-#include "ScopedUtfChars.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
-#include "utils.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test904ObjectAllocation {
@@ -57,21 +58,16 @@
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_setupObjectAllocCallback(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED, jboolean enable) {
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jboolean enable) {
   jvmtiEventCallbacks callbacks;
   memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
   callbacks.VMObjectAlloc = enable ? ObjectAllocated : nullptr;
 
   jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error setting callbacks: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-  }
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
-extern "C" JNIEXPORT void JNICALL Java_Main_enableAllocationTracking(JNIEnv* env ATTRIBUTE_UNUSED,
+extern "C" JNIEXPORT void JNICALL Java_Main_enableAllocationTracking(JNIEnv* env,
                                                                      jclass,
                                                                      jthread thread,
                                                                      jboolean enable) {
@@ -79,14 +75,8 @@
       enable ? JVMTI_ENABLE : JVMTI_DISABLE,
       JVMTI_EVENT_VM_OBJECT_ALLOC,
       thread);
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error enabling/disabling allocation tracking: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-  }
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
 }  // namespace Test904ObjectAllocation
 }  // namespace art
-
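
Note: the allocation test above uses the standard two-step JVMTI event pattern: SetEventCallbacks installs (or clears) the callback table for the environment, and SetEventNotificationMode toggles delivery, optionally restricted to a single thread. A compact sketch under the assumption that jvmti_env already holds the can_generate_vm_object_alloc_events capability; the helper names are illustrative:

#include <cstring>

#include "jvmti.h"

// Step 2 below only delivers events here once notification is enabled.
static void JNICALL OnAlloc(jvmtiEnv* jvmti, JNIEnv* jni, jthread thread,
                            jobject object, jclass object_klass, jlong size) {
  // Inspect or tag the freshly allocated object.
}

void EnableAllocEvents(jvmtiEnv* jvmti, jthread thread, bool enable) {
  // Step 1: install (or clear) the callback table.
  jvmtiEventCallbacks callbacks;
  memset(&callbacks, 0, sizeof(callbacks));
  callbacks.VMObjectAlloc = enable ? OnAlloc : nullptr;
  jvmti->SetEventCallbacks(&callbacks, sizeof(callbacks));
  // Step 2: toggle delivery; passing a thread limits events to that thread.
  jvmti->SetEventNotificationMode(enable ? JVMTI_ENABLE : JVMTI_DISABLE,
                                  JVMTI_EVENT_VM_OBJECT_ALLOC, thread);
}
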
diff --git a/test/905-object-free/expected.txt b/test/905-object-free/expected.txt
index 436ca11..c226df7 100644
--- a/test/905-object-free/expected.txt
+++ b/test/905-object-free/expected.txt
@@ -10,3 +10,4 @@
 ---
 []
 ---
+Free counts 100000 100000
diff --git a/test/905-object-free/src/Main.java b/test/905-object-free/src/Main.java
index e41e378..0d57629 100644
--- a/test/905-object-free/src/Main.java
+++ b/test/905-object-free/src/Main.java
@@ -33,6 +33,9 @@
 
     enableFreeTracking(false);
     run(l);
+
+    enableFreeTracking(true);
+    stress();
   }
 
   private static void run(ArrayList<Object> l) {
@@ -62,6 +65,30 @@
     System.out.println("---");
   }
 
+  private static void stressAllocate(int i) {
+    Object obj = new Object();
+    setTag(obj, i);
+    setTag2(obj, i + 1);
+  }
+
+  private static void stress() {
+    getCollectedTags(0);
+    getCollectedTags(1);
+    // Allocate objects.
+    for (int i = 1; i <= 100000; ++i) {
+      stressAllocate(i);
+    }
+    Runtime.getRuntime().gc();
+    long[] freedTags1 = getCollectedTags(0);
+    long[] freedTags2 = getCollectedTags(1);
+    System.out.println("Free counts " + freedTags1.length + " " + freedTags2.length);
+    for (int i = 0; i < freedTags1.length; ++i) {
+      if (freedTags1[i] + 1 != freedTags2[i]) {
+        System.out.println("Mismatched tags " + freedTags1[i] + " " + freedTags2[i]);
+      }
+    }
+  }
+
   private static void allocate(ArrayList<Object> l, long tag) {
     Object obj = new Object();
     l.add(obj);
@@ -69,7 +96,7 @@
   }
 
   private static void getAndPrintTags() {
-    long[] freedTags = getCollectedTags();
+    long[] freedTags = getCollectedTags(0);
     Arrays.sort(freedTags);
     System.out.println(Arrays.toString(freedTags));
   }
@@ -77,5 +104,6 @@
   private static native void setupObjectFreeCallback();
   private static native void enableFreeTracking(boolean enable);
   private static native void setTag(Object o, long tag);
-  private static native long[] getCollectedTags();
+  private static native long[] getCollectedTags(int index);
+  private static native void setTag2(Object o, long tag);
 }
diff --git a/test/905-object-free/tracking_free.cc b/test/905-object-free/tracking_free.cc
index 7b26d79..59b429c 100644
--- a/test/905-object-free/tracking_free.cc
+++ b/test/905-object-free/tracking_free.cc
@@ -19,66 +19,91 @@
 #include <stdio.h>
 #include <vector>
 
-#include "base/logging.h"
+#include "android-base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
-#include "ScopedUtfChars.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
-#include "utils.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test905ObjectFree {
 
-static std::vector<jlong> collected_tags;
+static std::vector<jlong> collected_tags1;
+static std::vector<jlong> collected_tags2;
 
-static void JNICALL ObjectFree(jvmtiEnv* ti_env ATTRIBUTE_UNUSED, jlong tag) {
-  collected_tags.push_back(tag);
+jvmtiEnv* jvmti_env2;
+
+static void JNICALL ObjectFree1(jvmtiEnv* ti_env, jlong tag) {
+  CHECK_EQ(ti_env, jvmti_env);
+  collected_tags1.push_back(tag);
+}
+
+static void JNICALL ObjectFree2(jvmtiEnv* ti_env, jlong tag) {
+  CHECK_EQ(ti_env, jvmti_env2);
+  collected_tags2.push_back(tag);
+}
+
+static void setupObjectFreeCallback(JNIEnv* env, jvmtiEnv* jenv, jvmtiEventObjectFree callback) {
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.ObjectFree = callback;
+  jvmtiError ret = jenv->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  JvmtiErrorToException(env, jenv, ret);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_setupObjectFreeCallback(
-    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED) {
-  jvmtiEventCallbacks callbacks;
-  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
-  callbacks.ObjectFree = ObjectFree;
-
-  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error setting callbacks: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-  }
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  setupObjectFreeCallback(env, jvmti_env, ObjectFree1);
+  JavaVM* jvm = nullptr;
+  env->GetJavaVM(&jvm);
+  CHECK_EQ(jvm->GetEnv(reinterpret_cast<void**>(&jvmti_env2), JVMTI_VERSION_1_2), 0);
+  SetAllCapabilities(jvmti_env2);
+  setupObjectFreeCallback(env, jvmti_env2, ObjectFree2);
 }
 
-extern "C" JNIEXPORT void JNICALL Java_Main_enableFreeTracking(JNIEnv* env ATTRIBUTE_UNUSED,
+extern "C" JNIEXPORT void JNICALL Java_Main_enableFreeTracking(JNIEnv* env,
                                                                jclass klass ATTRIBUTE_UNUSED,
                                                                jboolean enable) {
   jvmtiError ret = jvmti_env->SetEventNotificationMode(
       enable ? JVMTI_ENABLE : JVMTI_DISABLE,
       JVMTI_EVENT_OBJECT_FREE,
       nullptr);
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Error enabling/disabling object-free callbacks: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
+    return;
   }
+  ret = jvmti_env2->SetEventNotificationMode(
+      enable ? JVMTI_ENABLE : JVMTI_DISABLE,
+      JVMTI_EVENT_OBJECT_FREE,
+      nullptr);
+  JvmtiErrorToException(env, jvmti_env2, ret);
 }
 
 extern "C" JNIEXPORT jlongArray JNICALL Java_Main_getCollectedTags(JNIEnv* env,
-                                                                   jclass klass ATTRIBUTE_UNUSED) {
-  jlongArray ret = env->NewLongArray(collected_tags.size());
+                                                                   jclass klass ATTRIBUTE_UNUSED,
+                                                                   jint index) {
+  std::vector<jlong>& tags = (index == 0) ? collected_tags1 : collected_tags2;
+  jlongArray ret = env->NewLongArray(tags.size());
   if (ret == nullptr) {
     return ret;
   }
 
-  env->SetLongArrayRegion(ret, 0, collected_tags.size(), collected_tags.data());
-  collected_tags.clear();
+  env->SetLongArrayRegion(ret, 0, tags.size(), tags.data());
+  tags.clear();
 
   return ret;
 }
 
+extern "C" JNIEXPORT void JNICALL Java_Main_setTag2(JNIEnv* env,
+                                                    jclass klass ATTRIBUTE_UNUSED,
+                                                    jobject obj,
+                                                    jlong tag) {
+  jvmtiError ret = jvmti_env2->SetTag(obj, tag);
+  JvmtiErrorToException(env, jvmti_env2, ret);
+}
+
 }  // namespace Test905ObjectFree
 }  // namespace art
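
Note: the two-environment setup above leans on JavaVM::GetEnv handing out a fresh jvmtiEnv for every JVMTI-version request, which keeps the two ObjectFree callbacks and their tag maps independent. A sketch of that acquisition step, with an illustrative helper name:

#include "jni.h"
#include "jvmti.h"

// Illustrative helper, not part of the patch: each GetEnv call with a JVMTI
// version is expected to return a distinct environment with its own
// capabilities, callbacks, and tag map.
jvmtiEnv* MakeSecondEnv(JNIEnv* env) {
  JavaVM* vm = nullptr;
  if (env->GetJavaVM(&vm) != JNI_OK) {
    return nullptr;
  }
  jvmtiEnv* second = nullptr;
  if (vm->GetEnv(reinterpret_cast<void**>(&second), JVMTI_VERSION_1_2) != JNI_OK) {
    return nullptr;  // JVMTI unavailable at this version.
  }
  return second;
}
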
diff --git a/test/906-iterate-heap/expected.txt b/test/906-iterate-heap/expected.txt
index 3e857ab..b6af843 100644
--- a/test/906-iterate-heap/expected.txt
+++ b/test/906-iterate-heap/expected.txt
@@ -18,3 +18,27 @@
 2
 1@0 (32, 2xD '0000000000000000000000000000f03f')
 2
+10000@0 (static, int, index=3) 0000000000000000
+10001
+10000@0 (static, int, index=11) 0000000000000000
+10001
+10000@0 (static, int, index=0) 0000000000000000
+10001
+10000@0 (static, int, index=1) 0000000000000000
+10001
+10000@0 (instance, int, index=2) 0000000000000000
+10001@0 (instance, byte, index=4) 0000000000000001
+10002@0 (instance, char, index=5) 0000000000000061
+10003@0 (instance, int, index=6) 0000000000000003
+10004@0 (instance, long, index=7) 0000000000000004
+10005@0 (instance, short, index=9) 0000000000000002
+10006
+10000@0 (instance, int, index=3) 0000000000000000
+10001@0 (instance, byte, index=5) 0000000000000001
+10002@0 (instance, char, index=6) 0000000000000061
+10003@0 (instance, int, index=7) 0000000000000003
+10004@0 (instance, long, index=8) 0000000000000004
+10005@0 (instance, short, index=10) 0000000000000002
+10006@0 (instance, double, index=12) 3ff3ae147ae147ae
+10007@0 (instance, float, index=13) 000000003f9d70a4
+10008
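
Worked example: the trailing hex word on each new expected line is the raw jvalue printed as eight bytes, so the values can be checked by hand: 0000000000000061 is 'a', 3ff3ae147ae147ae is the IEEE-754 encoding of the double 1.23, and 000000003f9d70a4 carries the single-precision bits of 1.23f in its low word. A standalone check, assuming nothing beyond the C++ standard library:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Decode the hex words from the expected output back into the field values
// assigned in the test's Main.java (1.23 and 1.23f).
int main() {
  uint64_t double_bits = 0x3ff3ae147ae147aeULL;
  double d;
  memcpy(&d, &double_bits, sizeof(d));
  uint32_t float_bits = 0x3f9d70a4u;
  float f;
  memcpy(&f, &float_bits, sizeof(f));
  printf("%.2f %.2f\n", d, f);  // prints "1.23 1.23"
  return 0;
}
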
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
index 890220e..bb30074 100644
--- a/test/906-iterate-heap/iterate_heap.cc
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -23,14 +23,18 @@
 #include <stdio.h>
 #include <vector>
 
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
-#include "base/logging.h"
+
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedPrimitiveArray.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
-#include "utf.h"
+#include "jvmti.h"
+#include "scoped_primitive_array.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
+#include "ti_utf.h"
 
 namespace art {
 namespace Test906IterateHeap {
@@ -52,7 +56,7 @@
   return config->Handle(class_tag, size, tag_ptr, length);
 }
 
-static bool Run(jint heap_filter, jclass klass_filter, IterationConfig* config) {
+static bool Run(JNIEnv* env, jint heap_filter, jclass klass_filter, IterationConfig* config) {
   jvmtiHeapCallbacks callbacks;
   memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
   callbacks.heap_iteration_callback = HeapIterationCallback;
@@ -61,17 +65,13 @@
                                                  klass_filter,
                                                  &callbacks,
                                                  config);
-  if (ret != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(ret, &err);
-    printf("Failure running IterateThroughHeap: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return false;
   }
   return true;
 }
 
-extern "C" JNIEXPORT jint JNICALL Java_Main_iterateThroughHeapCount(JNIEnv* env ATTRIBUTE_UNUSED,
+extern "C" JNIEXPORT jint JNICALL Java_Main_iterateThroughHeapCount(JNIEnv* env,
                                                                     jclass klass ATTRIBUTE_UNUSED,
                                                                     jint heap_filter,
                                                                     jclass klass_filter,
@@ -99,7 +99,7 @@
   };
 
   CountIterationConfig config(0, stop_after);
-  Run(heap_filter, klass_filter, &config);
+  Run(env, heap_filter, klass_filter, &config);
 
   if (config.counter > config.stop_after) {
     printf("Error: more objects visited than signaled.");
@@ -135,7 +135,7 @@
   };
 
   DataIterationConfig config;
-  if (!Run(heap_filter, klass_filter, &config)) {
+  if (!Run(env, heap_filter, klass_filter, &config)) {
     return -1;
   }
 
@@ -154,7 +154,7 @@
   return static_cast<jint>(config.class_tags_.size());
 }
 
-extern "C" JNIEXPORT void JNICALL Java_Main_iterateThroughHeapAdd(JNIEnv* env ATTRIBUTE_UNUSED,
+extern "C" JNIEXPORT void JNICALL Java_Main_iterateThroughHeapAdd(JNIEnv* env,
                                                                   jclass klass ATTRIBUTE_UNUSED,
                                                                   jint heap_filter,
                                                                   jclass klass_filter) {
@@ -175,7 +175,7 @@
   };
 
   AddIterationConfig config;
-  Run(heap_filter, klass_filter, &config);
+  Run(env, heap_filter, klass_filter, &config);
 }
 
 extern "C" JNIEXPORT jstring JNICALL Java_Main_iterateThroughHeapString(
@@ -199,10 +199,10 @@
                                             void* user_data) {
       FindStringCallbacks* p = reinterpret_cast<FindStringCallbacks*>(user_data);
       if (*tag_ptr == p->tag_to_find) {
-        size_t utf_byte_count = CountUtf8Bytes(value, value_length);
+        size_t utf_byte_count = ti::CountUtf8Bytes(value, value_length);
         std::unique_ptr<char[]> mod_utf(new char[utf_byte_count + 1]);
         memset(mod_utf.get(), 0, utf_byte_count + 1);
-        ConvertUtf16ToModifiedUtf8(mod_utf.get(), utf_byte_count, value, value_length);
+        ti::ConvertUtf16ToModifiedUtf8(mod_utf.get(), utf_byte_count, value, value_length);
         if (!p->data.empty()) {
           p->data += "\n";
         }
@@ -228,7 +228,7 @@
 
   FindStringCallbacks fsc(tag);
   jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &fsc);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return nullptr;
   }
   return env->NewStringUTF(fsc.data.c_str());
@@ -316,11 +316,98 @@
 
   FindArrayCallbacks fac(tag);
   jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &fac);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return nullptr;
   }
   return env->NewStringUTF(fac.data.c_str());
 }
 
+static constexpr const char* GetPrimitiveTypeName(jvmtiPrimitiveType type) {
+  switch (type) {
+    case JVMTI_PRIMITIVE_TYPE_BOOLEAN:
+      return "boolean";
+    case JVMTI_PRIMITIVE_TYPE_BYTE:
+      return "byte";
+    case JVMTI_PRIMITIVE_TYPE_CHAR:
+      return "char";
+    case JVMTI_PRIMITIVE_TYPE_SHORT:
+      return "short";
+    case JVMTI_PRIMITIVE_TYPE_INT:
+      return "int";
+    case JVMTI_PRIMITIVE_TYPE_FLOAT:
+      return "float";
+    case JVMTI_PRIMITIVE_TYPE_LONG:
+      return "long";
+    case JVMTI_PRIMITIVE_TYPE_DOUBLE:
+      return "double";
+  }
+  LOG(FATAL) << "Unknown type " << static_cast<size_t>(type);
+  UNREACHABLE();
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_iterateThroughHeapPrimitiveFields(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jlong tag) {
+  struct FindFieldCallbacks {
+    explicit FindFieldCallbacks(jlong t) : tag_to_find(t) {}
+
+    static jint JNICALL HeapIterationCallback(jlong class_tag ATTRIBUTE_UNUSED,
+                                              jlong size ATTRIBUTE_UNUSED,
+                                              jlong* tag_ptr ATTRIBUTE_UNUSED,
+                                              jint length ATTRIBUTE_UNUSED,
+                                              void* user_data ATTRIBUTE_UNUSED) {
+      return 0;
+    }
+
+    static jint JNICALL PrimitiveFieldValueCallback(jvmtiHeapReferenceKind kind,
+                                                    const jvmtiHeapReferenceInfo* info,
+                                                    jlong class_tag,
+                                                    jlong* tag_ptr,
+                                                    jvalue value,
+                                                    jvmtiPrimitiveType value_type,
+                                                    void* user_data) {
+      FindFieldCallbacks* p = reinterpret_cast<FindFieldCallbacks*>(user_data);
+      if (*tag_ptr >= p->tag_to_find) {
+        std::ostringstream oss;
+        oss << *tag_ptr
+            << '@'
+            << class_tag
+            << " ("
+            << (kind == JVMTI_HEAP_REFERENCE_FIELD ? "instance, " : "static, ")
+            << GetPrimitiveTypeName(value_type)
+            << ", index="
+            << info->field.index
+            << ") ";
+        // Be lazy, always print eight bytes.
+        static_assert(sizeof(jvalue) == sizeof(uint64_t), "Unexpected jvalue size");
+        uint64_t val;
+        memcpy(&val, &value, sizeof(uint64_t));  // To avoid undefined behavior.
+        oss << android::base::StringPrintf("%016" PRIx64, val);
+
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += oss.str();
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+    const jlong tag_to_find;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_iteration_callback = FindFieldCallbacks::HeapIterationCallback;
+  callbacks.primitive_field_callback = FindFieldCallbacks::PrimitiveFieldValueCallback;
+
+  FindFieldCallbacks ffc(tag);
+  jvmtiError ret = jvmti_env->IterateThroughHeap(0, nullptr, &callbacks, &ffc);
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(ffc.data.c_str());
+}
+
 }  // namespace Test906IterateHeap
 }  // namespace art
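
Aside: the static_assert plus memcpy in PrimitiveFieldValueCallback is the aliasing-safe way to dump a jvalue: the callback receives a union of which only one member is live, and copying the bytes into a uint64_t avoids reading an inactive member directly. The same logic, factored into a sketch with an illustrative name:

#include <cinttypes>
#include <cstdint>
#include <cstring>
#include <string>

#include "android-base/stringprintf.h"
#include "jni.h"

// Illustrative helper mirroring the callback's dump logic: memcpy makes the
// byte-for-byte reinterpretation well-defined even though only one union
// member of the jvalue was written.
std::string DumpJvalue(jvalue value) {
  static_assert(sizeof(jvalue) == sizeof(uint64_t), "Unexpected jvalue size");
  uint64_t raw = 0;
  memcpy(&raw, &value, sizeof(raw));
  return android::base::StringPrintf("%016" PRIx64, raw);
}
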
diff --git a/test/906-iterate-heap/src/Main.java b/test/906-iterate-heap/src/Main.java
index d499886..365ce0f 100644
--- a/test/906-iterate-heap/src/Main.java
+++ b/test/906-iterate-heap/src/Main.java
@@ -119,6 +119,60 @@
     setTag(dArray, 1);
     System.out.println(iterateThroughHeapPrimitiveArray(getTag(dArray)));
     System.out.println(getTag(dArray));
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsClasses();
+
+    doTestPrimitiveFieldsIntegral();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsFloat();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+  }
+
+  private static void doTestPrimitiveFieldsClasses() {
+    setTag(IntObject.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(IntObject.class));
+    setTag(IntObject.class, 0);
+
+    setTag(FloatObject.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(FloatObject.class));
+    setTag(FloatObject.class, 0);
+
+    setTag(Inf1.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(Inf1.class));
+    setTag(Inf1.class, 0);
+
+    setTag(Inf2.class, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(Inf2.class));
+    setTag(Inf2.class, 0);
+  }
+
+  private static void doTestPrimitiveFieldsIntegral() {
+    IntObject intObject = new IntObject();
+    setTag(intObject, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(intObject));
+  }
+
+  private static void doTestPrimitiveFieldsFloat() {
+    FloatObject floatObject = new FloatObject();
+    setTag(floatObject, 10000);
+    System.out.println(iterateThroughHeapPrimitiveFields(10000));
+    System.out.println(getTag(floatObject));
   }
 
   static class A {
@@ -172,6 +226,31 @@
     return ret;
   }
 
+  private static interface Inf1 {
+    public final static int A = 1;
+  }
+
+  private static interface Inf2 extends Inf1 {
+    public final static int B = 1;
+  }
+
+  private static class IntObject implements Inf1 {
+    byte b = (byte)1;
+    char c = 'a';
+    short s = (short)2;
+    int i = 3;
+    long l = 4;
+    Object o = new Object();
+    static int sI = 5;
+  }
+
+  private static class FloatObject extends IntObject implements Inf2 {
+    float f = 1.23f;
+    double d = 1.23;
+    Object p = new Object();
+    static int sI = 6;
+  }
+
   private static native void setTag(Object o, long tag);
   private static native long getTag(Object o);
 
@@ -188,4 +267,5 @@
       Class<?> klassFilter);
   private static native String iterateThroughHeapString(long tag);
   private static native String iterateThroughHeapPrimitiveArray(long tag);
+  private static native String iterateThroughHeapPrimitiveFields(long tag);
 }
diff --git a/test/907-get-loaded-classes/get_loaded_classes.cc b/test/907-get-loaded-classes/get_loaded_classes.cc
index 5bda7eb..5ec56c4 100644
--- a/test/907-get-loaded-classes/get_loaded_classes.cc
+++ b/test/907-get-loaded-classes/get_loaded_classes.cc
@@ -19,14 +19,16 @@
 #include <stdio.h>
 #include <vector>
 
-#include "base/macros.h"
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
-#include "ScopedUtfChars.h"
+#include "android-base/macros.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+#include "jni.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
+
+// Test infrastructure
+#include "jni_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test907GetLoadedClasses {
diff --git a/test/908-gc-start-finish/gc_callbacks.cc b/test/908-gc-start-finish/gc_callbacks.cc
index 8f96ee6..f186895 100644
--- a/test/908-gc-start-finish/gc_callbacks.cc
+++ b/test/908-gc-start-finish/gc_callbacks.cc
@@ -17,11 +17,14 @@
 #include <stdio.h>
 #include <string.h>
 
-#include "base/macros.h"
+#include "android-base/macros.h"
+
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test908GcStartFinish {
@@ -45,7 +48,7 @@
   callbacks.GarbageCollectionStart = GarbageCollectionStart;
 
   jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  JvmtiErrorToException(env, ret);
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_enableGcTracking(JNIEnv* env,
@@ -55,14 +58,14 @@
       enable ? JVMTI_ENABLE : JVMTI_DISABLE,
       JVMTI_EVENT_GARBAGE_COLLECTION_START,
       nullptr);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
   ret = jvmti_env->SetEventNotificationMode(
       enable ? JVMTI_ENABLE : JVMTI_DISABLE,
       JVMTI_EVENT_GARBAGE_COLLECTION_FINISH,
       nullptr);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
 }
diff --git a/test/909-attach-agent/attach.cc b/test/909-attach-agent/attach.cc
index adae844..0150e09 100644
--- a/test/909-attach-agent/attach.cc
+++ b/test/909-attach-agent/attach.cc
@@ -19,8 +19,10 @@
 #include <jni.h>
 #include <stdio.h>
 #include <string.h>
-#include "base/macros.h"
-#include "openjdkjvmti/jvmti.h"
+
+#include "android-base/macros.h"
+
+#include "jvmti.h"
 
 namespace art {
 namespace Test909AttachAgent {
diff --git a/test/910-methods/methods.cc b/test/910-methods/methods.cc
index f60fabb..ded4f09 100644
--- a/test/910-methods/methods.cc
+++ b/test/910-methods/methods.cc
@@ -16,13 +16,16 @@
 
 #include <stdio.h>
 
-#include "base/macros.h"
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "android-base/macros.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+#include "jni.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test910Methods {
@@ -35,11 +38,7 @@
   char* sig;
   char* gen;
   jvmtiError result = jvmti_env->GetMethodName(id, &name, &sig, &gen);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetMethodName: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -67,11 +66,7 @@
 
   // Also run GetMethodName with all parameter pointers null to check for segfaults.
   jvmtiError result2 = jvmti_env->GetMethodName(id, nullptr, nullptr, nullptr);
-  if (result2 != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result2, &err);
-    printf("Failure running GetMethodName(null, null, null): %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, result2)) {
     return nullptr;
   }
 
@@ -84,11 +79,7 @@
 
   jclass declaring_class;
   jvmtiError result = jvmti_env->GetMethodDeclaringClass(id, &declaring_class);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetMethodDeclaringClass: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -101,11 +92,7 @@
 
   jint modifiers;
   jvmtiError result = jvmti_env->GetMethodModifiers(id, &modifiers);
-  if (result != JVMTI_ERROR_NONE) {
-    char* err;
-    jvmti_env->GetErrorName(result, &err);
-    printf("Failure running GetMethodModifiers: %s\n", err);
-    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return 0;
   }
 
@@ -118,7 +105,7 @@
 
   jint max_locals;
   jvmtiError result = jvmti_env->GetMaxLocals(id, &max_locals);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
 
@@ -131,7 +118,7 @@
 
   jint arguments;
   jvmtiError result = jvmti_env->GetArgumentsSize(id, &arguments);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
 
@@ -145,7 +132,7 @@
   jlong start;
   jlong end;
   jvmtiError result = jvmti_env->GetMethodLocation(id, &start, &end);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
 
@@ -159,7 +146,7 @@
   jlong start;
   jlong end;
   jvmtiError result = jvmti_env->GetMethodLocation(id, &start, &end);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
 
@@ -172,7 +159,7 @@
 
   jboolean is_native;
   jvmtiError result = jvmti_env->IsMethodNative(id, &is_native);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return JNI_FALSE;
   }
 
@@ -185,7 +172,7 @@
 
   jboolean is_obsolete;
   jvmtiError result = jvmti_env->IsMethodObsolete(id, &is_obsolete);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return JNI_FALSE;
   }
 
@@ -198,7 +185,7 @@
 
   jboolean is_synthetic;
   jvmtiError result = jvmti_env->IsMethodSynthetic(id, &is_synthetic);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return JNI_FALSE;
   }
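
For reference: the GetMethodName(id, nullptr, nullptr, nullptr) call earlier in this file is, per its comment, a segfault probe, since callers may pass null for any out-value they do not need and the runtime is expected to tolerate all three being null. Reduced to a sketch with an illustrative name:

#include "jvmti.h"

// Illustrative probe, not part of the patch: for a valid method this should
// return JVMTI_ERROR_NONE rather than crash, even with no out-parameters.
jvmtiError ProbeGetMethodName(jvmtiEnv* jvmti, jmethodID method) {
  return jvmti->GetMethodName(method, nullptr, nullptr, nullptr);
}
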
 
diff --git a/test/911-get-stack-trace/stack_trace.cc b/test/911-get-stack-trace/stack_trace.cc
index 68f6d8d..a499e90 100644
--- a/test/911-get-stack-trace/stack_trace.cc
+++ b/test/911-get-stack-trace/stack_trace.cc
@@ -18,16 +18,19 @@
 #include <memory>
 #include <stdio.h>
 
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
 
-#include "android-base/stringprintf.h"
-#include "base/logging.h"
-#include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
 
 namespace art {
 namespace Test911GetStackTrace {
@@ -68,7 +71,7 @@
     char* gen;
     {
       jvmtiError result2 = jvmti_env->GetMethodName(frames[method_index].method, &name, &sig, &gen);
-      if (JvmtiErrorToException(env, result2)) {
+      if (JvmtiErrorToException(env, jvmti_env, result2)) {
         return nullptr;
       }
     }
@@ -83,10 +86,7 @@
         // Accept absent info and native method errors.
         if (line_result != JVMTI_ERROR_ABSENT_INFORMATION &&
             line_result != JVMTI_ERROR_NATIVE_METHOD) {
-          char* err;
-          jvmti_env->GetErrorName(line_result, &err);
-          printf("Failure running GetLineNumberTable: %s\n", err);
-          jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
+          JvmtiErrorToException(env, jvmti_env, line_result);
           return nullptr;
         }
         line_number_table = nullptr;
@@ -139,7 +139,7 @@
   jint count;
   {
     jvmtiError result = jvmti_env->GetStackTrace(thread, start, max, frames.get(), &count);
-    if (JvmtiErrorToException(env, result)) {
+    if (JvmtiErrorToException(env, jvmti_env, result)) {
       return nullptr;
     }
   }
@@ -153,7 +153,7 @@
   jvmtiStackInfo* stack_infos;
   {
     jvmtiError result = jvmti_env->GetAllStackTraces(max, &stack_infos, &thread_count);
-    if (JvmtiErrorToException(env, result)) {
+    if (JvmtiErrorToException(env, jvmti_env, result)) {
       return nullptr;
     }
   }
@@ -189,7 +189,7 @@
                                                             threads.get(),
                                                             max,
                                                             &stack_infos);
-    if (JvmtiErrorToException(env, result)) {
+    if (JvmtiErrorToException(env, jvmti_env, result)) {
       return nullptr;
     }
   }
@@ -215,7 +215,7 @@
     JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jthread thread) {
   jint count;
   jvmtiError result = jvmti_env->GetFrameCount(thread, &count);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
   return count;
@@ -227,7 +227,7 @@
   jlocation location;
 
   jvmtiError result = jvmti_env->GetFrameLocation(thread, depth, &method, &location);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -237,12 +237,12 @@
       {
         jclass decl_class;
         jvmtiError class_result = jvmti_env->GetMethodDeclaringClass(method, &decl_class);
-        if (JvmtiErrorToException(env, class_result)) {
+        if (JvmtiErrorToException(env, jvmti_env, class_result)) {
           return nullptr;
         }
         jint modifiers;
         jvmtiError mod_result = jvmti_env->GetMethodModifiers(method, &modifiers);
-        if (JvmtiErrorToException(env, mod_result)) {
+        if (JvmtiErrorToException(env, jvmti_env, mod_result)) {
           return nullptr;
         }
         constexpr jint kStatic = 0x8;
diff --git a/test/912-classes/classes.cc b/test/912-classes/classes.cc
index 3ccfe86..2636367 100644
--- a/test/912-classes/classes.cc
+++ b/test/912-classes/classes.cc
@@ -16,19 +16,22 @@
 
 #include <stdio.h>
 
-#include "base/macros.h"
+#include "android-base/macros.h"
+
 #include "class_linker.h"
 #include "jni.h"
 #include "mirror/class_loader.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "runtime.h"
-#include "ScopedLocalRef.h"
-#include "ScopedUtfChars.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test912Classes {
@@ -233,7 +236,7 @@
   jint count = 0;
   jclass* classes = nullptr;
   jvmtiError result = jvmti_env->GetClassLoaderClasses(jclassloader, &count, &classes);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -251,7 +254,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
   jint major, minor;
   jvmtiError result = jvmti_env->GetClassVersionNumbers(klass, &minor, &major);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -270,7 +273,7 @@
   jvmtiError result = jenv->GetClassSignature(klass, &name, nullptr);
   if (result != JVMTI_ERROR_NONE) {
     if (jni_env != nullptr) {
-      JvmtiErrorToException(jni_env, result);
+      JvmtiErrorToException(jni_env, jenv, result);
     } else {
       printf("Failed to get class signature.\n");
     }
@@ -291,13 +294,13 @@
     jvmtiError ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                          JVMTI_EVENT_CLASS_LOAD,
                                                          nullptr);
-    if (JvmtiErrorToException(env, ret)) {
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
       return;
     }
     ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                               JVMTI_EVENT_CLASS_PREPARE,
                                               nullptr);
-    JvmtiErrorToException(env, ret);
+    JvmtiErrorToException(env, jvmti_env, ret);
     return;
   }
 
@@ -306,20 +309,20 @@
   callbacks.ClassLoad = class_load;
   callbacks.ClassPrepare = class_prepare;
   jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
 
   ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
                                             JVMTI_EVENT_CLASS_LOAD,
                                             nullptr);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
   ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
                                             JVMTI_EVENT_CLASS_PREPARE,
                                             nullptr);
-  JvmtiErrorToException(env, ret);
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
 class ClassLoadPreparePrinter {
@@ -364,7 +367,7 @@
     jvmtiError result = jenv->GetThreadInfo(thread, &info);
     if (result != JVMTI_ERROR_NONE) {
       if (jni_env != nullptr) {
-        JvmtiErrorToException(jni_env, result);
+        JvmtiErrorToException(jni_env, jenv, result);
       } else {
         printf("Failed to get thread name.\n");
       }
diff --git a/test/912-classes/expected.txt b/test/912-classes/expected.txt
index 6b86ac9..0f2920a 100644
--- a/test/912-classes/expected.txt
+++ b/test/912-classes/expected.txt
@@ -92,3 +92,7 @@
 Prepare: LA; on TestRunner (cur=TestRunner)
 Load: LC; on TestRunner
 Prepare: LC; on TestRunner (cur=TestRunner)
+Load: L$Proxy1; on main
+Prepare: L$Proxy1; on main (cur=main)
+Load: [LMain; on main
+Prepare: [LMain; on main (cur=main)
diff --git a/test/912-classes/src/Main.java b/test/912-classes/src/Main.java
index 5d25d76..643b0807 100644
--- a/test/912-classes/src/Main.java
+++ b/test/912-classes/src/Main.java
@@ -282,6 +282,10 @@
     t.start();
     t.join();
 
+    // Check creation of arrays and proxies.
+    Proxy.getProxyClass(Main.class.getClassLoader(), new Class[] { Comparable.class });
+    Class.forName("[LMain;");
+
     enableClassLoadPreparePrintEvents(false);
 
     // Note: the JIT part of this test is about the JIT pulling in a class not yet touched by
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
index 46805d7..2a183ee 100644
--- a/test/913-heaps/expected.txt
+++ b/test/913-heaps/expected.txt
@@ -8,34 +8,34 @@
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 root@root --(jni-global)--> 1@1000 [size=16, length=-1]
@@ -49,38 +49,70 @@
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
-[1@0 (32, 'HelloWorld')]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+---
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+---
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+---
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+---
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+---
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=13,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
+root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+---
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+---
+[1@0 (32, 'HelloWorld'), 2@0 (16, '')]
 2
+3
 2@0 (15, 3xB '010203')
 3@0 (16, 2xC '41005a00')
 8@0 (32, 2xD '0000000000000000000000000000f03f')
@@ -90,18 +122,42 @@
 4@0 (18, 3xS '010002000300')
 1@0 (14, 2xZ '0001')
 23456789
+10000@0 (static, int, index=3) 0000000000000000
+10001
+10000@0 (static, int, index=11) 0000000000000000
+10001
+10000@0 (static, int, index=0) 0000000000000000
+10001
+10000@0 (static, int, index=1) 0000000000000000
+10001
+10000@0 (instance, int, index=2) 0000000000000000
+10001@0 (instance, byte, index=4) 0000000000000001
+10002@0 (instance, char, index=5) 0000000000000061
+10003@0 (instance, int, index=6) 0000000000000003
+10004@0 (instance, long, index=7) 0000000000000004
+10005@0 (instance, short, index=9) 0000000000000002
+10006
+10000@0 (instance, int, index=3) 0000000000000000
+10001@0 (instance, byte, index=5) 0000000000000001
+10002@0 (instance, char, index=6) 0000000000000061
+10003@0 (instance, int, index=7) 0000000000000003
+10004@0 (instance, long, index=8) 0000000000000004
+10005@0 (instance, short, index=10) 0000000000000002
+10006@0 (instance, double, index=12) 3ff3ae147ae147ae
+10007@0 (instance, float, index=13) 000000003f9d70a4
+10008
 --- klass ---
 root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
 0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
 root@root --(jni-global)--> 1@1000 [size=16, length=-1]
 root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
@@ -109,15 +165,15 @@
 root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
 root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 1@1000 [size=16, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
 --- heap_filter ---
 ---- tagged objects
@@ -128,41 +184,40 @@
 ---- untagged objects
 root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
 root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
-root@root --(system-class)--> 2@0 [size=32, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 root@root --(jni-global)--> 1@1000 [size=16, length=-1]
@@ -170,46 +225,44 @@
 root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=13,location= 10])--> 1@1000 [size=16, length=-1]
 root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
 root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
-root@root --(system-class)--> 2@0 [size=32, length=-1]
 root@root --(thread)--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
 1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
 1@1000 --(class)--> 1000@0 [size=123, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
 2001@0 --(interface)--> 2000@0 [size=124, length=-1]
 2@1000 --(class)--> 1000@0 [size=123, length=-1]
 3@1001 --(class)--> 1001@0 [size=123, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
 4@1000 --(class)--> 1000@0 [size=123, length=-1]
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
 ---- tagged classes
 root@root --(stack-local[id=1,tag=3000,depth=3,method=doFollowReferencesTest,vreg=1,location= 28])--> 3000@0 [size=132, length=-1]
-root@root --(system-class)--> 2@0 [size=32, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
@@ -233,7 +286,6 @@
 5@1002 --(class)--> 1002@0 [size=123, length=-1]
 6@1000 --(class)--> 1000@0 [size=123, length=-1]
 ---
-root@root --(system-class)--> 2@0 [size=32, length=-1]
 root@root --(thread)--> 3000@0 [size=132, length=-1]
 1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
 1002@0 --(interface)--> 2001@0 [size=124, length=-1]
@@ -260,19 +312,19 @@
 ---- untagged classes
 root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestNonRoot,vreg=13,location= 32])--> 1@1000 [size=16, length=-1]
 0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
 root@root --(jni-global)--> 1@1000 [size=16, length=-1]
 root@root --(jni-local[id=1,tag=3000,depth=0,method=followReferences])--> 1@1000 [size=16, length=-1]
@@ -280,17 +332,17 @@
 root@root --(stack-local[id=1,tag=3000,depth=1,method=doFollowReferencesTestImpl,vreg=5,location= 10])--> 1@1000 [size=16, length=-1]
 root@root --(stack-local[id=1,tag=3000,depth=2,method=doFollowReferencesTestRoot,vreg=4,location= 19])--> 1@1000 [size=16, length=-1]
 root@root --(thread)--> 1@1000 [size=16, length=-1]
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
-1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
-1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
-3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
-3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
-5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
-5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+1@1000 --(field@2)--> 2@1000 [size=16, length=-1]
+1@1000 --(field@3)--> 3@1001 [size=24, length=-1]
+3@1001 --(field@4)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@5)--> 5@1002 [size=32, length=-1]
+5@1002 --(field@8)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@9)--> 1@1000 [size=16, length=-1]
 ---
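
Note: most of the expected.txt churn above is mechanical. The reference labels move from what were raw byte offsets inside the referring object (field@8, field@12, field@16, ...) to small consecutive numbers (field@2, field@3, field@4, ...), consistent with the report now using the JVMTI field index from jvmtiHeapReferenceInfoField instead of an implementation offset. An illustrative formatter for these edges, matching the output shape but not taken from the patch:

#include <sstream>
#include <string>

#include "jvmti.h"

// Hypothetical formatter for the "--(field@N)-->" lines; N is assumed to be
// reference_info->field.index under the new scheme.
std::string FormatFieldEdge(jlong referrer_tag, jlong referrer_class_tag,
                            const jvmtiHeapReferenceInfo* reference_info,
                            jlong tag, jlong class_tag,
                            jlong size, jint length) {
  std::ostringstream oss;
  oss << referrer_tag << '@' << referrer_class_tag
      << " --(field@" << reference_info->field.index << ")--> "
      << tag << '@' << class_tag
      << " [size=" << size << ", length=" << length << "]";
  return oss.str();
}
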
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
index 99bc48e..6a47ca1 100644
--- a/test/913-heaps/heaps.cc
+++ b/test/913-heaps/heaps.cc
@@ -21,21 +21,23 @@
 #include <iostream>
 #include <vector>
 
+#include "android-base/macros.h"
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
 
-#include "base/logging.h"
-#include "base/macros.h"
 #include "jit/jit.h"
 #include "jni.h"
 #include "native_stack_dump.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test913Heaps {
@@ -550,7 +552,7 @@
 
   FindStringCallbacks fsc;
   jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, initial_object, &callbacks, &fsc);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return nullptr;
   }
 
@@ -648,11 +650,101 @@
 
   FindArrayCallbacks fac;
   jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, initial_object, &callbacks, &fac);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return nullptr;
   }
   return env->NewStringUTF(fac.data.c_str());
 }
 
+static constexpr const char* GetPrimitiveTypeName(jvmtiPrimitiveType type) {
+  switch (type) {
+    case JVMTI_PRIMITIVE_TYPE_BOOLEAN:
+      return "boolean";
+    case JVMTI_PRIMITIVE_TYPE_BYTE:
+      return "byte";
+    case JVMTI_PRIMITIVE_TYPE_CHAR:
+      return "char";
+    case JVMTI_PRIMITIVE_TYPE_SHORT:
+      return "short";
+    case JVMTI_PRIMITIVE_TYPE_INT:
+      return "int";
+    case JVMTI_PRIMITIVE_TYPE_FLOAT:
+      return "float";
+    case JVMTI_PRIMITIVE_TYPE_LONG:
+      return "long";
+    case JVMTI_PRIMITIVE_TYPE_DOUBLE:
+      return "double";
+  }
+  LOG(FATAL) << "Unknown type " << static_cast<size_t>(type);
+  UNREACHABLE();
+}
+
+extern "C" JNIEXPORT jstring JNICALL Java_Main_followReferencesPrimitiveFields(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject initial_object) {
+  struct FindFieldCallbacks {
+    static jint JNICALL FollowReferencesCallback(
+        jvmtiHeapReferenceKind reference_kind ATTRIBUTE_UNUSED,
+        const jvmtiHeapReferenceInfo* reference_info ATTRIBUTE_UNUSED,
+        jlong class_tag ATTRIBUTE_UNUSED,
+        jlong referrer_class_tag ATTRIBUTE_UNUSED,
+        jlong size ATTRIBUTE_UNUSED,
+        jlong* tag_ptr ATTRIBUTE_UNUSED,
+        jlong* referrer_tag_ptr ATTRIBUTE_UNUSED,
+        jint length ATTRIBUTE_UNUSED,
+        void* user_data ATTRIBUTE_UNUSED) {
+      return JVMTI_VISIT_OBJECTS;  // Continue visiting.
+    }
+
+    static jint JNICALL PrimitiveFieldValueCallback(jvmtiHeapReferenceKind kind,
+                                                    const jvmtiHeapReferenceInfo* info,
+                                                    jlong class_tag,
+                                                    jlong* tag_ptr,
+                                                    jvalue value,
+                                                    jvmtiPrimitiveType value_type,
+                                                    void* user_data) {
+      FindFieldCallbacks* p = reinterpret_cast<FindFieldCallbacks*>(user_data);
+      if (*tag_ptr != 0) {
+        std::ostringstream oss;
+        oss << *tag_ptr
+            << '@'
+            << class_tag
+            << " ("
+            << (kind == JVMTI_HEAP_REFERENCE_FIELD ? "instance, " : "static, ")
+            << GetPrimitiveTypeName(value_type)
+            << ", index="
+            << info->field.index
+            << ") ";
+        // Be lazy, always print eight bytes.
+        static_assert(sizeof(jvalue) == sizeof(uint64_t), "Unexpected jvalue size");
+        uint64_t val;
+        memcpy(&val, &value, sizeof(uint64_t));  // To avoid undefined behavior.
+        oss << android::base::StringPrintf("%016" PRIx64, val);
+
+        if (!p->data.empty()) {
+          p->data += "\n";
+        }
+        p->data += oss.str();
+        // Update the tag to test whether that works.
+        *tag_ptr = *tag_ptr + 1;
+      }
+      return 0;
+    }
+
+    std::string data;
+  };
+
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_reference_callback = FindFieldCallbacks::FollowReferencesCallback;
+  callbacks.primitive_field_callback = FindFieldCallbacks::PrimitiveFieldValueCallback;
+
+  FindFieldCallbacks ffc;
+  jvmtiError ret = jvmti_env->FollowReferences(0, nullptr, initial_object, &callbacks, &ffc);
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
+    return nullptr;
+  }
+  return env->NewStringUTF(ffc.data.c_str());
+}
+
 }  // namespace Test913Heaps
 }  // namespace art
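The recurring mechanical change in this file, and throughout the rest of this section, is that JvmtiErrorToException now takes the jvmtiEnv explicitly instead of relying on a global. The real helper lives in the new shared test infrastructure; the sketch below only illustrates the shape such a helper can take, assuming it converts the error into a thrown RuntimeException (the exception class and message format are assumptions, not the actual ti-agent code):

```cpp
#include "jni.h"
#include "jvmti.h"

// Hypothetical JvmtiErrorToException-style helper: returns true (and raises
// a Java exception) iff `error` is not JVMTI_ERROR_NONE.
static bool JvmtiErrorToExceptionSketch(JNIEnv* env, jvmtiEnv* jvmti_env, jvmtiError error) {
  if (error == JVMTI_ERROR_NONE) {
    return false;
  }
  char* error_name = nullptr;
  if (jvmti_env->GetErrorName(error, &error_name) != JVMTI_ERROR_NONE) {
    error_name = nullptr;
  }
  jclass rt_exception = env->FindClass("java/lang/RuntimeException");
  if (rt_exception != nullptr) {
    env->ThrowNew(rt_exception, error_name != nullptr ? error_name : "Unknown JVMTI error");
  }
  if (error_name != nullptr) {
    // GetErrorName allocates; release through the same environment.
    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(error_name));
  }
  return true;
}
```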
diff --git a/test/913-heaps/src/Main.java b/test/913-heaps/src/Main.java
index df89f34..10778ff 100644
--- a/test/913-heaps/src/Main.java
+++ b/test/913-heaps/src/Main.java
@@ -25,8 +25,29 @@
     doTest();
     new TestConfig().doFollowReferencesTest();
 
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    new TestConfig(null, 0, 1, -1).doFollowReferencesTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    new TestConfig(null, 0, Integer.MAX_VALUE, 1).doFollowReferencesTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
     doStringTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
     doPrimitiveArrayTest();
+    doPrimitiveFieldTest();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
 
     // Test klass filter.
     System.out.println("--- klass ---");
@@ -53,14 +74,18 @@
   }
 
   public static void doStringTest() throws Exception {
-    final String str = "HelloWorld";
+    final String str = new String("HelloWorld");
+    final String str2 = new String("");
     Object o = new Object() {
       String s = str;
+      String s2 = str2;
     };
 
     setTag(str, 1);
+    setTag(str2, 2);
     System.out.println(Arrays.toString(followReferencesString(o)));
     System.out.println(getTag(str));
+    System.out.println(getTag(str2));
   }
 
   public static void doPrimitiveArrayTest() throws Exception {
@@ -110,6 +135,62 @@
     System.out.println(getTag(dArray));
   }
 
+  public static void doPrimitiveFieldTest() throws Exception {
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsClasses();
+
+    doTestPrimitiveFieldsIntegral();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doTestPrimitiveFieldsFloat();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+  }
+
+  private static void doTestPrimitiveFieldsClasses() {
+    setTag(IntObject.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(IntObject.class));
+    System.out.println(getTag(IntObject.class));
+    setTag(IntObject.class, 0);
+
+    setTag(FloatObject.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(FloatObject.class));
+    System.out.println(getTag(FloatObject.class));
+    setTag(FloatObject.class, 0);
+
+    setTag(Inf1.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(Inf1.class));
+    System.out.println(getTag(Inf1.class));
+    setTag(Inf1.class, 0);
+
+    setTag(Inf2.class, 10000);
+    System.out.println(followReferencesPrimitiveFields(Inf2.class));
+    System.out.println(getTag(Inf2.class));
+    setTag(Inf2.class, 0);
+  }
+
+  private static void doTestPrimitiveFieldsIntegral() {
+    IntObject intObject = new IntObject();
+    setTag(intObject, 10000);
+    System.out.println(followReferencesPrimitiveFields(intObject));
+    System.out.println(getTag(intObject));
+  }
+
+  private static void doTestPrimitiveFieldsFloat() {
+    FloatObject floatObject = new FloatObject();
+    setTag(floatObject, 10000);
+    System.out.println(followReferencesPrimitiveFields(floatObject));
+    System.out.println(getTag(floatObject));
+  }
+
   private static void run() {
     clearStats();
     forceGarbageCollection();
@@ -131,6 +212,8 @@
   private static class TestConfig {
     private Class<?> klass = null;
     private int heapFilter = 0;
+    private int stopAfter = Integer.MAX_VALUE;
+    private int followSet = -1;
 
     public TestConfig() {
     }
@@ -138,6 +221,12 @@
       this.klass = klass;
       this.heapFilter = heapFilter;
     }
+    public TestConfig(Class<?> klass, int heapFilter, int stopAfter, int followSet) {
+      this.klass = klass;
+      this.heapFilter = heapFilter;
+      this.stopAfter = stopAfter;
+      this.followSet = followSet;
+    }
 
     public void doFollowReferencesTest() throws Exception {
       // Force GCs to clean up dirt.
@@ -170,8 +259,8 @@
       tmpStorage.add(a);
       v.add("0@0", "1@1000");  // tmpStorage[0] --(array-element)--> a.
 
-      doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, null, v, null);
-      doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, null, v, "3@1001");
+      doFollowReferencesTestImpl(null, stopAfter, followSet, null, v, null);
+      doFollowReferencesTestImpl(a.foo2, stopAfter, followSet, null, v, "3@1001");
 
       tmpStorage.clear();
     }
@@ -181,8 +270,8 @@
       tagClasses(v);
       A a = createTree(v);
 
-      doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, a, v, null);
-      doFollowReferencesTestImpl(a.foo2, Integer.MAX_VALUE, -1, a, v, "3@1001");
+      doFollowReferencesTestImpl(null, stopAfter, followSet, a, v, null);
+      doFollowReferencesTestImpl(a.foo2, stopAfter, followSet, a, v, "3@1001");
     }
 
     private void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
@@ -301,6 +390,31 @@
     }
   }
 
+  private static interface Inf1 {
+    public final static int A = 1;
+  }
+
+  private static interface Inf2 extends Inf1 {
+    public final static int B = 1;
+  }
+
+  private static class IntObject implements Inf1 {
+    byte b = (byte)1;
+    char c = 'a';
+    short s = (short)2;
+    int i = 3;
+    long l = 4;
+    Object o = new Object();
+    static int sI = 5;
+  }
+
+  private static class FloatObject extends IntObject implements Inf2 {
+    float f = 1.23f;
+    double d = 1.23;
+    Object p = new Object();
+    static int sI = 6;
+  }
+
   public static class Verifier {
     // Should roots with vreg=-1 be printed?
     public final static boolean PRINT_ROOTS_WITH_UNKNOWN_VREG = false;
@@ -494,4 +608,5 @@
       Object initialObject, int stopAfter, int followSet, Object jniRef);
   public static native String[] followReferencesString(Object initialObject);
   public static native String followReferencesPrimitiveArray(Object initialObject);
+  public static native String followReferencesPrimitiveFields(Object initialObject);
 }
diff --git a/test/918-fields/fields.cc b/test/918-fields/fields.cc
index 7d29912..726c5cf 100644
--- a/test/918-fields/fields.cc
+++ b/test/918-fields/fields.cc
@@ -16,13 +16,14 @@
 
 #include <stdio.h>
 
-#include "base/macros.h"
+#include "android-base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jni_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test918Fields {
diff --git a/test/920-objects/objects.cc b/test/920-objects/objects.cc
index 0553a9d..5263e75 100644
--- a/test/920-objects/objects.cc
+++ b/test/920-objects/objects.cc
@@ -16,13 +16,13 @@
 
 #include <stdio.h>
 
-#include "base/macros.h"
+#include "android-base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "test_env.h"
 
 namespace art {
 namespace Test920Objects {
diff --git a/test/921-hello-failure/expected.txt b/test/921-hello-failure/expected.txt
index a5dc10d..fdbfbe2 100644
--- a/test/921-hello-failure/expected.txt
+++ b/test/921-hello-failure/expected.txt
@@ -50,3 +50,6 @@
 hello there again - FieldChange
 Transformation error : java.lang.Exception(Failed to redefine class <LTransform4;> due to JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED)
 hello there again - FieldChange
+hello - Unmodifiable
+Transformation error : java.lang.Exception(Failed to redefine class <[LTransform;> due to JVMTI_ERROR_UNMODIFIABLE_CLASS)
+hello - Unmodifiable
diff --git a/test/921-hello-failure/src/Main.java b/test/921-hello-failure/src/Main.java
index 5bbe2b5..6779ed8 100644
--- a/test/921-hello-failure/src/Main.java
+++ b/test/921-hello-failure/src/Main.java
@@ -32,6 +32,7 @@
     NewField.doTest(new Transform());
     MissingField.doTest(new Transform4("there"));
     FieldChange.doTest(new Transform4("there again"));
+    Unmodifiable.doTest(new Transform[] { new Transform(), });
   }
 
   // Transforms the class. This throws an exception if something goes wrong.
diff --git a/test/921-hello-failure/src/Unmodifiable.java b/test/921-hello-failure/src/Unmodifiable.java
new file mode 100644
index 0000000..ad05f51
--- /dev/null
+++ b/test/921-hello-failure/src/Unmodifiable.java
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Base64;
+
+class Unmodifiable {
+  // The following is a base64 encoding of a valid class file.
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAFQoABgAPBwAQCAARCgACABIHABMHABQBAAY8aW5pdD4BAAMoKVYBAARDb2RlAQAP" +
+    "TGluZU51bWJlclRhYmxlAQAFc2F5SGkBABUoTGphdmEvbGFuZy9TdHJpbmc7KVYBAApTb3VyY2VG" +
+    "aWxlAQAOVHJhbnNmb3JtLmphdmEMAAcACAEAD2phdmEvbGFuZy9FcnJvcgEAFVNob3VsZCBub3Qg" +
+    "YmUgY2FsbGVkIQwABwAMAQAJVHJhbnNmb3JtAQAQamF2YS9sYW5nL09iamVjdAAgAAUABgAAAAAA" +
+    "AgAAAAcACAABAAkAAAAdAAEAAQAAAAUqtwABsQAAAAEACgAAAAYAAQAAAAIAAAALAAwAAQAJAAAA" +
+    "IgADAAIAAAAKuwACWRIDtwAEvwAAAAEACgAAAAYAAQAAAAQAAQANAAAAAgAO");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQCrV81cy4Q+YKMMMqc0bZEO5Y1X5u7irPeQAgAAcAAAAHhWNBIAAAAAAAAAAPwBAAAL" +
+    "AAAAcAAAAAUAAACcAAAAAgAAALAAAAAAAAAAAAAAAAQAAADIAAAAAQAAAOgAAACIAQAACAEAAEoB" +
+    "AABSAQAAXwEAAHIBAACGAQAAmgEAALEBAADBAQAAxAEAAMgBAADcAQAAAQAAAAIAAAADAAAABAAA" +
+    "AAcAAAAHAAAABAAAAAAAAAAIAAAABAAAAEQBAAAAAAAAAAAAAAAAAQAKAAAAAQABAAAAAAACAAAA" +
+    "AAAAAAAAAAAAAAAAAgAAAAAAAAAGAAAAAAAAAO4BAAAAAAAAAQABAAEAAADjAQAABAAAAHAQAwAA" +
+    "AA4ABAACAAIAAADoAQAACQAAACIAAQAbAQUAAABwIAIAEAAnAAAAAQAAAAMABjxpbml0PgALTFRy" +
+    "YW5zZm9ybTsAEUxqYXZhL2xhbmcvRXJyb3I7ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xh" +
+    "bmcvU3RyaW5nOwAVU2hvdWxkIG5vdCBiZSBjYWxsZWQhAA5UcmFuc2Zvcm0uamF2YQABVgACVkwA" +
+    "EmVtaXR0ZXI6IGphY2stNC4yNAAFc2F5SGkAAgAHDgAEAQAHDgAAAAEBAICABIgCAQCgAgwAAAAA" +
+    "AAAAAQAAAAAAAAABAAAACwAAAHAAAAACAAAABQAAAJwAAAADAAAAAgAAALAAAAAFAAAABAAAAMgA" +
+    "AAAGAAAAAQAAAOgAAAABIAAAAgAAAAgBAAABEAAAAQAAAEQBAAACIAAACwAAAEoBAAADIAAAAgAA" +
+    "AOMBAAAAIAAAAQAAAO4BAAAAEAAAAQAAAPwBAAA=");
+
+  public static void doTest(Transform[] ts) {
+    ts[0].sayHi("Unmodifiable");
+    try {
+      Main.doCommonClassRedefinition(Transform[].class, CLASS_BYTES, DEX_BYTES);
+    } catch (Exception e) {
+      System.out.println(
+          "Transformation error : " + e.getClass().getName() + "(" + e.getMessage() + ")");
+    }
+    ts[0].sayHi("Unmodifiable");
+  }
+}
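The new test relies on array classes being unmodifiable, which is why the redefinition above fails with JVMTI_ERROR_UNMODIFIABLE_CLASS. An agent that wants to avoid the failure path can query modifiability up front; a minimal sketch using the standard JVMTI call:

```cpp
#include "jni.h"
#include "jvmti.h"

// Sketch: check whether a class may be redefined before calling
// RedefineClasses. Array and primitive classes always report false.
static bool CanRedefine(jvmtiEnv* jvmti_env, jclass klass) {
  jboolean is_modifiable = JNI_FALSE;
  if (jvmti_env->IsModifiableClass(klass, &is_modifiable) != JVMTI_ERROR_NONE) {
    return false;  // Treat a failed query as "not redefinable".
  }
  return is_modifiable == JNI_TRUE;
}
```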
diff --git a/test/922-properties/properties.cc b/test/922-properties/properties.cc
index cb732c7..896e4c3 100644
--- a/test/922-properties/properties.cc
+++ b/test/922-properties/properties.cc
@@ -16,13 +16,15 @@
 
 #include <stdio.h>
 
-#include "base/macros.h"
+#include "android-base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedUtfChars.h"
+#include "jvmti.h"
+#include "scoped_utf_chars.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test922Properties {
@@ -32,7 +34,7 @@
   jint count;
   char** properties;
   jvmtiError result = jvmti_env->GetSystemProperties(&count, &properties);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -61,7 +63,7 @@
 
   char* value = nullptr;
   jvmtiError result = jvmti_env->GetSystemProperty(string.c_str(), &value);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -84,7 +86,7 @@
   }
 
   jvmtiError result = jvmti_env->SetSystemProperty(key_string.c_str(), value_string.c_str());
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return;
   }
 }
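GetSystemProperty and GetSystemProperties hand back strings allocated by the JVMTI implementation, so the agent must release them with Deallocate. A minimal sketch of the fetch/use/free pattern (the property name is just an example):

```cpp
#include <cstdio>

#include "jvmti.h"

// Sketch: fetch one system property and release the JVMTI-allocated string.
static void PrintVmName(jvmtiEnv* jvmti_env) {
  char* value = nullptr;
  if (jvmti_env->GetSystemProperty("java.vm.name", &value) != JVMTI_ERROR_NONE) {
    return;
  }
  printf("java.vm.name=%s\n", value);
  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(value));
}
```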
diff --git a/test/923-monitors/monitors.cc b/test/923-monitors/monitors.cc
index 4baa530..6369a74 100644
--- a/test/923-monitors/monitors.cc
+++ b/test/923-monitors/monitors.cc
@@ -16,13 +16,14 @@
 
 #include <stdio.h>
 
-#include "base/macros.h"
+#include "android-base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedUtfChars.h"
+#include "jvmti.h"
+#include "scoped_utf_chars.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test923Monitors {
@@ -40,7 +41,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   jrawMonitorID id;
   jvmtiError result = jvmti_env->CreateRawMonitor("dummy", &id);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return 0;
   }
   return MonitorToLong(id);
@@ -49,37 +50,37 @@
 extern "C" JNIEXPORT void JNICALL Java_Main_destroyRawMonitor(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jlong l) {
   jvmtiError result = jvmti_env->DestroyRawMonitor(LongToMonitor(l));
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_rawMonitorEnter(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jlong l) {
   jvmtiError result = jvmti_env->RawMonitorEnter(LongToMonitor(l));
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_rawMonitorExit(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jlong l) {
   jvmtiError result = jvmti_env->RawMonitorExit(LongToMonitor(l));
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_rawMonitorWait(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jlong l, jlong millis) {
   jvmtiError result = jvmti_env->RawMonitorWait(LongToMonitor(l), millis);
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_rawMonitorNotify(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jlong l) {
   jvmtiError result = jvmti_env->RawMonitorNotify(LongToMonitor(l));
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_rawMonitorNotifyAll(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jlong l) {
   jvmtiError result = jvmti_env->RawMonitorNotifyAll(LongToMonitor(l));
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 }  // namespace Test923Monitors
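The Java side of this test drives each raw-monitor operation through its own native method; collapsed into one sequence, the lifecycle looks like the sketch below (the monitor name and timeout are illustrative):

```cpp
#include "jvmti.h"

// Sketch of the raw-monitor lifecycle: create, enter, wait (with timeout),
// notify, exit, destroy. A monitor must be held in order to wait or notify.
static void RawMonitorRoundTrip(jvmtiEnv* jvmti_env) {
  jrawMonitorID monitor;
  if (jvmti_env->CreateRawMonitor("example", &monitor) != JVMTI_ERROR_NONE) {
    return;
  }
  jvmti_env->RawMonitorEnter(monitor);
  jvmti_env->RawMonitorWait(monitor, 10);   // Times out after 10ms; nobody notifies here.
  jvmti_env->RawMonitorNotifyAll(monitor);  // Would wake other waiters, if any.
  jvmti_env->RawMonitorExit(monitor);
  jvmti_env->DestroyRawMonitor(monitor);
}
```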
diff --git a/test/924-threads/expected.txt b/test/924-threads/expected.txt
index 67d20eb..4c0f4ea 100644
--- a/test/924-threads/expected.txt
+++ b/test/924-threads/expected.txt
@@ -19,6 +19,11 @@
 true
 java.lang.ThreadGroup[name=main,maxpri=10]
 class dalvik.system.PathClassLoader
+Subclass
+5
+false
+java.lang.ThreadGroup[name=main,maxpri=10]
+class dalvik.system.PathClassLoader
 5
 5
 0 = NEW
diff --git a/test/924-threads/src/Main.java b/test/924-threads/src/Main.java
index f18d70e..7328560 100644
--- a/test/924-threads/src/Main.java
+++ b/test/924-threads/src/Main.java
@@ -52,6 +52,11 @@
     // Thread has died, check that we can still get info.
     printThreadInfo(t3);
 
+    // Try a subclass of thread.
+    Thread t4 = new Thread("Subclass") {
+    };
+    printThreadInfo(t4);
+
     doStateTests();
 
     doAllThreadsTests();
@@ -135,8 +140,12 @@
     synchronized(cdl3_2) {
       cdl3_1.countDown();
       cdl3_2.await();
-      Thread.yield();
-      Thread.sleep(100);
+      // While the latch improves the chances of making good progress, scheduling might still be
+      // messy. Wait until we see the right Java-side Thread state.
+      do {
+        Thread.yield();
+      } while (t.getState() != Thread.State.BLOCKED);
+      Thread.sleep(10);
       printThreadState(t);
     }
 
diff --git a/test/924-threads/threads.cc b/test/924-threads/threads.cc
index 0380433..a8b37ec 100644
--- a/test/924-threads/threads.cc
+++ b/test/924-threads/threads.cc
@@ -16,15 +16,17 @@
 
 #include <stdio.h>
 
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
-#include "base/macros.h"
-#include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
 
 namespace art {
 namespace Test924Threads {
@@ -36,7 +38,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   jthread thread = nullptr;
   jvmtiError result = jvmti_env->GetCurrentThread(&thread);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
   return thread;
@@ -48,7 +50,7 @@
   memset(&info, 0, sizeof(jvmtiThreadInfo));
 
   jvmtiError result = jvmti_env->GetThreadInfo(thread, &info);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -94,7 +96,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jthread thread) {
   jint state;
   jvmtiError result = jvmti_env->GetThreadState(thread, &state);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return 0;
   }
   return state;
@@ -106,7 +108,7 @@
   jthread* threads;
 
   jvmtiError result = jvmti_env->GetAllThreads(&thread_count, &threads);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -124,7 +126,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jthread thread) {
   void* tls;
   jvmtiError result = jvmti_env->GetThreadLocalStorage(thread, &tls);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return 0;
   }
   return static_cast<jlong>(reinterpret_cast<uintptr_t>(tls));
@@ -134,7 +136,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jthread thread, jlong val) {
   const void* tls = reinterpret_cast<void*>(static_cast<uintptr_t>(val));
   jvmtiError result = jvmti_env->SetThreadLocalStorage(thread, tls);
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 static void JNICALL ThreadEvent(jvmtiEnv* jvmti_env,
@@ -172,13 +174,13 @@
     jvmtiError ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                                          JVMTI_EVENT_THREAD_START,
                                                          nullptr);
-    if (JvmtiErrorToException(env, ret)) {
+    if (JvmtiErrorToException(env, jvmti_env, ret)) {
       return;
     }
     ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE,
                                               JVMTI_EVENT_THREAD_END,
                                               nullptr);
-    JvmtiErrorToException(env, ret);
+    JvmtiErrorToException(env, jvmti_env, ret);
     return;
   }
 
@@ -187,20 +189,20 @@
   callbacks.ThreadStart = ThreadStart;
   callbacks.ThreadEnd = ThreadEnd;
   jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
 
   ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
                                             JVMTI_EVENT_THREAD_START,
                                             nullptr);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
   ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
                                             JVMTI_EVENT_THREAD_END,
                                             nullptr);
-  JvmtiErrorToException(env, ret);
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
 }  // namespace Test924Threads
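The Java-side change above spins on Thread.getState() until the worker is actually BLOCKED before sampling it. The same idea is available natively through GetThreadState; a sketch of the polling loop (the yield call and the chosen state bit mirror what the test waits for):

```cpp
#include <sched.h>

#include "jvmti.h"

// Sketch: poll GetThreadState until the target thread is blocked trying to
// enter a monitor. Returns false on any JVMTI error.
static bool WaitUntilBlocked(jvmtiEnv* jvmti_env, jthread thread) {
  for (;;) {
    jint state = 0;
    if (jvmti_env->GetThreadState(thread, &state) != JVMTI_ERROR_NONE) {
      return false;
    }
    if ((state & JVMTI_THREAD_STATE_BLOCKED_ON_MONITOR_ENTER) != 0) {
      return true;
    }
    sched_yield();  // Give the target thread a chance to run.
  }
}
```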
diff --git a/test/925-threadgroups/threadgroups.cc b/test/925-threadgroups/threadgroups.cc
index 6c6e835..d555553 100644
--- a/test/925-threadgroups/threadgroups.cc
+++ b/test/925-threadgroups/threadgroups.cc
@@ -16,15 +16,17 @@
 
 #include <stdio.h>
 
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
-#include "base/macros.h"
-#include "base/logging.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
 
 namespace art {
 namespace Test925ThreadGroups {
@@ -38,7 +40,7 @@
   jthreadGroup* groups;
   jint group_count;
   jvmtiError result = jvmti_env->GetTopThreadGroups(&group_count, &groups);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -56,7 +58,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jthreadGroup group) {
   jvmtiThreadGroupInfo info;
   jvmtiError result = jvmti_env->GetThreadGroupInfo(group, &info);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
@@ -96,7 +98,7 @@
                                                         &threads,
                                                         &threadgroup_count,
                                                         &groups);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
diff --git a/test/927-timers/timers.cc b/test/927-timers/timers.cc
index 58d5c27..55d3921 100644
--- a/test/927-timers/timers.cc
+++ b/test/927-timers/timers.cc
@@ -16,14 +16,17 @@
 
 #include <inttypes.h>
 
+#include "android-base/logging.h"
 #include "android-base/stringprintf.h"
-#include "base/logging.h"
-#include "base/macros.h"
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+#include "jni.h"
+#include "jvmti.h"
+
+// Test infrastructure
+#include "jni_helper.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+#include "ti_macros.h"
 
 namespace art {
 namespace Test926Timers {
@@ -32,7 +35,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   jint count;
   jvmtiError result = jvmti_env->GetAvailableProcessors(&count);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
   return count;
@@ -42,7 +45,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   jlong time;
   jvmtiError result = jvmti_env->GetTime(&time);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return -1;
   }
   return time;
@@ -52,7 +55,7 @@
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   jvmtiTimerInfo info;
   jvmtiError result = jvmti_env->GetTimerInfo(&info);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return nullptr;
   }
 
diff --git a/test/928-jni-table/jni_table.cc b/test/928-jni-table/jni_table.cc
index 5123d3a..26a6707 100644
--- a/test/928-jni-table/jni_table.cc
+++ b/test/928-jni-table/jni_table.cc
@@ -17,13 +17,14 @@
 #include <stdio.h>
 
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
-#include "base/logging.h"
-#include "base/macros.h"
+#include "android-base/logging.h"
+#include "android-base/macros.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test927JNITable {
@@ -42,14 +43,14 @@
     JNIEnv* env, jclass klass) {
   // Get the current table, as the delegate.
   jvmtiError getorig_result = jvmti_env->GetJNIFunctionTable(&gOriginalEnv);
-  if (JvmtiErrorToException(env, getorig_result)) {
+  if (JvmtiErrorToException(env, jvmti_env, getorig_result)) {
     return;
   }
 
   // Get the current table, as the override we'll install.
   JNINativeInterface* env_override;
   jvmtiError getoverride_result = jvmti_env->GetJNIFunctionTable(&env_override);
-  if (JvmtiErrorToException(env, getoverride_result)) {
+  if (JvmtiErrorToException(env, jvmti_env, getoverride_result)) {
     return;
   }
 
@@ -58,7 +59,7 @@
 
   // Install the override.
   jvmtiError setoverride_result = jvmti_env->SetJNIFunctionTable(env_override);
-  if (JvmtiErrorToException(env, setoverride_result)) {
+  if (JvmtiErrorToException(env, jvmti_env, setoverride_result)) {
     return;
   }
 
@@ -68,7 +69,7 @@
 
   // Install the "original." There is no real reset.
   jvmtiError setoverride2_result = jvmti_env->SetJNIFunctionTable(gOriginalEnv);
-  if (JvmtiErrorToException(env, setoverride2_result)) {
+  if (JvmtiErrorToException(env, jvmti_env, setoverride2_result)) {
     return;
   }
 
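Each GetJNIFunctionTable call returns a fresh copy of the current table, which is why the test can fetch one copy as the delegate and mutate a second copy as the override. A sketch of intercepting a single slot, with the choice of GetVersion and the counter as illustrative details:

```cpp
#include <atomic>

#include "jni.h"
#include "jvmti.h"

// Illustrative interception: count GetVersion calls, delegating to the
// original table for the actual work.
static std::atomic<int> gGetVersionCalls(0);
static JNINativeInterface* gOriginalTable = nullptr;

static jint JNICALL CountingGetVersion(JNIEnv* env) {
  gGetVersionCalls++;
  return gOriginalTable->GetVersion(env);
}

static void InstallCountingTable(jvmtiEnv* jvmti_env) {
  JNINativeInterface* override_table;
  if (jvmti_env->GetJNIFunctionTable(&gOriginalTable) != JVMTI_ERROR_NONE ||
      jvmti_env->GetJNIFunctionTable(&override_table) != JVMTI_ERROR_NONE) {
    return;
  }
  override_table->GetVersion = CountingGetVersion;
  jvmti_env->SetJNIFunctionTable(override_table);  // Applies to all JNI environments.
}
```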
diff --git a/test/929-search/search.cc b/test/929-search/search.cc
index d1c6984..5516105 100644
--- a/test/929-search/search.cc
+++ b/test/929-search/search.cc
@@ -16,15 +16,16 @@
 
 #include <inttypes.h>
 
+#include "android-base/logging.h"
+#include "android-base/macros.h"
 #include "android-base/stringprintf.h"
-#include "base/logging.h"
-#include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedUtfChars.h"
+#include "jvmti.h"
+#include "scoped_utf_chars.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test929Search {
@@ -36,7 +37,7 @@
     return;
   }
   jvmtiError result = jvmti_env->AddToBootstrapClassLoaderSearch(utf.c_str());
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_addToSystemClassLoader(
@@ -46,7 +47,7 @@
     return;
   }
   jvmtiError result = jvmti_env->AddToSystemClassLoaderSearch(utf.c_str());
-  JvmtiErrorToException(env, result);
+  JvmtiErrorToException(env, jvmti_env, result);
 }
 
 }  // namespace Test929Search
diff --git a/test/931-agent-thread/agent_thread.cc b/test/931-agent-thread/agent_thread.cc
index a488d9a..f9af8cf 100644
--- a/test/931-agent-thread/agent_thread.cc
+++ b/test/931-agent-thread/agent_thread.cc
@@ -15,20 +15,18 @@
  */
 
 #include <inttypes.h>
+#include <pthread.h>
 #include <sched.h>
 
-#include "barrier.h"
-#include "base/logging.h"
-#include "base/macros.h"
+#include "android-base/logging.h"
+#include "android-base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "runtime.h"
-#include "ScopedLocalRef.h"
-#include "thread-inl.h"
-#include "well_known_classes.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test930AgentThread {
@@ -36,12 +34,12 @@
 struct AgentData {
   AgentData() : main_thread(nullptr),
                 jvmti_env(nullptr),
-                b(2) {
+                priority(0) {
   }
 
   jthread main_thread;
   jvmtiEnv* jvmti_env;
-  Barrier b;
+  pthread_barrier_t b;
   jint priority;
 };
 
@@ -52,14 +50,21 @@
   // This thread is not the main thread.
   jthread this_thread;
   jvmtiError this_thread_result = jenv->GetCurrentThread(&this_thread);
-  CHECK(!JvmtiErrorToException(env, this_thread_result));
+  CheckJvmtiError(jenv, this_thread_result);
   CHECK(!env->IsSameObject(this_thread, data->main_thread));
 
   // The thread is a daemon.
   jvmtiThreadInfo info;
   jvmtiError info_result = jenv->GetThreadInfo(this_thread, &info);
-  CHECK(!JvmtiErrorToException(env, info_result));
+  CheckJvmtiError(jenv, info_result);
   CHECK(info.is_daemon);
+  CheckJvmtiError(jenv, jenv->Deallocate(reinterpret_cast<unsigned char*>(info.name)));
+  if (info.thread_group != nullptr) {
+    env->DeleteLocalRef(info.thread_group);
+  }
+  if (info.context_class_loader != nullptr) {
+    env->DeleteLocalRef(info.context_class_loader);
+  }
 
   // The thread has the requested priority.
   // TODO: Our thread priorities do not work on the host.
@@ -69,7 +74,7 @@
   jint thread_count;
   jthread* threads;
   jvmtiError threads_result = jenv->GetAllThreads(&thread_count, &threads);
-  CHECK(!JvmtiErrorToException(env, threads_result));
+  CheckJvmtiError(jenv, threads_result);
   bool found = false;
   for (jint i = 0; i != thread_count; ++i) {
     if (env->IsSameObject(threads[i], this_thread)) {
@@ -80,29 +85,53 @@
   CHECK(found);
 
   // Done, let the main thread progress.
-  data->b.Pass(Thread::Current());
+  int wait_result = pthread_barrier_wait(&data->b);
+  CHECK(wait_result == PTHREAD_BARRIER_SERIAL_THREAD || wait_result == 0);
 }
 
 extern "C" JNIEXPORT void JNICALL Java_Main_testAgentThread(
     JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED) {
   // Create a Thread object.
-  ScopedLocalRef<jobject> thread_name(env,
-                                      env->NewStringUTF("Agent Thread"));
+  ScopedLocalRef<jobject> thread_name(env, env->NewStringUTF("Agent Thread"));
   if (thread_name.get() == nullptr) {
     return;
   }
 
-  ScopedLocalRef<jobject> thread(env, env->AllocObject(WellKnownClasses::java_lang_Thread));
+  ScopedLocalRef<jclass> thread_klass(env, env->FindClass("java/lang/Thread"));
+  if (thread_klass.get() == nullptr) {
+    return;
+  }
+  ScopedLocalRef<jobject> thread(env, env->AllocObject(thread_klass.get()));
   if (thread.get() == nullptr) {
     return;
   }
 
+  // Get a ThreadGroup from the current thread. We need a non-null one, as we are going to
+  // call a runtime-only constructor (so we can set priority and daemon state).
+  jvmtiThreadInfo cur_thread_info;
+  jvmtiError info_result = jvmti_env->GetThreadInfo(nullptr, &cur_thread_info);
+  if (JvmtiErrorToException(env, jvmti_env, info_result)) {
+    return;
+  }
+  CheckJvmtiError(jvmti_env,
+                  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(cur_thread_info.name)));
+  ScopedLocalRef<jobject> thread_group(env, cur_thread_info.thread_group);
+  if (cur_thread_info.context_class_loader != nullptr) {
+    env->DeleteLocalRef(cur_thread_info.context_class_loader);
+  }
+
+  jmethodID initID = env->GetMethodID(thread_klass.get(),
+                                      "<init>",
+                                      "(Ljava/lang/ThreadGroup;Ljava/lang/String;IZ)V");
+  if (initID == nullptr) {
+    return;
+  }
   env->CallNonvirtualVoidMethod(thread.get(),
-                                WellKnownClasses::java_lang_Thread,
-                                WellKnownClasses::java_lang_Thread_init,
-                                Runtime::Current()->GetMainThreadGroup(),
+                                thread_klass.get(),
+                                initID,
+                                thread_group.get(),
                                 thread_name.get(),
-                                kMinThreadPriority,
+                                0,
                                 JNI_FALSE);
   if (env->ExceptionCheck()) {
     return;
@@ -110,7 +139,7 @@
 
   jthread main_thread;
   jvmtiError main_thread_result = jvmti_env->GetCurrentThread(&main_thread);
-  if (JvmtiErrorToException(env, main_thread_result)) {
+  if (JvmtiErrorToException(env, jvmti_env, main_thread_result)) {
     return;
   }
 
@@ -118,21 +147,23 @@
   data.main_thread = env->NewGlobalRef(main_thread);
   data.jvmti_env = jvmti_env;
   data.priority = JVMTI_THREAD_MIN_PRIORITY;
+  CHECK_EQ(0, pthread_barrier_init(&data.b, nullptr, 2));
 
   jvmtiError result = jvmti_env->RunAgentThread(thread.get(), AgentMain, &data, data.priority);
-  if (JvmtiErrorToException(env, result)) {
+  if (JvmtiErrorToException(env, jvmti_env, result)) {
     return;
   }
 
-  data.b.Wait(Thread::Current());
+  int wait_result = pthread_barrier_wait(&data.b);
+  CHECK(wait_result == PTHREAD_BARRIER_SERIAL_THREAD || wait_result == 0);
 
   // Scheduling may mean that the agent thread is put to sleep. Wait until it's dead in an effort
   // to not unload the plugin and crash.
   for (;;) {
-    NanoSleep(1000 * 1000);
+    sleep(1);
     jint thread_state;
     jvmtiError state_result = jvmti_env->GetThreadState(thread.get(), &thread_state);
-    if (JvmtiErrorToException(env, state_result)) {
+    if (JvmtiErrorToException(env, jvmti_env, state_result)) {
       return;
     }
     if (thread_state == 0 ||                                    // Was never alive.
@@ -142,9 +173,11 @@
   }
   // Yield and sleep a bit more, to give the plugin time to tear down the native thread structure.
   sched_yield();
-  NanoSleep(100 * 1000 * 1000);
+  sleep(1);
 
   env->DeleteGlobalRef(data.main_thread);
+
+  pthread_barrier_destroy(&data.b);
 }
 
 }  // namespace Test930AgentThread
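The test now synchronizes the agent thread with the main thread through a plain pthread barrier instead of ART's internal Barrier class, keeping the test free of runtime internals. The two-party handshake, reduced to a standalone sketch:

```cpp
#include <pthread.h>

#include <cstdio>

static pthread_barrier_t gBarrier;

static void* Worker(void*) {
  printf("worker: before rendezvous\n");
  int res = pthread_barrier_wait(&gBarrier);
  // Exactly one waiter observes PTHREAD_BARRIER_SERIAL_THREAD; the rest get 0.
  if (res != PTHREAD_BARRIER_SERIAL_THREAD && res != 0) {
    return nullptr;
  }
  printf("worker: after rendezvous\n");
  return nullptr;
}

int main() {
  pthread_barrier_init(&gBarrier, nullptr, 2);  // Two parties: main + worker.
  pthread_t thread;
  pthread_create(&thread, nullptr, Worker, nullptr);
  pthread_barrier_wait(&gBarrier);  // Rendezvous with the worker.
  pthread_join(thread, nullptr);
  pthread_barrier_destroy(&gBarrier);
  return 0;
}
```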
diff --git a/test/932-transform-saves/src/Transform.java b/test/932-transform-saves/src/Transform.java
index 8e8af35..83f7aa4 100644
--- a/test/932-transform-saves/src/Transform.java
+++ b/test/932-transform-saves/src/Transform.java
@@ -23,6 +23,6 @@
     // of the two different strings were the same).
     // We know the string ids will be different because lexicographically:
     // "Goodbye" < "LTransform;" < "hello".
-    System.out.println("hello");
+    System.out.println("foobar");
   }
 }
diff --git a/test/933-misc-events/misc_events.cc b/test/933-misc-events/misc_events.cc
index 860d4b5..2b74c40 100644
--- a/test/933-misc-events/misc_events.cc
+++ b/test/933-misc-events/misc_events.cc
@@ -18,13 +18,14 @@
 #include <signal.h>
 #include <sys/types.h>
 
-#include "base/logging.h"
-#include "base/macros.h"
+#include "android-base/logging.h"
+#include "android-base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test933MiscEvents {
@@ -42,14 +43,14 @@
   memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
   callbacks.DataDumpRequest = DumpRequestCallback;
   jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
 
   ret = jvmti_env->SetEventNotificationMode(JVMTI_ENABLE,
                                             JVMTI_EVENT_DATA_DUMP_REQUEST,
                                             nullptr);
-  if (JvmtiErrorToException(env, ret)) {
+  if (JvmtiErrorToException(env, jvmti_env, ret)) {
     return;
   }
 
@@ -65,7 +66,7 @@
   }
 
   ret = jvmti_env->SetEventNotificationMode(JVMTI_DISABLE, JVMTI_EVENT_DATA_DUMP_REQUEST, nullptr);
-  JvmtiErrorToException(env, ret);
+  JvmtiErrorToException(env, jvmti_env, ret);
 }
 
 }  // namespace Test933MiscEvents
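The enable path here follows the usual JVMTI ordering: install the callback table before turning the notification on, and disable the notification on the way out. A condensed sketch of that pattern for the data-dump event:

```cpp
#include <cstring>

#include "jvmti.h"

// Sketch: the install-callbacks-then-enable pattern for one event.
static void JNICALL OnDataDump(jvmtiEnv* /* jvmti_env */) {
  // React to the dump request (on Android this is triggered by SIGQUIT).
}

static jvmtiError SetDataDumpEnabled(jvmtiEnv* jvmti_env, bool enable) {
  if (enable) {
    jvmtiEventCallbacks callbacks;
    std::memset(&callbacks, 0, sizeof(callbacks));
    callbacks.DataDumpRequest = OnDataDump;
    jvmtiError err = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
    if (err != JVMTI_ERROR_NONE) {
      return err;
    }
  }
  return jvmti_env->SetEventNotificationMode(enable ? JVMTI_ENABLE : JVMTI_DISABLE,
                                             JVMTI_EVENT_DATA_DUMP_REQUEST,
                                             nullptr);
}
```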
diff --git a/test/936-search-onload/search_onload.cc b/test/936-search-onload/search_onload.cc
index 2286a46..b2ef056 100644
--- a/test/936-search-onload/search_onload.cc
+++ b/test/936-search-onload/search_onload.cc
@@ -22,11 +22,12 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedUtfChars.h"
+#include "jvmti.h"
+#include "scoped_utf_chars.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test936SearchOnload {
diff --git a/test/944-transform-classloaders/classloader.cc b/test/944-transform-classloaders/classloader.cc
index 5fbd8e1..698e023 100644
--- a/test/944-transform-classloaders/classloader.cc
+++ b/test/944-transform-classloaders/classloader.cc
@@ -14,19 +14,18 @@
  * limitations under the License.
  */
 
-#include "base/macros.h"
+#include "android-base/macros.h"
 #include "jni.h"
+#include "jvmti.h"
 #include "mirror/class-inl.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "scoped_local_ref.h"
 
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+// Test infrastructure
+#include "test_env.h"
 
 namespace art {
 namespace Test944TransformClassloaders {
 
-
 extern "C" JNIEXPORT jlong JNICALL Java_Main_getDexFilePointer(JNIEnv* env, jclass, jclass klass) {
   if (Runtime::Current() == nullptr) {
     env->ThrowNew(env->FindClass("java/lang/Exception"),
diff --git a/test/945-obsolete-native/obsolete_native.cc b/test/945-obsolete-native/obsolete_native.cc
index 061e7af..ee653a4 100644
--- a/test/945-obsolete-native/obsolete_native.cc
+++ b/test/945-obsolete-native/obsolete_native.cc
@@ -24,10 +24,12 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
-#include "ti-agent/common_helper.h"
-#include "ti-agent/common_load.h"
+#include "jvmti.h"
+#include "scoped_local_ref.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "test_env.h"
 
 namespace art {
 namespace Test945ObsoleteNative {
diff --git a/test/951-threaded-obsolete/expected.txt b/test/951-threaded-obsolete/expected.txt
new file mode 100644
index 0000000..83efda1
--- /dev/null
+++ b/test/951-threaded-obsolete/expected.txt
@@ -0,0 +1,9 @@
+hello
+Not doing anything here
+goodbye
+hello
+transforming calling function
+goodbye
+Hello - Transformed
+Not doing anything here
+Goodbye - Transformed
diff --git a/test/951-threaded-obsolete/info.txt b/test/951-threaded-obsolete/info.txt
new file mode 100644
index 0000000..e7ef4a2
--- /dev/null
+++ b/test/951-threaded-obsolete/info.txt
@@ -0,0 +1,4 @@
+Tests basic obsolete method support
+
+This test ensures that obsolete methods work even if the obsolete method is
+running on a different thread than the one where the redefinition was triggered.
diff --git a/test/577-profile-foreign-dex/run b/test/951-threaded-obsolete/run
old mode 100644
new mode 100755
similarity index 85%
rename from test/577-profile-foreign-dex/run
rename to test/951-threaded-obsolete/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/951-threaded-obsolete/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/951-threaded-obsolete/src/Main.java b/test/951-threaded-obsolete/src/Main.java
new file mode 100644
index 0000000..98e7236
--- /dev/null
+++ b/test/951-threaded-obsolete/src/Main.java
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Base64;
+import java.util.concurrent.Semaphore;
+
+public class Main {
+  // class Transform {
+  //   public void sayHi(Runnable r) {
+  //     System.out.println("Hello - Transformed");
+  //     r.run();
+  //     System.out.println("Goodbye - Transformed");
+  //   }
+  // }
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAJAoACAARCQASABMIABQKABUAFgsAFwAYCAAZBwAaBwAbAQAGPGluaXQ+AQADKClW" +
+    "AQAEQ29kZQEAD0xpbmVOdW1iZXJUYWJsZQEABXNheUhpAQAXKExqYXZhL2xhbmcvUnVubmFibGU7" +
+    "KVYBAApTb3VyY2VGaWxlAQAOVHJhbnNmb3JtLmphdmEMAAkACgcAHAwAHQAeAQATSGVsbG8gLSBU" +
+    "cmFuc2Zvcm1lZAcAHwwAIAAhBwAiDAAjAAoBABVHb29kYnllIC0gVHJhbnNmb3JtZWQBAAlUcmFu" +
+    "c2Zvcm0BABBqYXZhL2xhbmcvT2JqZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEAA291dAEAFUxqYXZh" +
+    "L2lvL1ByaW50U3RyZWFtOwEAE2phdmEvaW8vUHJpbnRTdHJlYW0BAAdwcmludGxuAQAVKExqYXZh" +
+    "L2xhbmcvU3RyaW5nOylWAQASamF2YS9sYW5nL1J1bm5hYmxlAQADcnVuACAABwAIAAAAAAACAAAA" +
+    "CQAKAAEACwAAAB0AAQABAAAABSq3AAGxAAAAAQAMAAAABgABAAAAAQABAA0ADgABAAsAAAA7AAIA" +
+    "AgAAABeyAAISA7YABCu5AAUBALIAAhIGtgAEsQAAAAEADAAAABIABAAAAAMACAAEAA4ABQAWAAYA" +
+    "AQAPAAAAAgAQ");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQAYeAMMXgYWxoeSHAS9EWKCCtVRSAGpqZVQAwAAcAAAAHhWNBIAAAAAAAAAALACAAAR" +
+    "AAAAcAAAAAcAAAC0AAAAAwAAANAAAAABAAAA9AAAAAUAAAD8AAAAAQAAACQBAAAMAgAARAEAAKIB" +
+    "AACqAQAAwQEAANYBAADjAQAA+gEAAA4CAAAkAgAAOAIAAEwCAABcAgAAXwIAAGMCAAB3AgAAfAIA" +
+    "AIUCAACKAgAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACgAAAAoAAAAGAAAAAAAAAAsAAAAGAAAA" +
+    "lAEAAAsAAAAGAAAAnAEAAAUAAQANAAAAAAAAAAAAAAAAAAEAEAAAAAEAAgAOAAAAAgAAAAAAAAAD" +
+    "AAAADwAAAAAAAAAAAAAAAgAAAAAAAAAJAAAAAAAAAJ8CAAAAAAAAAQABAAEAAACRAgAABAAAAHAQ" +
+    "AwAAAA4ABAACAAIAAACWAgAAFAAAAGIAAAAbAQIAAABuIAIAEAByEAQAAwBiAAAAGwEBAAAAbiAC" +
+    "ABAADgABAAAAAwAAAAEAAAAEAAY8aW5pdD4AFUdvb2RieWUgLSBUcmFuc2Zvcm1lZAATSGVsbG8g" +
+    "LSBUcmFuc2Zvcm1lZAALTFRyYW5zZm9ybTsAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwASTGphdmEv" +
+    "bGFuZy9PYmplY3Q7ABRMamF2YS9sYW5nL1J1bm5hYmxlOwASTGphdmEvbGFuZy9TdHJpbmc7ABJM" +
+    "amF2YS9sYW5nL1N5c3RlbTsADlRyYW5zZm9ybS5qYXZhAAFWAAJWTAASZW1pdHRlcjogamFjay00" +
+    "LjEzAANvdXQAB3ByaW50bG4AA3J1bgAFc2F5SGkAAQAHDgADAQAHDoc8hwAAAAEBAICABMQCAQHc" +
+    "AgAAAA0AAAAAAAAAAQAAAAAAAAABAAAAEQAAAHAAAAACAAAABwAAALQAAAADAAAAAwAAANAAAAAE" +
+    "AAAAAQAAAPQAAAAFAAAABQAAAPwAAAAGAAAAAQAAACQBAAABIAAAAgAAAEQBAAABEAAAAgAAAJQB" +
+    "AAACIAAAEQAAAKIBAAADIAAAAgAAAJECAAAAIAAAAQAAAJ8CAAAAEAAAAQAAALACAAA=");
+
+  public static void main(String[] args) {
+    // Semaphores to let each thread know where the other is. We could use barriers, but
+    // semaphores mean the worker thread does not need to sit waiting at a barrier.
+    final Semaphore sem_redefine_start = new Semaphore(0);
+    final Semaphore sem_redefine_end = new Semaphore(0);
+    // Create a thread to do the actual redefinition. We communicate with it through the
+    // semaphores declared above.
+    new Thread(() -> {
+      try {
+        // Wait for the other thread to ask for redefinition.
+        sem_redefine_start.acquire();
+        // Do the redefinition.
+        doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
+        // Allow the other thread to wake up if it is waiting.
+        sem_redefine_end.release();
+      } catch (InterruptedException e) {
+        throw new Error("unable to do redefinition", e);
+      }
+    }).start();
+
+    Transform t = new Transform();
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+    t.sayHi(() -> {
+      try {
+        System.out.println("transforming calling function");
+        // Wake up the waiting thread.
+        sem_redefine_start.release();
+        // Wait for the other thread to finish with redefinition.
+        sem_redefine_end.acquire();
+      } catch (InterruptedException e) {
+        throw new Error("unable to do redefinition", e);
+      }
+    });
+    t.sayHi(() -> { System.out.println("Not doing anything here"); });
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] classfile,
+                                                       byte[] dexfile);
+}
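The two-semaphore handshake above parks the redefining thread until it is asked to run, then parks the requester until redefinition is complete. The same protocol in POSIX form, with the actual redefinition replaced by a placeholder comment:

```cpp
#include <pthread.h>
#include <semaphore.h>

static sem_t gRedefineStart;  // Posted by the requester.
static sem_t gRedefineEnd;    // Posted by the worker.

static void* RedefineWorker(void*) {
  sem_wait(&gRedefineStart);  // Sleep until redefinition is requested.
  // ... perform the class redefinition here ...
  sem_post(&gRedefineEnd);    // Let the requester continue.
  return nullptr;
}

int main() {
  sem_init(&gRedefineStart, /* pshared */ 0, /* value */ 0);
  sem_init(&gRedefineEnd, 0, 0);
  pthread_t worker;
  pthread_create(&worker, nullptr, RedefineWorker, nullptr);

  sem_post(&gRedefineStart);  // Ask the worker to redefine...
  sem_wait(&gRedefineEnd);    // ...and wait until it has finished.

  pthread_join(worker, nullptr);
  sem_destroy(&gRedefineStart);
  sem_destroy(&gRedefineEnd);
  return 0;
}
```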
diff --git a/test/951-threaded-obsolete/src/Transform.java b/test/951-threaded-obsolete/src/Transform.java
new file mode 100644
index 0000000..8cda6cd
--- /dev/null
+++ b/test/951-threaded-obsolete/src/Transform.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Transform {
+  public void sayHi(Runnable r) {
+    // Use lower 'h' to make sure the string will have a different string id
+    // than the transformation (the transformation code is the same except
+    // the actual printed String, which was making the test inacurately passing
+    // in JIT mode when loading the string from the dex cache, as the string ids
+    // of the two different strings were the same).
+    // We know the string ids will be different because lexicographically:
+    // "Hello" < "LTransform;" < "hello".
+    System.out.println("hello");
+    r.run();
+    System.out.println("goodbye");
+  }
+}
diff --git a/test/958-methodhandle-emulated-stackframe/build b/test/958-methodhandle-stackframe/build
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/build
rename to test/958-methodhandle-stackframe/build
diff --git a/test/958-methodhandle-emulated-stackframe/expected.txt b/test/958-methodhandle-stackframe/expected.txt
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/expected.txt
rename to test/958-methodhandle-stackframe/expected.txt
diff --git a/test/958-methodhandle-emulated-stackframe/info.txt b/test/958-methodhandle-stackframe/info.txt
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/info.txt
rename to test/958-methodhandle-stackframe/info.txt
diff --git a/test/958-methodhandle-emulated-stackframe/src/Main.java b/test/958-methodhandle-stackframe/src/Main.java
similarity index 100%
rename from test/958-methodhandle-emulated-stackframe/src/Main.java
rename to test/958-methodhandle-stackframe/src/Main.java
diff --git a/test/961-default-iface-resolution-gen/build b/test/961-default-iface-resolution-gen/build
index ccebbe4..2f7e3ba 100755
--- a/test/961-default-iface-resolution-gen/build
+++ b/test/961-default-iface-resolution-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 if [[ $@ != *"--jvm"* ]]; then
   # Don't do anything with jvm
   # Hard-wired use of experimental jack.
@@ -39,6 +30,3 @@
 ./util-src/generate_java.py ./src ./expected.txt
 
 ./default-build "$@" --experimental default-methods
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/964-default-iface-init-gen/build b/test/964-default-iface-init-gen/build
index ccebbe4..2f7e3ba 100755
--- a/test/964-default-iface-init-gen/build
+++ b/test/964-default-iface-init-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 if [[ $@ != *"--jvm"* ]]; then
   # Don't do anything with jvm
   # Hard-wired use of experimental jack.
@@ -39,6 +30,3 @@
 ./util-src/generate_java.py ./src ./expected.txt
 
 ./default-build "$@" --experimental default-methods
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/968-default-partial-compile-gen/build b/test/968-default-partial-compile-gen/build
index 1e9f8aa..00ccb89 100755
--- a/test/968-default-partial-compile-gen/build
+++ b/test/968-default-partial-compile-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 # TODO: Support running with jack.
 
 if [[ $@ == *"--jvm"* ]]; then
@@ -45,6 +36,3 @@
   # Use the default build script
   ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
 fi
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/970-iface-super-resolution-gen/build b/test/970-iface-super-resolution-gen/build
index fd1b271..7217fac 100755
--- a/test/970-iface-super-resolution-gen/build
+++ b/test/970-iface-super-resolution-gen/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 # Should we compile with Java source code. By default we will use Smali.
 USES_JAVA_SOURCE="false"
 if [[ $@ == *"--jvm"* ]]; then
@@ -50,6 +41,3 @@
 fi
 
 ./default-build "$@" --experimental default-methods
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/971-iface-super/build b/test/971-iface-super/build
index 1e9f8aa..00ccb89 100755
--- a/test/971-iface-super/build
+++ b/test/971-iface-super/build
@@ -17,15 +17,6 @@
 # make us exit on a failure
 set -e
 
-# We will be making more files than the ulimit is set to allow. Remove it temporarily.
-OLD_ULIMIT=`ulimit -S`
-ulimit -S unlimited
-
-restore_ulimit() {
-  ulimit -S "$OLD_ULIMIT"
-}
-trap 'restore_ulimit' ERR
-
 # TODO: Support running with jack.
 
 if [[ $@ == *"--jvm"* ]]; then
@@ -45,6 +36,3 @@
   # Use the default build script
   ./default-build "$@" "$EXTRA_ARGS" --experimental default-methods
 fi
-
-# Reset the ulimit back to its initial value
-restore_ulimit
diff --git a/test/980-redefine-object/check b/test/980-redefine-object/check
new file mode 100755
index 0000000..987066f
--- /dev/null
+++ b/test/980-redefine-object/check
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright (C) 2014 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The number of paused background threads (and therefore InterruptedExceptions)
+# can change so we will just delete their lines from the log.
+
+sed "/Object allocated of type 'Ljava\/lang\/InterruptedException;'/d" "$2" | diff --strip-trailing-cr -q "$1" - >/dev/null
diff --git a/test/980-redefine-object/expected.txt b/test/980-redefine-object/expected.txt
new file mode 100644
index 0000000..6e9bce0
--- /dev/null
+++ b/test/980-redefine-object/expected.txt
@@ -0,0 +1,52 @@
+	Initializing and loading the TestWatcher class that will (eventually) be notified of object allocations
+	Allocating an j.l.Object before redefining Object class
+	Allocating a Transform before redefining Object class
+	Redefining the Object class to add a hook into the <init> method
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating an j.l.Object after redefining Object class
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating a Transform after redefining Object class
+Object allocated of type 'LTransform;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating an int[] after redefining Object class
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating an array list
+Object allocated of type 'Ljava/util/ArrayList;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Adding a bunch of stuff to the array list
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'LTransform;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Allocating a linked list
+Object allocated of type 'Ljava/util/LinkedList;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Adding a bunch of stuff to the linked list
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/lang/Object;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'LTransform;'
+Object allocated of type 'Ljava/util/LinkedList$Node;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Throwing from down 4 stack frames
+Object allocated of type 'Ljava/lang/Exception;'
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Exception caught.
+Object allocated of type 'Ljava/lang/StringBuilder;'
+Object allocated of type 'Ljava/nio/HeapCharBuffer;'
+	Finishing test!
diff --git a/test/980-redefine-object/info.txt b/test/980-redefine-object/info.txt
new file mode 100644
index 0000000..f3e01b5
--- /dev/null
+++ b/test/980-redefine-object/info.txt
@@ -0,0 +1,23 @@
+Tests basic functions in the jvmti plugin.
+
+This tests that we are able to redefine methods/constructors on the
+java.lang.Object class at runtime.
+
+This also (indirectly) tests that we correctly handle reading annotations on
+obsolete methods. This is something that is not normally done since there is no
+way to get a reference to an obsolete method outside of the runtime, but some
+annotations on the Object class are read by the runtime directly.
+
+NB This test cannot be run on the RI at the moment.
+
+If this test starts failing during the doCommonClassRedefinition call it is
+possible that the definition of Object contained in the base64 DEX_BYTES array
+has become stale and will need to be recreated. The only difference from the
+normal Object dex bytes is that (a) it contains only the bytes of the Object
+class itself, and (b) it adds an
+'invoke-static {p0}, Lart/test/TestWatcher;->NotifyConstructed(Ljava/lang/Object;)V'
+to the <init> function.
+
+It is also possible it could fail if the pattern of allocations caused by string
+concatenation or printing changes. In that case you should simply update the
+expected.txt file.
diff --git a/test/980-redefine-object/redefine_object.cc b/test/980-redefine-object/redefine_object.cc
new file mode 100644
index 0000000..1faf1a1
--- /dev/null
+++ b/test/980-redefine-object/redefine_object.cc
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <iostream>
+
+#include "android-base/stringprintf.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "jni.h"
+#include "jvmti.h"
+#include "scoped_utf_chars.h"
+
+// Test infrastructure
+#include "jni_binder.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
+
+namespace art {
+namespace Test980RedefineObjects {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_bindFunctionsForClass(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass target) {
+  BindFunctionsOnClass(jvmti_env, env, target);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_art_test_TestWatcher_NotifyConstructed(
+    JNIEnv* env, jclass TestWatcherClass ATTRIBUTE_UNUSED, jobject constructed) {
+  char* sig = nullptr;
+  char* generic_sig = nullptr;
+  if (JvmtiErrorToException(env,
+                            jvmti_env,
+                            jvmti_env->GetClassSignature(env->GetObjectClass(constructed),
+                                                         &sig,
+                                                         &generic_sig))) {
+    // Exception.
+    return;
+  }
+  std::cout << "Object allocated of type '" << sig << "'" << std::endl;
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(sig));
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(generic_sig));
+}
+
+}  // namespace Test980RedefineObjects
+}  // namespace art
diff --git a/test/577-profile-foreign-dex/run b/test/980-redefine-object/run
old mode 100644
new mode 100755
similarity index 85%
copy from test/577-profile-foreign-dex/run
copy to test/980-redefine-object/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/980-redefine-object/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/980-redefine-object/src-ex/TestWatcher.java b/test/980-redefine-object/src-ex/TestWatcher.java
new file mode 100644
index 0000000..d15e688
--- /dev/null
+++ b/test/980-redefine-object/src-ex/TestWatcher.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package art.test;
+
+public class TestWatcher {
+  // NB This function is native since it is called in the Object.<init> method and so cannot cause
+  // any Java allocations at all. The normal System.out.print* functions would cause allocations to
+  // occur, so we cannot use them. This means the easiest way to report the object as being created
+  // is to go into native code and do it there.
+  public static native void NotifyConstructed(Object o);
+}
diff --git a/test/980-redefine-object/src/Main.java b/test/980-redefine-object/src/Main.java
new file mode 100644
index 0000000..348951c
--- /dev/null
+++ b/test/980-redefine-object/src/Main.java
@@ -0,0 +1,390 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.LinkedList;
+
+public class Main {
+
+  // TODO We should make this run on the RI.
+  /**
+   * This test cannot be run on the RI.
+   */
+  private static final byte[] CLASS_BYTES = new byte[0];
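+  // (Empty on purpose: on ART the redefinition below is presumably driven entirely by
+  // DEX_BYTES; real class-file bytes would only matter on the RI, where this test
+  // cannot yet run.)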
+
+  // TODO It might be a good idea to replace this hard-coded Object definition with a
+  // retransformation-based test.
+  /**
+   * Base64 encoding of the following smali file.
+   *
+   *  .class public Ljava/lang/Object;
+   *  .source "Object.java"
+   *  # instance fields
+   *  .field private transient shadow$_klass_:Ljava/lang/Class;
+   *      .annotation system Ldalvik/annotation/Signature;
+   *          value = {
+   *              "Ljava/lang/Class",
+   *              "<*>;"
+   *          }
+   *      .end annotation
+   *  .end field
+   *
+   *  .field private transient shadow$_monitor_:I
+   *  # direct methods
+   *  .method public constructor <init>()V
+   *      .registers 1
+   *      .prologue
+   *      invoke-static {p0}, Lart/test/TestWatcher;->NotifyConstructed(Ljava/lang/Object;)V
+   *      return-void
+   *  .end method
+   *
+   *  .method static identityHashCode(Ljava/lang/Object;)I
+   *      .registers 7
+   *      .prologue
+   *      iget v0, p0, Ljava/lang/Object;->shadow$_monitor_:I
+   *      const/high16 v3, -0x40000000    # -2.0f
+   *      const/high16 v2, -0x80000000
+   *      const v1, 0xfffffff
+   *      const/high16 v4, -0x40000000    # -2.0f
+   *      and-int/2addr v4, v0
+   *      const/high16 v5, -0x80000000
+   *      if-ne v4, v5, :cond_15
+   *      const v4, 0xfffffff
+   *      and-int/2addr v4, v0
+   *      return v4
+   *      :cond_15
+   *      invoke-static {p0}, Ljava/lang/Object;->identityHashCodeNative(Ljava/lang/Object;)I
+   *      move-result v4
+   *      return v4
+   *  .end method
+   *
+   *  .method private static native identityHashCodeNative(Ljava/lang/Object;)I
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method private native internalClone()Ljava/lang/Object;
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *
+   *  # virtual methods
+   *  .method protected clone()Ljava/lang/Object;
+   *      .registers 4
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/CloneNotSupportedException;
+   *          }
+   *      .end annotation
+   *
+   *      .prologue
+   *      instance-of v0, p0, Ljava/lang/Cloneable;
+   *      if-nez v0, :cond_2d
+   *      new-instance v0, Ljava/lang/CloneNotSupportedException;
+   *      new-instance v1, Ljava/lang/StringBuilder;
+   *      invoke-direct {v1}, Ljava/lang/StringBuilder;-><init>()V
+   *      const-string/jumbo v2, "Class "
+   *      invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v1
+   *      invoke-virtual {p0}, Ljava/lang/Object;->getClass()Ljava/lang/Class;
+   *      move-result-object v2
+   *      invoke-virtual {v2}, Ljava/lang/Class;->getName()Ljava/lang/String;
+   *      move-result-object v2
+   *      invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v1
+   *      const-string/jumbo v2, " doesn\'t implement Cloneable"
+   *      invoke-virtual {v1, v2}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v1
+   *      invoke-virtual {v1}, Ljava/lang/StringBuilder;->toString()Ljava/lang/String;
+   *      move-result-object v1
+   *      invoke-direct {v0, v1}, Ljava/lang/CloneNotSupportedException;-><init>(Ljava/lang/String;)V
+   *      throw v0
+   *      :cond_2d
+   *      invoke-direct {p0}, Ljava/lang/Object;->internalClone()Ljava/lang/Object;
+   *      move-result-object v0
+   *      return-object v0
+   *  .end method
+   *
+   *  .method public equals(Ljava/lang/Object;)Z
+   *      .registers 3
+   *      .prologue
+   *      if-ne p0, p1, :cond_4
+   *      const/4 v0, 0x1
+   *      :goto_3
+   *      return v0
+   *      :cond_4
+   *      const/4 v0, 0x0
+   *      goto :goto_3
+   *  .end method
+   *
+   *  .method protected finalize()V
+   *      .registers 1
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/Throwable;
+   *          }
+   *      .end annotation
+   *      .prologue
+   *      return-void
+   *  .end method
+   *
+   *  .method public final getClass()Ljava/lang/Class;
+   *      .registers 2
+   *      .annotation system Ldalvik/annotation/Signature;
+   *          value = {
+   *              "()",
+   *              "Ljava/lang/Class",
+   *              "<*>;"
+   *          }
+   *      .end annotation
+   *      .prologue
+   *      iget-object v0, p0, Ljava/lang/Object;->shadow$_klass_:Ljava/lang/Class;
+   *      return-object v0
+   *  .end method
+   *
+   *  .method public hashCode()I
+   *      .registers 2
+   *      .prologue
+   *      invoke-static {p0}, Ljava/lang/Object;->identityHashCode(Ljava/lang/Object;)I
+   *      move-result v0
+   *      return v0
+   *  .end method
+   *
+   *  .method public final native notify()V
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method public final native notifyAll()V
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method public toString()Ljava/lang/String;
+   *      .registers 3
+   *      .prologue
+   *      new-instance v0, Ljava/lang/StringBuilder;
+   *      invoke-direct {v0}, Ljava/lang/StringBuilder;-><init>()V
+   *      invoke-virtual {p0}, Ljava/lang/Object;->getClass()Ljava/lang/Class;
+   *      move-result-object v1
+   *      invoke-virtual {v1}, Ljava/lang/Class;->getName()Ljava/lang/String;
+   *      move-result-object v1
+   *      invoke-virtual {v0, v1}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v0
+   *      const-string/jumbo v1, "@"
+   *      invoke-virtual {v0, v1}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v0
+   *      invoke-virtual {p0}, Ljava/lang/Object;->hashCode()I
+   *      move-result v1
+   *      invoke-static {v1}, Ljava/lang/Integer;->toHexString(I)Ljava/lang/String;
+   *      move-result-object v1
+   *      invoke-virtual {v0, v1}, Ljava/lang/StringBuilder;->append(Ljava/lang/String;)Ljava/lang/StringBuilder;
+   *      move-result-object v0
+   *      invoke-virtual {v0}, Ljava/lang/StringBuilder;->toString()Ljava/lang/String;
+   *      move-result-object v0
+   *      return-object v0
+   *  .end method
+   *
+   *  .method public final native wait()V
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/InterruptedException;
+   *          }
+   *      .end annotation
+   *
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   *
+   *  .method public final wait(J)V
+   *      .registers 4
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/InterruptedException;
+   *          }
+   *      .end annotation
+   *      .prologue
+   *      const/4 v0, 0x0
+   *      invoke-virtual {p0, p1, p2, v0}, Ljava/lang/Object;->wait(JI)V
+   *      return-void
+   *  .end method
+   *
+   *  .method public final native wait(JI)V
+   *      .annotation system Ldalvik/annotation/Throws;
+   *          value = {
+   *              Ljava/lang/InterruptedException;
+   *          }
+   *      .end annotation
+   *
+   *      .annotation build Ldalvik/annotation/optimization/FastNative;
+   *      .end annotation
+   *  .end method
+   */
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+      "ZGV4CjAzNQDUlMR9j03MYuOKekKs2p7zJzu2IfDb7RlMCgAAcAAAAHhWNBIAAAAAAAAAAIgJAAA6" +
+      "AAAAcAAAABEAAABYAQAADQAAAJwBAAACAAAAOAIAABYAAABIAgAAAQAAAPgCAAA0BwAAGAMAABgD" +
+      "AAA2AwAAOgMAAEADAABIAwAASwMAAFMDAABWAwAAWgMAAF0DAABgAwAAZAMAAGgDAACAAwAAnwMA" +
+      "ALsDAADoAwAA+gMAAA0EAAA1BAAATAQAAGEEAACDBAAAlwQAAKsEAADGBAAA3QQAAPAEAAD9BAAA" +
+      "AAUAAAQFAAAJBQAADQUAABAFAAAUBQAAHAUAACMFAAArBQAANQUAAD8FAABIBQAAUgUAAGQFAAB8" +
+      "BQAAiwUAAJUFAACnBQAAugUAAM0FAADVBQAA3QUAAOgFAADtBQAA/QUAAA8GAAAcBgAAJgYAAC0G" +
+      "AAAGAAAACAAAAAwAAAANAAAADgAAAA8AAAARAAAAEgAAABMAAAAUAAAAFQAAABYAAAAXAAAAGAAA" +
+      "ABkAAAAcAAAAIAAAAAYAAAAAAAAAAAAAAAcAAAAAAAAAPAYAAAkAAAAGAAAAAAAAAAkAAAALAAAA" +
+      "AAAAAAkAAAAMAAAAAAAAAAoAAAAMAAAARAYAAAsAAAANAAAAVAYAABwAAAAPAAAAAAAAAB0AAAAP" +
+      "AAAATAYAAB4AAAAPAAAANAYAAB8AAAAPAAAAPAYAAB8AAAAPAAAAVAYAACEAAAAQAAAAPAYAAAsA" +
+      "BgA0AAAACwAAADUAAAACAAoAGgAAAAYABAAnAAAABwALAAMAAAAJAAUANgAAAAsABwADAAAACwAD" +
+      "ACMAAAALAAwAJAAAAAsABwAlAAAACwACACYAAAALAAAAKAAAAAsAAQApAAAACwABACoAAAALAAMA" +
+      "KwAAAAsABwAxAAAACwAHADIAAAALAAQANwAAAAsABwA5AAAACwAIADkAAAALAAkAOQAAAA0ABwAD" +
+      "AAAADQAGACIAAAANAAQANwAAAAsAAAABAAAA/////wAAAAAbAAAA0AYAAD4JAAAAAAAAHCBkb2Vz" +
+      "bid0IGltcGxlbWVudCBDbG9uZWFibGUAAigpAAQ8Kj47AAY8aW5pdD4AAUAABkNsYXNzIAABSQAC" +
+      "SUwAAUoAAUwAAkxJAAJMTAAWTGFydC90ZXN0L1Rlc3RXYXRjaGVyOwAdTGRhbHZpay9hbm5vdGF0" +
+      "aW9uL1NpZ25hdHVyZTsAGkxkYWx2aWsvYW5ub3RhdGlvbi9UaHJvd3M7ACtMZGFsdmlrL2Fubm90" +
+      "YXRpb24vb3B0aW1pemF0aW9uL0Zhc3ROYXRpdmU7ABBMamF2YS9sYW5nL0NsYXNzABFMamF2YS9s" +
+      "YW5nL0NsYXNzOwAmTGphdmEvbGFuZy9DbG9uZU5vdFN1cHBvcnRlZEV4Y2VwdGlvbjsAFUxqYXZh" +
+      "L2xhbmcvQ2xvbmVhYmxlOwATTGphdmEvbGFuZy9JbnRlZ2VyOwAgTGphdmEvbGFuZy9JbnRlcnJ1" +
+      "cHRlZEV4Y2VwdGlvbjsAEkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABlM" +
+      "amF2YS9sYW5nL1N0cmluZ0J1aWxkZXI7ABVMamF2YS9sYW5nL1Rocm93YWJsZTsAEU5vdGlmeUNv" +
+      "bnN0cnVjdGVkAAtPYmplY3QuamF2YQABVgACVkoAA1ZKSQACVkwAAVoAAlpMAAZhcHBlbmQABWNs" +
+      "b25lAAZlcXVhbHMACGZpbmFsaXplAAhnZXRDbGFzcwAHZ2V0TmFtZQAIaGFzaENvZGUAEGlkZW50" +
+      "aXR5SGFzaENvZGUAFmlkZW50aXR5SGFzaENvZGVOYXRpdmUADWludGVybmFsQ2xvbmUACGxvY2tX" +
+      "b3JkABBsb2NrV29yZEhhc2hNYXNrABFsb2NrV29yZFN0YXRlSGFzaAARbG9ja1dvcmRTdGF0ZU1h" +
+      "c2sABm1pbGxpcwAGbm90aWZ5AAlub3RpZnlBbGwAA29iagAOc2hhZG93JF9rbGFzc18AEHNoYWRv" +
+      "dyRfbW9uaXRvcl8AC3RvSGV4U3RyaW5nAAh0b1N0cmluZwAFdmFsdWUABHdhaXQAAAIAAAABAAAA" +
+      "AQAAAAsAAAABAAAAAAAAAAEAAAABAAAAAQAAAAwAAgQBOBwBGAcCBAE4HAEYCgIDATgcAhcQFwIC" +
+      "BAE4HAEYDgAFAAIDATgcAxcBFxAXAgAAAAAAAAAAAAEAAABaBgAAAgAAAGIGAAB8BgAAAQAAAGIG" +
+      "AAABAAAAagYAAAEAAAB0BgAAAQAAAHwGAAABAAAAfwYAAAAAAAABAAAACgAAAAAAAAAAAAAAsAYA" +
+      "AAUAAACUBgAABwAAALgGAAAIAAAAyAYAAAsAAADABgAADAAAAMAGAAANAAAAwAYAAA4AAADABgAA" +
+      "EAAAAJwGAAARAAAAqAYAABIAAACcBgAAKAAHDgBwATQHDi0DAC0BLQMDMAEtAwIvATwDAS4BeFsA" +
+      "7AEABw5LARoPOsYArAEBNAcOAMUEAAcOAEEABw4AaAAHDgCRAgAHDgCmAwExBw5LAAAAAQABAAEA" +
+      "AAA4BwAABAAAAHEQAAAAAA4ABwABAAEAAAA9BwAAGgAAAFJgAQAVAwDAFQIAgBQB////DxUEAMC1" +
+      "BBUFAIAzVAcAFAT///8PtQQPBHEQCwAGAAoEDwQEAAEAAgAAAFkHAAAyAAAAIDAIADkAKwAiAAcA" +
+      "IgENAHAQEwABABsCBQAAAG4gFAAhAAwBbhAIAAMADAJuEAEAAgAMAm4gFAAhAAwBGwIAAAAAbiAU" +
+      "ACEADAFuEBUAAQAMAXAgAgAQACcAcBAMAAMADAARAAMAAgAAAAAAZQcAAAYAAAAzIQQAEhAPABIA" +
+      "KP4BAAEAAAAAAGwHAAABAAAADgAAAAIAAQAAAAAAcgcAAAMAAABUEAAAEQAAAAIAAQABAAAAdwcA" +
+      "AAUAAABxEAoAAQAKAA8AAAADAAEAAgAAAHwHAAApAAAAIgANAHAQEwAAAG4QCAACAAwBbhABAAEA" +
+      "DAFuIBQAEAAMABsBBAAAAG4gFAAQAAwAbhAJAAIACgFxEAMAAQAMAW4gFAAQAAwAbhAVAAAADAAR" +
+      "AAAABAADAAQAAACCBwAABQAAABIAbkASACEDDgAAAgQLAIIBAYIBBIGABIwPBgikDwGKAgABggIA" +
+      "BQToDwEB3BABBPgQARGMEQEBpBEEkQIAAZECAAEBwBEBkQIAARGkEgGRAgAAABAAAAAAAAAAAQAA" +
+      "AAAAAAABAAAAOgAAAHAAAAACAAAAEQAAAFgBAAADAAAADQAAAJwBAAAEAAAAAgAAADgCAAAFAAAA" +
+      "FgAAAEgCAAAGAAAAAQAAAPgCAAACIAAAOgAAABgDAAABEAAABQAAADQGAAAEIAAABgAAAFoGAAAD" +
+      "EAAACQAAAIwGAAAGIAAAAQAAANAGAAADIAAACQAAADgHAAABIAAACQAAAIwHAAAAIAAAAQAAAD4J" +
+      "AAAAEAAAAQAAAIgJAAA=");
+
+  private static final String LISTENER_LOCATION =
+      System.getenv("DEX_LOCATION") + "/980-redefine-object-ex.jar";
+
+  public static void main(String[] args) {
+    doTest();
+  }
+
+  private static void ensureTestWatcherInitialized() {
+    try {
+      // Make sure the TestWatcher class can be found from the Object <init> function.
+      addToBootClassLoader(LISTENER_LOCATION);
+      // Load TestWatcher from the bootclassloader and make sure it is initialized.
+      Class<?> testwatcher_class = Class.forName("art.test.TestWatcher", true, null);
+      // Bind the native functions of testwatcher_class.
+      bindFunctionsForClass(testwatcher_class);
+    } catch (Exception e) {
+      throw new Error("Exception while initializing TestWatcher", e);
+    }
+  }
+
+  // NB This function will cause 2 objects, one of type "Ljava/lang/StringBuilder;" and one of
+  // type "Ljava/nio/HeapCharBuffer;", to be allocated each time it is called.
+  private static void safePrintln(Object o) {
+    System.out.flush();
+    System.out.print("\t" + o + "\n");
+    System.out.flush();
+  }
+
+  private static void throwFrom(int depth) throws Exception {
+    if (depth <= 0) {
+      throw new Exception("Throwing the exception");
+    } else {
+      throwFrom(depth - 1);
+    }
+  }
+
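+  // Performs a series of allocations before and after redefining java.lang.Object so the
+  // log (filtered by the check script) shows exactly which allocations trigger the new
+  // <init> hook.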
+  public static void doTest() {
+    safePrintln("Initializing and loading the TestWatcher class that will (eventually) be " +
+                "notified of object allocations");
+    // Make sure the TestWatcher class is initialized before we do anything else.
+    ensureTestWatcherInitialized();
+    safePrintln("Allocating an j.l.Object before redefining Object class");
+    // Make sure these aren't shown.
+    Object o = new Object();
+    safePrintln("Allocating a Transform before redefining Object class");
+    Transform t = new Transform();
+
+    // Redefine the Object Class.
+    safePrintln("Redefining the Object class to add a hook into the <init> method");
+    doCommonClassRedefinition(Object.class, CLASS_BYTES, DEX_BYTES);
+
+    safePrintln("Allocating an j.l.Object after redefining Object class");
+    Object o2 = new Object();
+    safePrintln("Allocating a Transform after redefining Object class");
+    Transform t2 = new Transform();
+
+    // This shouldn't cause the Object constructor to be run.
+    safePrintln("Allocating an int[] after redefining Object class");
+    int[] abc = new int[12];
+
+    // Try adding stuff to an array list.
+    safePrintln("Allocating an array list");
+    ArrayList<Object> al = new ArrayList<>();
+    safePrintln("Adding a bunch of stuff to the array list");
+    al.add(new Object());
+    al.add(new Object());
+    al.add(o2);
+    al.add(o);
+    al.add(t);
+    al.add(t2);
+    al.add(new Transform());
+
+    // Try adding stuff to a LinkedList
+    safePrintln("Allocating a linked list");
+    LinkedList<Object> ll = new LinkedList<>();
+    safePrintln("Adding a bunch of stuff to the linked list");
+    ll.add(new Object());
+    ll.add(new Object());
+    ll.add(o2);
+    ll.add(o);
+    ll.add(t);
+    ll.add(t2);
+    ll.add(new Transform());
+
+    // Try making an exception.
+    safePrintln("Throwing from down 4 stack frames");
+    try {
+      throwFrom(4);
+    } catch (Exception e) {
+      safePrintln("Exception caught.");
+    }
+
+    safePrintln("Finishing test!");
+  }
+
+  private static native void addToBootClassLoader(String s);
+
+  private static native void bindFunctionsForClass(Class<?> target);
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] class_file,
+                                                       byte[] dex_file);
+}
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/980-redefine-object/src/Transform.java
similarity index 95%
rename from test/577-profile-foreign-dex/src-ex/OtherDex.java
rename to test/980-redefine-object/src/Transform.java
index cba73b3..23f67d9 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/980-redefine-object/src/Transform.java
@@ -13,5 +13,5 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
-}
+
+class Transform { }
diff --git a/test/577-profile-foreign-dex/expected.txt b/test/981-dedup-original-dex/expected.txt
similarity index 100%
rename from test/577-profile-foreign-dex/expected.txt
rename to test/981-dedup-original-dex/expected.txt
diff --git a/test/981-dedup-original-dex/info.txt b/test/981-dedup-original-dex/info.txt
new file mode 100644
index 0000000..62696e0
--- /dev/null
+++ b/test/981-dedup-original-dex/info.txt
@@ -0,0 +1,4 @@
+Tests basic functions in the jvmti plugin.
+
+This checks that we do not needlessly duplicate the contents of retransformed
+classes' original dex files.
diff --git a/test/577-profile-foreign-dex/run b/test/981-dedup-original-dex/run
old mode 100644
new mode 100755
similarity index 78%
copy from test/577-profile-foreign-dex/run
copy to test/981-dedup-original-dex/run
index ad57d14..e92b873
--- a/test/577-profile-foreign-dex/run
+++ b/test/981-dedup-original-dex/run
@@ -1,6 +1,6 @@
 #!/bin/bash
 #
-# Copyright 2016 The Android Open Source Project
+# Copyright 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/981-dedup-original-dex/src/Main.java b/test/981-dedup-original-dex/src/Main.java
new file mode 100644
index 0000000..1e063cf
--- /dev/null
+++ b/test/981-dedup-original-dex/src/Main.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Field;
+import java.util.Base64;
+import java.nio.ByteBuffer;
+
+import dalvik.system.ClassExt;
+import dalvik.system.InMemoryDexClassLoader;
+
+public class Main {
+
+  /**
+   * base64 encoded class/dex file for
+   * class Transform {
+   *   public void sayHi() {
+   *    System.out.println("Goodbye");
+   *   }
+   * }
+   */
+  private static final byte[] DEX_BYTES_1 = Base64.getDecoder().decode(
+    "ZGV4CjAzNQCLXSBQ5FiS3f16krSYZFF8xYZtFVp0GRXMAgAAcAAAAHhWNBIAAAAAAAAAACwCAAAO" +
+    "AAAAcAAAAAYAAACoAAAAAgAAAMAAAAABAAAA2AAAAAQAAADgAAAAAQAAAAABAACsAQAAIAEAAGIB" +
+    "AABqAQAAcwEAAIABAACXAQAAqwEAAL8BAADTAQAA4wEAAOYBAADqAQAA/gEAAAMCAAAMAgAAAgAA" +
+    "AAMAAAAEAAAABQAAAAYAAAAIAAAACAAAAAUAAAAAAAAACQAAAAUAAABcAQAABAABAAsAAAAAAAAA" +
+    "AAAAAAAAAAANAAAAAQABAAwAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAHAAAAAAAAAB4CAAAA" +
+    "AAAAAQABAAEAAAATAgAABAAAAHAQAwAAAA4AAwABAAIAAAAYAgAACQAAAGIAAAAbAQEAAABuIAIA" +
+    "EAAOAAAAAQAAAAMABjxpbml0PgAHR29vZGJ5ZQALTFRyYW5zZm9ybTsAFUxqYXZhL2lvL1ByaW50" +
+    "U3RyZWFtOwASTGphdmEvbGFuZy9PYmplY3Q7ABJMamF2YS9sYW5nL1N0cmluZzsAEkxqYXZhL2xh" +
+    "bmcvU3lzdGVtOwAOVHJhbnNmb3JtLmphdmEAAVYAAlZMABJlbWl0dGVyOiBqYWNrLTMuMzYAA291" +
+    "dAAHcHJpbnRsbgAFc2F5SGkAEQAHDgATAAcOhQAAAAEBAICABKACAQG4Ag0AAAAAAAAAAQAAAAAA" +
+    "AAABAAAADgAAAHAAAAACAAAABgAAAKgAAAADAAAAAgAAAMAAAAAEAAAAAQAAANgAAAAFAAAABAAA" +
+    "AOAAAAAGAAAAAQAAAAABAAABIAAAAgAAACABAAABEAAAAQAAAFwBAAACIAAADgAAAGIBAAADIAAA" +
+    "AgAAABMCAAAAIAAAAQAAAB4CAAAAEAAAAQAAACwCAAA=");
+
+  /**
+   * base64 encoded class/dex file for
+   * class Transform2 {
+   *   public void sayHi() {
+   *    System.out.println("Goodbye2");
+   *   }
+   * }
+   */
+  private static final byte[] DEX_BYTES_2 = Base64.getDecoder().decode(
+    "ZGV4CjAzNQAjXDED2iflQ3NXbPtBRVjQVMqoDU9nDz/QAgAAcAAAAHhWNBIAAAAAAAAAADACAAAO" +
+    "AAAAcAAAAAYAAACoAAAAAgAAAMAAAAABAAAA2AAAAAQAAADgAAAAAQAAAAABAACwAQAAIAEAAGIB" +
+    "AABqAQAAdAEAAIIBAACZAQAArQEAAMEBAADVAQAA5gEAAOkBAADtAQAAAQIAAAYCAAAPAgAAAgAA" +
+    "AAMAAAAEAAAABQAAAAYAAAAIAAAACAAAAAUAAAAAAAAACQAAAAUAAABcAQAABAABAAsAAAAAAAAA" +
+    "AAAAAAAAAAANAAAAAQABAAwAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAHAAAAAAAAACECAAAA" +
+    "AAAAAQABAAEAAAAWAgAABAAAAHAQAwAAAA4AAwABAAIAAAAbAgAACQAAAGIAAAAbAQEAAABuIAIA" +
+    "EAAOAAAAAQAAAAMABjxpbml0PgAIR29vZGJ5ZTIADExUcmFuc2Zvcm0yOwAVTGphdmEvaW8vUHJp" +
+    "bnRTdHJlYW07ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwASTGphdmEv" +
+    "bGFuZy9TeXN0ZW07AA9UcmFuc2Zvcm0yLmphdmEAAVYAAlZMABJlbWl0dGVyOiBqYWNrLTQuMzAA" +
+    "A291dAAHcHJpbnRsbgAFc2F5SGkAAQAHDgADAAcOhwAAAAEBAICABKACAQG4AgANAAAAAAAAAAEA" +
+    "AAAAAAAAAQAAAA4AAABwAAAAAgAAAAYAAACoAAAAAwAAAAIAAADAAAAABAAAAAEAAADYAAAABQAA" +
+    "AAQAAADgAAAABgAAAAEAAAAAAQAAASAAAAIAAAAgAQAAARAAAAEAAABcAQAAAiAAAA4AAABiAQAA" +
+    "AyAAAAIAAAAWAgAAACAAAAEAAAAhAgAAABAAAAEAAAAwAgAA");
+
+
+  /**
+   * base64 encoded class/dex file for
+   * class Transform3 {
+   *   public void sayHi() {
+   *    System.out.println("hello3");
+   *   }
+   * }
+   */
+  private static final byte[] DEX_BYTES_3_INITIAL = Base64.getDecoder().decode(
+    "ZGV4CjAzNQC2W2fBsAeLNAwWYlG8FVigzfsV7nBWITzQAgAAcAAAAHhWNBIAAAAAAAAAADACAAAO" +
+    "AAAAcAAAAAYAAACoAAAAAgAAAMAAAAABAAAA2AAAAAQAAADgAAAAAQAAAAABAACwAQAAIAEAAGIB" +
+    "AABqAQAAeAEAAI8BAACjAQAAtwEAAMsBAADcAQAA3wEAAOMBAAD3AQAA/wEAAAQCAAANAgAAAQAA" +
+    "AAIAAAADAAAABAAAAAUAAAAHAAAABwAAAAUAAAAAAAAACAAAAAUAAABcAQAABAABAAsAAAAAAAAA" +
+    "AAAAAAAAAAANAAAAAQABAAwAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAGAAAAAAAAAB8CAAAA" +
+    "AAAAAQABAAEAAAAUAgAABAAAAHAQAwAAAA4AAwABAAIAAAAZAgAACQAAAGIAAAAbAQoAAABuIAIA" +
+    "EAAOAAAAAQAAAAMABjxpbml0PgAMTFRyYW5zZm9ybTM7ABVMamF2YS9pby9QcmludFN0cmVhbTsA" +
+    "EkxqYXZhL2xhbmcvT2JqZWN0OwASTGphdmEvbGFuZy9TdHJpbmc7ABJMamF2YS9sYW5nL1N5c3Rl" +
+    "bTsAD1RyYW5zZm9ybTMuamF2YQABVgACVkwAEmVtaXR0ZXI6IGphY2stNC4zMAAGaGVsbG8zAANv" +
+    "dXQAB3ByaW50bG4ABXNheUhpAAIABw4ABAAHDocAAAABAQCAgASgAgEBuAIAAAANAAAAAAAAAAEA" +
+    "AAAAAAAAAQAAAA4AAABwAAAAAgAAAAYAAACoAAAAAwAAAAIAAADAAAAABAAAAAEAAADYAAAABQAA" +
+    "AAQAAADgAAAABgAAAAEAAAAAAQAAASAAAAIAAAAgAQAAARAAAAEAAABcAQAAAiAAAA4AAABiAQAA" +
+    "AyAAAAIAAAAUAgAAACAAAAEAAAAfAgAAABAAAAEAAAAwAgAA");
+
+  /**
+   * base64 encoded class/dex file for
+   * class Transform3 {
+   *   public void sayHi() {
+   *    System.out.println("Goodbye3");
+   *   }
+   * }
+   */
+  private static final byte[] DEX_BYTES_3_FINAL = Base64.getDecoder().decode(
+    "ZGV4CjAzNQBAXE5GthgMydaFBuinf+ZBfXcBYIw2UlXQAgAAcAAAAHhWNBIAAAAAAAAAADACAAAO" +
+    "AAAAcAAAAAYAAACoAAAAAgAAAMAAAAABAAAA2AAAAAQAAADgAAAAAQAAAAABAACwAQAAIAEAAGIB" +
+    "AABqAQAAdAEAAIIBAACZAQAArQEAAMEBAADVAQAA5gEAAOkBAADtAQAAAQIAAAYCAAAPAgAAAgAA" +
+    "AAMAAAAEAAAABQAAAAYAAAAIAAAACAAAAAUAAAAAAAAACQAAAAUAAABcAQAABAABAAsAAAAAAAAA" +
+    "AAAAAAAAAAANAAAAAQABAAwAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAAHAAAAAAAAACECAAAA" +
+    "AAAAAQABAAEAAAAWAgAABAAAAHAQAwAAAA4AAwABAAIAAAAbAgAACQAAAGIAAAAbAQEAAABuIAIA" +
+    "EAAOAAAAAQAAAAMABjxpbml0PgAIR29vZGJ5ZTMADExUcmFuc2Zvcm0zOwAVTGphdmEvaW8vUHJp" +
+    "bnRTdHJlYW07ABJMamF2YS9sYW5nL09iamVjdDsAEkxqYXZhL2xhbmcvU3RyaW5nOwASTGphdmEv" +
+    "bGFuZy9TeXN0ZW07AA9UcmFuc2Zvcm0zLmphdmEAAVYAAlZMABJlbWl0dGVyOiBqYWNrLTQuMzAA" +
+    "A291dAAHcHJpbnRsbgAFc2F5SGkAAgAHDgAEAAcOhwAAAAEBAICABKACAQG4AgANAAAAAAAAAAEA" +
+    "AAAAAAAAAQAAAA4AAABwAAAAAgAAAAYAAACoAAAAAwAAAAIAAADAAAAABAAAAAEAAADYAAAABQAA" +
+    "AAQAAADgAAAABgAAAAEAAAAAAQAAASAAAAIAAAAgAQAAARAAAAEAAABcAQAAAiAAAA4AAABiAQAA" +
+    "AyAAAAIAAAAWAgAAACAAAAEAAAAhAgAAABAAAAEAAAAwAgAA");
+
+  public static void main(String[] args) {
+    try {
+      doTest();
+    } catch (Exception e) {
+      e.printStackTrace();
+    }
+  }
+
+  private static void assertSame(Object a, Object b) throws Exception {
+    if (a != b) {
+      throw new AssertionError("'" + (a != null ? a.toString() : "null") + "' is not the same as " +
+                               "'" + (b != null ? b.toString() : "null") + "'");
+    }
+  }
+
+  private static Object getObjectField(Object o, String name) throws Exception {
+    return getObjectField(o, o.getClass(), name);
+  }
+
+  private static Object getObjectField(Object o, Class<?> type, String name) throws Exception {
+    Field f = type.getDeclaredField(name);
+    f.setAccessible(true);
+    return f.get(o);
+  }
+
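+  // Reads Class.extData.originalDexFile reflectively. As exercised in doTest below, this
+  // can be null, a java.lang.DexCache, a byte[], or a java.lang.Long depending on how the
+  // class was (re)defined.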
+  private static Object getOriginalDexFile(Class<?> k) throws Exception {
+    ClassExt ext_data_object = (ClassExt) getObjectField(k, "extData");
+    if (ext_data_object == null) {
+      return null;
+    }
+
+    return getObjectField(ext_data_object, "originalDexFile");
+  }
+
+  public static void doTest() throws Exception {
+    // Make sure both of these are loaded prior to transformations being added so they have the same
+    // original dex files.
+    Transform t1 = new Transform();
+    Transform2 t2 = new Transform2();
+
+    assertSame(null, getOriginalDexFile(t1.getClass()));
+    assertSame(null, getOriginalDexFile(t2.getClass()));
+    assertSame(null, getOriginalDexFile(Main.class));
+
+    addCommonTransformationResult("Transform", new byte[0], DEX_BYTES_1);
+    addCommonTransformationResult("Transform2", new byte[0], DEX_BYTES_2);
+    enableCommonRetransformation(true);
+    doCommonClassRetransformation(Transform.class, Transform2.class);
+
+    assertSame(getOriginalDexFile(t1.getClass()), getOriginalDexFile(t2.getClass()));
+    assertSame(null, getOriginalDexFile(Main.class));
+    // Make sure that the original dex file is a DexCache object.
+    assertSame(getOriginalDexFile(t1.getClass()).getClass(), Class.forName("java.lang.DexCache"));
+
+    // Check that we end up with a byte[] if we do a direct RedefineClasses
+    enableCommonRetransformation(false);
+    doCommonClassRedefinition(Transform.class, new byte[0], DEX_BYTES_1);
+    assertSame((new byte[0]).getClass(), getOriginalDexFile(t1.getClass()).getClass());
+
+    // Check that there is no originalDexFile if the on-load transformation
+    // doesn't actually change anything.
+    enableCommonRetransformation(true);
+    Class<?> transform3Class = new InMemoryDexClassLoader(
+        ByteBuffer.wrap(DEX_BYTES_3_INITIAL), Main.class.getClassLoader()).loadClass("Transform3");
+    assertSame(null, getOriginalDexFile(transform3Class));
+
+    // Check that we end up with a java.lang.Long pointer if we do an 'on-load' redefinition.
+    addCommonTransformationResult("Transform3", new byte[0], DEX_BYTES_3_FINAL);
+    enableCommonRetransformation(true);
+    Class<?> transform3ClassTransformed = new InMemoryDexClassLoader(
+        ByteBuffer.wrap(DEX_BYTES_3_INITIAL), Main.class.getClassLoader()).loadClass("Transform3");
+    assertSame(Long.class, getOriginalDexFile(transform3ClassTransformed).getClass());
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRetransformation(Class<?>... target);
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] class_file,
+                                                       byte[] dex_file);
+  private static native void enableCommonRetransformation(boolean enable);
+  private static native void addCommonTransformationResult(String target_name,
+                                                           byte[] class_bytes,
+                                                           byte[] dex_bytes);
+}
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/981-dedup-original-dex/src/Transform.java
similarity index 87%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/981-dedup-original-dex/src/Transform.java
index cba73b3..3c97907 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/981-dedup-original-dex/src/Transform.java
@@ -13,5 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+class Transform {
+  public void sayHi() {
+    System.out.println("hello");
+  }
 }
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/981-dedup-original-dex/src/Transform2.java
similarity index 87%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/981-dedup-original-dex/src/Transform2.java
index cba73b3..eb22842 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/981-dedup-original-dex/src/Transform2.java
@@ -13,5 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+class Transform2 {
+  public void sayHi() {
+    System.out.println("hello2");
+  }
 }
diff --git a/test/982-ok-no-retransform/expected.txt b/test/982-ok-no-retransform/expected.txt
new file mode 100644
index 0000000..317e967
--- /dev/null
+++ b/test/982-ok-no-retransform/expected.txt
@@ -0,0 +1,2 @@
+hello
+hello
diff --git a/test/982-ok-no-retransform/info.txt b/test/982-ok-no-retransform/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/982-ok-no-retransform/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/577-profile-foreign-dex/run b/test/982-ok-no-retransform/run
old mode 100644
new mode 100755
similarity index 85%
copy from test/577-profile-foreign-dex/run
copy to test/982-ok-no-retransform/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/982-ok-no-retransform/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/982-ok-no-retransform/src/Main.java b/test/982-ok-no-retransform/src/Main.java
new file mode 100644
index 0000000..7bb4a46
--- /dev/null
+++ b/test/982-ok-no-retransform/src/Main.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    doTest(new Transform());
+  }
+
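+  // The common retransformation hook is enabled, but no transformation result is
+  // registered, so Transform should be unchanged and "hello" should print both times
+  // (see expected.txt).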
+  public static void doTest(Transform t) {
+    t.sayHi();
+    enableCommonRetransformation(true);
+    doCommonClassRetransformation(Transform.class);
+    t.sayHi();
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRetransformation(Class<?>... target);
+  private static native void enableCommonRetransformation(boolean enable);
+}
diff --git a/test/982-ok-no-retransform/src/Transform.java b/test/982-ok-no-retransform/src/Transform.java
new file mode 100644
index 0000000..8e8af35
--- /dev/null
+++ b/test/982-ok-no-retransform/src/Transform.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Transform {
+  public void sayHi() {
+    // Use a lowercase 'h' to make sure the string will have a different string id
+    // than the one in the transformation (the transformation code is the same except
+    // for the actual printed String, which was making the test inaccurately pass
+    // in JIT mode when loading the string from the dex cache, as the string ids
+    // of the two different strings were the same).
+    // We know the string ids will be different because lexicographically:
+    // "Goodbye" < "LTransform;" < "hello".
+    System.out.println("hello");
+  }
+}
diff --git a/test/983-source-transform-verify/expected.txt b/test/983-source-transform-verify/expected.txt
new file mode 100644
index 0000000..0a94212
--- /dev/null
+++ b/test/983-source-transform-verify/expected.txt
@@ -0,0 +1,2 @@
+Dex file hook for Transform
+Dex file hook for java/lang/Object
diff --git a/test/983-source-transform-verify/info.txt b/test/983-source-transform-verify/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/983-source-transform-verify/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/577-profile-foreign-dex/run b/test/983-source-transform-verify/run
old mode 100644
new mode 100755
similarity index 85%
copy from test/577-profile-foreign-dex/run
copy to test/983-source-transform-verify/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/983-source-transform-verify/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/983-source-transform-verify/source_transform.cc b/test/983-source-transform-verify/source_transform.cc
new file mode 100644
index 0000000..3ef3c7c
--- /dev/null
+++ b/test/983-source-transform-verify/source_transform.cc
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <iostream>
+#include <vector>
+
+#include "android-base/stringprintf.h"
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "bytecode_utils.h"
+#include "dex_file.h"
+#include "dex_instruction.h"
+#include "jit/jit.h"
+#include "jni.h"
+#include "native_stack_dump.h"
+#include "jvmti.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+// Test infrastructure
+#include "jvmti_helper.h"
+#include "test_env.h"
+
+namespace art {
+namespace Test983SourceTransformVerify {
+
+constexpr bool kSkipInitialLoad = true;
+
+// The hook we are using.
+void JNICALL CheckDexFileHook(jvmtiEnv* jvmti_env ATTRIBUTE_UNUSED,
+                              JNIEnv* jni_env ATTRIBUTE_UNUSED,
+                              jclass class_being_redefined,
+                              jobject loader ATTRIBUTE_UNUSED,
+                              const char* name,
+                              jobject protection_domain ATTRIBUTE_UNUSED,
+                              jint class_data_len,
+                              const unsigned char* class_data,
+                              jint* new_class_data_len ATTRIBUTE_UNUSED,
+                              unsigned char** new_class_data ATTRIBUTE_UNUSED) {
+  if (kSkipInitialLoad && class_being_redefined == nullptr) {
+    // Something got loaded concurrently. Just ignore it for now.
+    return;
+  }
+  std::cout << "Dex file hook for " << name << std::endl;
+  if (IsJVM()) {
+    return;
+  }
+  std::string error;
+  std::unique_ptr<const DexFile> dex(DexFile::Open(class_data,
+                                                   class_data_len,
+                                                   "fake_location.dex",
+                                                   /*location_checksum*/ 0,
+                                                   /*oat_dex_file*/ nullptr,
+                                                   /*verify*/ true,
+                                                   /*verify_checksum*/ true,
+                                                   &error));
+  if (dex.get() == nullptr) {
+    std::cout << "Failed to verify dex file for " << name << " because " << error << std::endl;
+    return;
+  }
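+  // Walk every method of every class in the verified dex file and flag any instruction
+  // that should never appear here, e.g. return-void-no-barrier or instructions carrying
+  // verify-error or runtime-only flags.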
+  for (uint32_t i = 0; i < dex->NumClassDefs(); i++) {
+    const DexFile::ClassDef& def = dex->GetClassDef(i);
+    const uint8_t* data_item = dex->GetClassData(def);
+    if (data_item == nullptr) {
+      continue;
+    }
+    for (ClassDataItemIterator it(*dex, data_item); it.HasNext(); it.Next()) {
+      if (!it.IsAtMethod() || it.GetMethodCodeItem() == nullptr) {
+        continue;
+      }
+      for (CodeItemIterator code_it(*it.GetMethodCodeItem()); !code_it.Done(); code_it.Advance()) {
+        const Instruction& inst = code_it.CurrentInstruction();
+        int forbidden_flags = (Instruction::kVerifyError | Instruction::kVerifyRuntimeOnly);
+        if (inst.Opcode() == Instruction::RETURN_VOID_NO_BARRIER ||
+            (inst.GetVerifyExtraFlags() & forbidden_flags) != 0) {
+          std::cout << "Unexpected instruction found in " << dex->PrettyMethod(it.GetMemberIndex())
+                    << " [Dex PC: 0x" << std::hex << code_it.CurrentDexPc() << std::dec << "] : "
+                    << inst.DumpString(dex.get()) << std::endl;
+          continue;
+        }
+      }
+    }
+  }
+}
+
+// Grab the jvmti environment, take all available capabilities, and install CheckDexFileHook.
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  jvmtiEventCallbacks cb;
+  memset(&cb, 0, sizeof(cb));
+  cb.ClassFileLoadHook = CheckDexFileHook;
+  if (jvmti_env->SetEventCallbacks(&cb, sizeof(cb)) != JVMTI_ERROR_NONE) {
+    printf("Unable to set class file load hook cb!\n");
+    return 1;
+  }
+  return 0;
+}
+
+}  // namespace Test983SourceTransformVerify
+}  // namespace art
diff --git a/runtime/native/java_lang_DexCache.h b/test/983-source-transform-verify/source_transform.h
similarity index 60%
copy from runtime/native/java_lang_DexCache.h
copy to test/983-source-transform-verify/source_transform.h
index b1c1f5e..db9415a 100644
--- a/runtime/native/java_lang_DexCache.h
+++ b/test/983-source-transform-verify/source_transform.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2014 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,15 +14,17 @@
  * limitations under the License.
  */
 
-#ifndef ART_RUNTIME_NATIVE_JAVA_LANG_DEXCACHE_H_
-#define ART_RUNTIME_NATIVE_JAVA_LANG_DEXCACHE_H_
+#ifndef ART_TEST_983_SOURCE_TRANSFORM_VERIFY_SOURCE_TRANSFORM_H_
+#define ART_TEST_983_SOURCE_TRANSFORM_VERIFY_SOURCE_TRANSFORM_H_
 
 #include <jni.h>
 
 namespace art {
+namespace Test983SourceTransformVerify {
 
-void register_java_lang_DexCache(JNIEnv* env);
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
 
+}  // namespace Test983SourceTransformVerify
 }  // namespace art
 
-#endif  // ART_RUNTIME_NATIVE_JAVA_LANG_DEXCACHE_H_
+#endif  // ART_TEST_983_SOURCE_TRANSFORM_VERIFY_SOURCE_TRANSFORM_H_
diff --git a/test/983-source-transform-verify/src/Main.java b/test/983-source-transform-verify/src/Main.java
new file mode 100644
index 0000000..5f42d29
--- /dev/null
+++ b/test/983-source-transform-verify/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  public static void main(String[] args) {
+    doTest();
+  }
+
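+  // Retransforms both a test class and a boot class (Object); the native
+  // ClassFileLoadHook in source_transform.cc prints a line for each and re-verifies
+  // the dex bytes it is handed.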
+  public static void doTest() {
+    Transform abc = new Transform();
+    enableCommonRetransformation(true);
+    doCommonClassRetransformation(Transform.class);
+    doCommonClassRetransformation(Object.class);
+    enableCommonRetransformation(false);
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRetransformation(Class<?>... target);
+  private static native void enableCommonRetransformation(boolean enable);
+}
diff --git a/test/983-source-transform-verify/src/Transform.java b/test/983-source-transform-verify/src/Transform.java
new file mode 100644
index 0000000..8e8af35
--- /dev/null
+++ b/test/983-source-transform-verify/src/Transform.java
@@ -0,0 +1,28 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+class Transform {
+  public void sayHi() {
+    // Use a lowercase 'h' to make sure the string will have a different string id
+    // than the one in the transformation (the transformation code is the same except
+    // for the actual printed String, which was making the test inaccurately pass
+    // in JIT mode when loading the string from the dex cache, as the string ids
+    // of the two different strings were the same).
+    // We know the string ids will be different because lexicographically:
+    // "Goodbye" < "LTransform;" < "hello".
+    System.out.println("hello");
+  }
+}
diff --git a/test/984-obsolete-invoke/expected.txt b/test/984-obsolete-invoke/expected.txt
new file mode 100644
index 0000000..8052c46
--- /dev/null
+++ b/test/984-obsolete-invoke/expected.txt
@@ -0,0 +1,10 @@
+hello
+transforming calling function
+Retrieving obsolete method from current stack
+goodbye
+Invoking redefined version of method.
+Hello - Transformed
+Not doing anything here
+Goodbye - Transformed
+invoking obsolete method
+Caught expected error from attempting to invoke an obsolete method.
diff --git a/test/984-obsolete-invoke/info.txt b/test/984-obsolete-invoke/info.txt
new file mode 100644
index 0000000..48e0de0
--- /dev/null
+++ b/test/984-obsolete-invoke/info.txt
@@ -0,0 +1,4 @@
+Tests basic obsolete method support
+
+Tests that one specific way of potentially executing the code of an obsolete
+method does not work.
diff --git a/test/984-obsolete-invoke/obsolete_invoke.cc b/test/984-obsolete-invoke/obsolete_invoke.cc
new file mode 100644
index 0000000..27e36ba
--- /dev/null
+++ b/test/984-obsolete-invoke/obsolete_invoke.cc
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "android-base/macros.h"
+#include "jni.h"
+#include "jvmti.h"
+#include "mirror/class-inl.h"
+#include "scoped_local_ref.h"
+
+// Test infrastructure
+#include "test_env.h"
+
+#include "jvmti_helper.h"
+
+namespace art {
+namespace Test984ObsoleteInvoke {
+
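+// Upper bound on the number of stack frames scanned when searching for an obsolete method.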
+static constexpr size_t kNumFrames = 30;
+
+extern "C" JNIEXPORT jobject JNICALL Java_Main_getFirstObsoleteMethod984(JNIEnv* env, jclass) {
+  jthread cur;
+  jint frame_count;
+  jvmtiFrameInfo frames[kNumFrames];
+  if (JvmtiErrorToException(env, jvmti_env, jvmti_env->GetCurrentThread(&cur))) {
+    // ERROR
+    return nullptr;
+  }
+  if (JvmtiErrorToException(env, jvmti_env,
+                            jvmti_env->GetStackTrace(cur,
+                                                     0,
+                                                     kNumFrames,
+                                                     frames,
+                                                     &frame_count))) {
+    // ERROR
+    return nullptr;
+  }
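+  // Scan the captured frames, innermost first, and return the first method jvmti
+  // reports as obsolete, wrapped as a java.lang.reflect.Method.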
+  for (jint i = 0; i < frame_count; i++) {
+    jmethodID method = frames[i].method;
+    jboolean is_obsolete = false;
+    if (JvmtiErrorToException(env, jvmti_env, jvmti_env->IsMethodObsolete(method, &is_obsolete))) {
+      // ERROR
+      return nullptr;
+    }
+    if (is_obsolete) {
+      return env->ToReflectedMethod(env->FindClass("java/lang/reflect/Method"),
+                                    method,
+                                    JNI_TRUE);
+    }
+  }
+  ScopedLocalRef<jclass> rt_exception(env, env->FindClass("java/lang/RuntimeException"));
+  env->ThrowNew(rt_exception.get(), "Unable to find obsolete method!");
+  return nullptr;
+}
+
+}  // namespace Test984ObsoleteInvoke
+}  // namespace art
diff --git a/test/577-profile-foreign-dex/run b/test/984-obsolete-invoke/run
old mode 100644
new mode 100755
similarity index 85%
copy from test/577-profile-foreign-dex/run
copy to test/984-obsolete-invoke/run
index ad57d14..c6e62ae
--- a/test/577-profile-foreign-dex/run
+++ b/test/984-obsolete-invoke/run
@@ -14,7 +14,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+./default-run "$@" --jvmti
diff --git a/test/984-obsolete-invoke/src/Main.java b/test/984-obsolete-invoke/src/Main.java
new file mode 100644
index 0000000..1a8d9bc
--- /dev/null
+++ b/test/984-obsolete-invoke/src/Main.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.util.Base64;
+
+public class Main {
+  // class Transform {
+  //   public static void sayHi(Runnable r) {
+  //     System.out.println("Hello - Transformed");
+  //     r.run();
+  //     System.out.println("Goodbye - Transformed");
+  //   }
+  // }
+  private static final byte[] CLASS_BYTES = Base64.getDecoder().decode(
+    "yv66vgAAADQAJAoACAARCQASABMIABQKABUAFgsAFwAYCAAZBwAaBwAbAQAGPGluaXQ+AQADKClW" +
+    "AQAEQ29kZQEAD0xpbmVOdW1iZXJUYWJsZQEABXNheUhpAQAXKExqYXZhL2xhbmcvUnVubmFibGU7" +
+    "KVYBAApTb3VyY2VGaWxlAQAOVHJhbnNmb3JtLmphdmEMAAkACgcAHAwAHQAeAQATSGVsbG8gLSBU" +
+    "cmFuc2Zvcm1lZAcAHwwAIAAhBwAiDAAjAAoBABVHb29kYnllIC0gVHJhbnNmb3JtZWQBAAlUcmFu" +
+    "c2Zvcm0BABBqYXZhL2xhbmcvT2JqZWN0AQAQamF2YS9sYW5nL1N5c3RlbQEAA291dAEAFUxqYXZh" +
+    "L2lvL1ByaW50U3RyZWFtOwEAE2phdmEvaW8vUHJpbnRTdHJlYW0BAAdwcmludGxuAQAVKExqYXZh" +
+    "L2xhbmcvU3RyaW5nOylWAQASamF2YS9sYW5nL1J1bm5hYmxlAQADcnVuACAABwAIAAAAAAACAAAA" +
+    "CQAKAAEACwAAAB0AAQABAAAABSq3AAGxAAAAAQAMAAAABgABAAAAAQAJAA0ADgABAAsAAAA7AAIA" +
+    "AQAAABeyAAISA7YABCq5AAUBALIAAhIGtgAEsQAAAAEADAAAABIABAAAAAMACAAEAA4ABQAWAAYA" +
+    "AQAPAAAAAgAQ");
+  private static final byte[] DEX_BYTES = Base64.getDecoder().decode(
+    "ZGV4CjAzNQCMekj2NPwzrEp/v+2yzzSg8xZvBtU1bC1QAwAAcAAAAHhWNBIAAAAAAAAAALACAAAR" +
+    "AAAAcAAAAAcAAAC0AAAAAwAAANAAAAABAAAA9AAAAAUAAAD8AAAAAQAAACQBAAAMAgAARAEAAKIB" +
+    "AACqAQAAwQEAANYBAADjAQAA+gEAAA4CAAAkAgAAOAIAAEwCAABcAgAAXwIAAGMCAAB3AgAAfAIA" +
+    "AIUCAACKAgAAAwAAAAQAAAAFAAAABgAAAAcAAAAIAAAACgAAAAoAAAAGAAAAAAAAAAsAAAAGAAAA" +
+    "lAEAAAsAAAAGAAAAnAEAAAUAAQANAAAAAAAAAAAAAAAAAAEAEAAAAAEAAgAOAAAAAgAAAAAAAAAD" +
+    "AAAADwAAAAAAAAAAAAAAAgAAAAAAAAAJAAAAAAAAAJ8CAAAAAAAAAQABAAEAAACRAgAABAAAAHAQ" +
+    "AwAAAA4AAwABAAIAAACWAgAAFAAAAGIAAAAbAQIAAABuIAIAEAByEAQAAgBiAAAAGwEBAAAAbiAC" +
+    "ABAADgABAAAAAwAAAAEAAAAEAAY8aW5pdD4AFUdvb2RieWUgLSBUcmFuc2Zvcm1lZAATSGVsbG8g" +
+    "LSBUcmFuc2Zvcm1lZAALTFRyYW5zZm9ybTsAFUxqYXZhL2lvL1ByaW50U3RyZWFtOwASTGphdmEv" +
+    "bGFuZy9PYmplY3Q7ABRMamF2YS9sYW5nL1J1bm5hYmxlOwASTGphdmEvbGFuZy9TdHJpbmc7ABJM" +
+    "amF2YS9sYW5nL1N5c3RlbTsADlRyYW5zZm9ybS5qYXZhAAFWAAJWTAASZW1pdHRlcjogamFjay00" +
+    "LjMxAANvdXQAB3ByaW50bG4AA3J1bgAFc2F5SGkAAQAHDgADAQAHDoc8hwAAAAIAAICABMQCAQnc" +
+    "AgAAAA0AAAAAAAAAAQAAAAAAAAABAAAAEQAAAHAAAAACAAAABwAAALQAAAADAAAAAwAAANAAAAAE" +
+    "AAAAAQAAAPQAAAAFAAAABQAAAPwAAAAGAAAAAQAAACQBAAABIAAAAgAAAEQBAAABEAAAAgAAAJQB" +
+    "AAACIAAAEQAAAKIBAAADIAAAAgAAAJECAAAAIAAAAQAAAJ8CAAAAEAAAAQAAALACAAA=");
+
+  public static void main(String[] args) {
+    doTest();
+  }
+
+  // The Method that holds an obsolete method pointer. We will fill it in by getting a jmethodID
+  // from a stack with an obsolete method in it. There should be no other way to obtain an
+  // obsolete jmethodID in ART without unsafe casts.
+  public static Method obsolete_method = null;
+
+  public static void doTest() {
+    // Capture the obsolete method.
+    //
+    // NB The obsolete method must be direct so that we will not look in the receiver type to get
+    // the actual method.
+    Transform.sayHi(() -> {
+      System.out.println("transforming calling function");
+      doCommonClassRedefinition(Transform.class, CLASS_BYTES, DEX_BYTES);
+      System.out.println("Retrieving obsolete method from current stack");
+      // This should get the obsolete sayHi method (as the only obsolete method on the current
+      // thread's stack).
+      Main.obsolete_method = getFirstObsoleteMethod984();
+    });
+
+    // Prove we did actually redefine something.
+    System.out.println("Invoking redefined version of method.");
+    Transform.sayHi(() -> { System.out.println("Not doing anything here"); });
+
+    System.out.println("invoking obsolete method");
+    try {
+      obsolete_method.invoke(null, (Runnable)() -> {
+        throw new Error("Unexpected code running from invoke of obsolete method!");
+      });
+      throw new Error("Running obsolete method did not throw exception");
+    } catch (Throwable e) {
+      if (e instanceof InternalError || e.getCause() instanceof InternalError) {
+        System.out.println("Caught expected error from attempting to invoke an obsolete method.");
+      } else {
+        System.out.println("Unexpected error type for calling obsolete method! Expected either "
+            + "an InternalError or something that is caused by an InternalError.");
+        throw new Error("Unexpected error caught: ", e);
+      }
+    }
+  }
+
+  // Transforms the class
+  private static native void doCommonClassRedefinition(Class<?> target,
+                                                       byte[] classfile,
+                                                       byte[] dexfile);
+
+  // Gets the first obsolete method on the current thread's stack (NB only looks through the first 30
+  // stack frames).
+  private static native Method getFirstObsoleteMethod984();
+}
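
Note: doCommonClassRedefinition is supplied by the shared agent code
(ti-agent/common_helper.cc in the Android.bp changes below), not by this file.
For orientation only, here is a minimal sketch of a standard JVMTI class
redefinition on the native side, assuming a valid jvmtiEnv* named jvmti; the
real ART helper additionally plumbs through the dex bytes.

  // Sketch only: the standard JVMTI redefinition entry point, not ART's helper.
  static void RedefineClass(JNIEnv* env, jvmtiEnv* jvmti,
                            jclass target, jbyteArray class_bytes) {
    jsize count = env->GetArrayLength(class_bytes);
    jbyte* bytes = env->GetByteArrayElements(class_bytes, /* isCopy */ nullptr);
    jvmtiClassDefinition def;
    def.klass = target;
    def.class_byte_count = static_cast<jint>(count);
    def.class_bytes = reinterpret_cast<unsigned char*>(bytes);
    // After a successful redefine, any activation of the old sayHi still on a
    // stack becomes an "obsolete" method, which is the state this test captures.
    jvmtiError err = jvmti->RedefineClasses(1, &def);
    env->ReleaseByteArrayElements(class_bytes, bytes, JNI_ABORT);
    (void)err;  // Real code would surface the error, e.g. via JvmtiErrorToException.
  }
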
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/984-obsolete-invoke/src/Transform.java
similarity index 65%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/984-obsolete-invoke/src/Transform.java
index cba73b3..536de84 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/984-obsolete-invoke/src/Transform.java
@@ -13,5 +13,13 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+class Transform {
+  // This method must be 'static' so that when we try to invoke it through a j.l.r.Method we will
+  // simply use the jmethodID directly and not perform any lookup on a receiver object.
+  public static void sayHi(Runnable r) {
+    System.out.println("hello");
+    r.run();
+    System.out.println("goodbye");
+  }
 }
diff --git a/test/Android.bp b/test/Android.bp
index 00c890a..b79006f 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -241,26 +241,24 @@
 }
 
 art_cc_defaults {
-    name: "libtiagent-defaults",
+    name: "libtiagent-base-defaults",
     defaults: ["libartagent-defaults"],
     srcs: [
-        // This is to get the IsInterpreted native method.
-        "common/stack_inspect.cc",
-        "common/runtime_state.cc",
-        "ti-agent/common_load.cc",
-        "ti-agent/common_helper.cc",
-        "901-hello-ti-agent/basics.cc",
+        // These are the ART-independent parts.
+        "ti-agent/agent_startup.cc",
+        "ti-agent/jni_binder.cc",
+        "ti-agent/jvmti_helper.cc",
+        "ti-agent/test_env.cc",
+        // This is the list of tests that need no special OnLoad handling; it excludes BCI and
+        // anything that depends on ART internals.
         "903-hello-tagging/tagging.cc",
         "904-object-allocation/tracking.cc",
         "905-object-free/tracking_free.cc",
         "906-iterate-heap/iterate_heap.cc",
         "907-get-loaded-classes/get_loaded_classes.cc",
         "908-gc-start-finish/gc_callbacks.cc",
-        "909-attach-agent/attach.cc",
         "910-methods/methods.cc",
         "911-get-stack-trace/stack_trace.cc",
-        "912-classes/classes.cc",
-        "913-heaps/heaps.cc",
         "918-fields/fields.cc",
         "920-objects/objects.cc",
         "922-properties/properties.cc",
@@ -272,13 +270,36 @@
         "929-search/search.cc",
         "931-agent-thread/agent_thread.cc",
         "933-misc-events/misc_events.cc",
-        "936-search-onload/search_onload.cc",
-        "944-transform-classloaders/classloader.cc",
-        "945-obsolete-native/obsolete_native.cc",
     ],
     shared_libs: [
         "libbase",
     ],
+    header_libs: ["libopenjdkjvmti_headers"],
+    include_dirs: ["art/test/ti-agent"],
+}
+
+art_cc_defaults {
+    name: "libtiagent-defaults",
+    defaults: ["libtiagent-base-defaults"],
+    srcs: [
+        // This is to get the IsInterpreted native method.
+        "common/stack_inspect.cc",
+        "common/runtime_state.cc",
+        // This includes the remaining test functions. We should try to refactor things to
+        // make this list smaller.
+        "ti-agent/common_helper.cc",
+        "ti-agent/common_load.cc",
+        "901-hello-ti-agent/basics.cc",
+        "909-attach-agent/attach.cc",
+        "912-classes/classes.cc",
+        "913-heaps/heaps.cc",
+        "936-search-onload/search_onload.cc",
+        "944-transform-classloaders/classloader.cc",
+        "945-obsolete-native/obsolete_native.cc",
+        "980-redefine-object/redefine_object.cc",
+        "983-source-transform-verify/source_transform.cc",
+        "984-obsolete-invoke/obsolete_invoke.cc",
+    ],
 }
 
 art_cc_test_library {
@@ -296,6 +317,12 @@
     shared_libs: ["libartd"],
 }
 
+art_cc_test_library {
+    name: "libctstiagent",
+    defaults: ["libtiagent-base-defaults"],
+    export_include_dirs: ["ti-agent"],
+}
+
 cc_defaults {
     name: "libarttest-defaults",
     defaults: [
@@ -335,6 +362,7 @@
         "596-monitor-inflation/monitor_inflation.cc",
         "597-deopt-new-string/deopt.cc",
         "626-const-class-linking/clear_dex_cache_types.cc",
+        "642-fp-callees/fp_callees.cc",
     ],
     shared_libs: [
         "libbacktrace",
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 95967b5..187b383 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -43,18 +43,6 @@
 
 TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES := setup-jack-server
 
-ifeq ($(ART_TEST_DEBUG_GC),true)
-  ART_TEST_WITH_STRACE := true
-endif
-
-ifeq ($(ART_TEST_BISECTION),true)
-  # Need to keep rebuilding the test to bisection search it.
-  ART_TEST_RUN_TEST_NO_PREBUILD := true
-  ART_TEST_RUN_TEST_PREBUILD := false
-  # Bisection search writes to standard output.
-  ART_TEST_QUIET := false
-endif
-
 # Helper to create individual build targets for tests. Must be called with $(eval).
 # $(1): the test number
 define define-build-art-run-test
@@ -97,681 +85,11 @@
 
 include $(BUILD_PHONY_PACKAGE)
 
-# Clear temp vars.
-art_run_tests_build_dir :=
-art_run_tests_install_dir :=
-define-build-art-run-test :=
-TEST_ART_RUN_TEST_BUILD_RULES :=
-
-########################################################################
-# General rules to build and run a run-test.
-
-TARGET_TYPES := host target
-PREBUILD_TYPES :=
-ifeq ($(ART_TEST_RUN_TEST_PREBUILD),true)
-  PREBUILD_TYPES += prebuild
-endif
-ifeq ($(ART_TEST_RUN_TEST_NO_PREBUILD),true)
-  PREBUILD_TYPES += no-prebuild
-endif
-ifeq ($(ART_TEST_RUN_TEST_NO_DEX2OAT),true)
-  PREBUILD_TYPES += no-dex2oat
-endif
-COMPILER_TYPES :=
-ifeq ($(ART_TEST_INTERPRETER_ACCESS_CHECKS),true)
-  COMPILER_TYPES += interp-ac
-endif
-ifeq ($(ART_TEST_INTERPRETER),true)
-  COMPILER_TYPES += interpreter
-endif
-ifeq ($(ART_TEST_JIT),true)
-  COMPILER_TYPES += jit
-endif
-OPTIMIZING_COMPILER_TYPES :=
-ifeq ($(ART_TEST_OPTIMIZING),true)
-  COMPILER_TYPES += optimizing
-  OPTIMIZING_COMPILER_TYPES += optimizing
-endif
-ifeq ($(ART_TEST_OPTIMIZING_GRAPH_COLOR),true)
-  COMPILER_TYPES += regalloc_gc
-  OPTIMIZING_COMPILER_TYPES += regalloc_gc
-endif
-RELOCATE_TYPES := no-relocate
-ifeq ($(ART_TEST_RUN_TEST_RELOCATE),true)
-  RELOCATE_TYPES += relocate
-endif
-ifeq ($(ART_TEST_RUN_TEST_RELOCATE_NO_PATCHOAT),true)
-  RELOCATE_TYPES += relocate-npatchoat
-endif
-TRACE_TYPES := ntrace
-ifeq ($(ART_TEST_TRACE),true)
-  TRACE_TYPES += trace
-endif
-ifeq ($(ART_TEST_TRACE_STREAM),true)
-  TRACE_TYPES += stream
-endif
-GC_TYPES := cms
-ifeq ($(ART_TEST_GC_STRESS),true)
-  GC_TYPES += gcstress
-endif
-ifeq ($(ART_TEST_GC_VERIFY),true)
-  GC_TYPES += gcverify
-endif
-JNI_TYPES := checkjni
-ifeq ($(ART_TEST_JNI_FORCECOPY),true)
-  JNI_TYPES += forcecopy
-endif
-ifeq ($(ART_TEST_RUN_TEST_IMAGE),true)
-IMAGE_TYPES := picimage
-endif
-ifeq ($(ART_TEST_RUN_TEST_NO_IMAGE),true)
-  IMAGE_TYPES += no-image
-endif
-ifeq ($(ART_TEST_RUN_TEST_MULTI_IMAGE),true)
-  IMAGE_TYPES := multipicimage
-endif
-PICTEST_TYPES := npictest
-ifeq ($(ART_TEST_PIC_TEST),true)
-  PICTEST_TYPES += pictest
-endif
-RUN_TYPES :=
-ifeq ($(ART_TEST_RUN_TEST_DEBUG),true)
-  RUN_TYPES += debug
-endif
-ifeq ($(ART_TEST_RUN_TEST_NDEBUG),true)
-  RUN_TYPES += ndebug
-endif
-DEBUGGABLE_TYPES := ndebuggable
-ifeq ($(ART_TEST_RUN_TEST_DEBUGGABLE),true)
-DEBUGGABLE_TYPES += debuggable
-endif
-ADDRESS_SIZES_TARGET := $(ART_PHONY_TEST_TARGET_SUFFIX)
-ADDRESS_SIZES_HOST := $(ART_PHONY_TEST_HOST_SUFFIX)
-ifeq ($(ART_TEST_RUN_TEST_2ND_ARCH),true)
-  ADDRESS_SIZES_TARGET += $(2ND_ART_PHONY_TEST_TARGET_SUFFIX)
-  ADDRESS_SIZES_HOST += $(2ND_ART_PHONY_TEST_HOST_SUFFIX)
-endif
-ALL_ADDRESS_SIZES := 64 32
-
-# List all run test names with number arguments agreeing with the comment above.
-define all-run-test-names
-  $(foreach target, $(1), \
-    $(foreach run-type, $(2), \
-      $(foreach prebuild, $(3), \
-        $(foreach compiler, $(4), \
-          $(foreach relocate, $(5), \
-            $(foreach trace, $(6), \
-              $(foreach gc, $(7), \
-                $(foreach jni, $(8), \
-                  $(foreach image, $(9), \
-                    $(foreach pictest, $(10), \
-                      $(foreach debuggable, $(11), \
-                        $(foreach test, $(12), \
-                          $(foreach address_size, $(13), \
-                            test-art-$(target)-run-test-$(run-type)-$(prebuild)-$(compiler)-$(relocate)-$(trace)-$(gc)-$(jni)-$(image)-$(pictest)-$(debuggable)-$(test)$(address_size) \
-                    )))))))))))))
-endef  # all-run-test-names
-
-# To generate a full list or tests:
-# $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
-#        $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-#        $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-
 # Converts a rule name to the form used in variables, e.g. no-relocate to NO_RELOCATE
 define name-to-var
 $(shell echo $(1) | tr '[:lower:]' '[:upper:]' | tr '-' '_')
 endef  # name-to-var
 
-# Disable 153-reference-stress temporarily until a fix arrives. b/33389022.
-# Disable 080-oom-fragmentation due to flakes. b/33795328
-# Disable 497-inlining-and-class-loader and 542-unresolved-access-check until
-#     they are rewritten. These tests use a broken class loader that tries to
-#     register a dex file that's already registered with a different loader.
-#     b/34193123
-ART_TEST_RUN_TEST_SKIP += \
-  153-reference-stress \
-  080-oom-fragmentation \
-  497-inlining-and-class-loader \
-  542-unresolved-access-check
-
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(ART_TEST_RUN_TEST_SKIP), $(ALL_ADDRESS_SIZES))
-
-
-# Disable 149-suspend-all-stress, its output is flaky (b/28988206).
-# Disable 577-profile-foreign-dex (b/27454772).
-TEST_ART_BROKEN_ALL_TARGET_TESTS := \
-  149-suspend-all-stress \
-  577-profile-foreign-dex \
-
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_ALL_TARGET_TESTS), \
-    $(ALL_ADDRESS_SIZES))
-
-TEST_ART_BROKEN_ALL_TARGET_TESTS :=
-
-# Tests that are timing sensitive and flaky on heavily loaded systems.
-TEST_ART_TIMING_SENSITIVE_RUN_TESTS := \
-  002-sleep \
-  053-wait-some \
-  055-enum-performance \
-  133-static-invoke-super
-
-# disable timing sensitive tests on "dist" builds.
-ifdef dist_goal
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_TIMING_SENSITIVE_RUN_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-# 147-stripped-dex-fallback isn't supported on device because --strip-dex
-# requires the zip command.
-# 569-checker-pattern-replacement tests behaviour present only on host.
-TEST_ART_BROKEN_TARGET_TESTS := \
-  147-stripped-dex-fallback \
-  569-checker-pattern-replacement
-
-ifneq (,$(filter target,$(TARGET_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TARGET_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_TARGET_TESTS :=
-
-# Tests that require python3.
-TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS := \
-  960-default-smali \
-  961-default-iface-resolution-gen \
-  964-default-iface-init-gen \
-  968-default-partial-compile-gen \
-  969-iface-super \
-  970-iface-super-resolution-gen \
-  971-iface-super
-
-# Check if we have python3 to run our tests.
-ifeq ($(wildcard /usr/bin/python3),)
-  $(warning "No python3 found. Disabling tests: $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS)")
-
-  # Currently disable tests requiring python3 when it is not installed.
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_PYTHON3_DEPENDENCY_RUN_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_TIMING_SENSITIVE_RUN_TESTS :=
-
-# Note 116-nodex2oat is not broken per-se it just doesn't (and isn't meant to) work with --prebuild.
-TEST_ART_BROKEN_PREBUILD_RUN_TESTS := \
-  116-nodex2oat \
-  118-noimage-dex2oat \
-  134-nodex2oat-nofallback
-
-ifneq (,$(filter prebuild,$(PREBUILD_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),prebuild, \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_PREBUILD_RUN_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_PREBUILD_RUN_TESTS :=
-
-# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
-# 529 and 555: b/27784033
-TEST_ART_BROKEN_NO_PREBUILD_TESTS := \
-  117-nopatchoat \
-  147-stripped-dex-fallback \
-  554-jit-profile-file \
-  529-checker-unresolved \
-  555-checker-regression-x86const \
-  608-checker-unresolved-lse
-
-ifneq (,$(filter no-prebuild,$(PREBUILD_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-prebuild, \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_PREBUILD_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_NO_PREBUILD_TESTS :=
-
-# Note 117-nopatchoat is not broken per-se it just doesn't work (and isn't meant to) without
-# --prebuild --relocate
-TEST_ART_BROKEN_NO_RELOCATE_TESTS := \
-  117-nopatchoat \
-  118-noimage-dex2oat \
-  119-noimage-patchoat \
-  554-jit-profile-file
-
-ifneq (,$(filter no-relocate,$(RELOCATE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), no-relocate,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_RELOCATE_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_NO_RELOCATE_TESTS :=
-
-# Temporarily disable some broken tests when forcing access checks in interpreter b/22414682
-# 629 requires compilation.
-TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS := \
-  137-cfi \
-  629-vdex-speed
-
-ifneq (,$(filter interp-ac,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      interp-ac,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_INTERPRETER_ACCESS_CHECK_TESTS :=
-
-# Tests that are broken with GC stress.
-# * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
-#   hope the second process got into the expected state. The slowness of gcstress makes this bad.
-# * 908-gc-start-finish expects GCs only to be run at clear points. The reduced heap size makes
-#   this non-deterministic. Same for 913.
-# * 961-default-iface-resolution-gen and 964-default-iface-init-genare very long tests that often
-#   will take more than the timeout to run when gcstress is enabled. This is because gcstress
-#   slows down allocations significantly which these tests do a lot.
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
-  137-cfi \
-  154-gc-loop \
-  908-gc-start-finish \
-  913-heaps \
-  961-default-iface-resolution-gen \
-  964-default-iface-init-gen
-
-ifneq (,$(filter gcstress,$(GC_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),gcstress,$(JNI_TYPES), \
-      $(IMAGE_TYPES), $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_GCSTRESS_RUN_TESTS), $(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_GCSTRESS_RUN_TESTS :=
-
-# 115-native-bridge setup is complicated. Need to implement it correctly for the target.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES),$(COMPILER_TYPES), \
-    $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), 115-native-bridge, \
-    $(ALL_ADDRESS_SIZES))
-
-# 130-hprof dumps the heap and runs hprof-conv to check whether the file is somewhat readable. This
-# is only possible on the host.
-# TODO: Turn off all the other combinations, this is more about testing actual ART code. A gtest is
-#       very hard to write here, as (for a complete test) JDWP must be set up.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),130-hprof,$(ALL_ADDRESS_SIZES))
-
-# 131 is an old test. The functionality has been implemented at an earlier stage and is checked
-# in tests 138. Blacklisted for debug builds since these builds have duplicate classes checks which
-# punt to interpreter.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),debug,$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),131-structural-change,$(ALL_ADDRESS_SIZES))
-
-# 138-duplicate-classes-check. Turned on for debug builds since debug builds have duplicate classes
-# checks enabled, b/2133391.
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),ndebug,$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-    $(PICTEST_TYPES),$(DEBUGGABLE_TYPES),138-duplicate-classes-check,$(ALL_ADDRESS_SIZES))
-
-# All these tests check that we have sane behavior if we don't have a patchoat or dex2oat.
-# Therefore we shouldn't run them in situations where we actually don't have these since they
-# explicitly test for them. These all also assume we have an image.
-# 147-stripped-dex-fallback is disabled because it requires --prebuild.
-# 554-jit-profile-file is disabled because it needs a primary oat file to know what it should save.
-# 629-vdex-speed requires compiled code.
-TEST_ART_BROKEN_FALLBACK_RUN_TESTS := \
-  116-nodex2oat \
-  117-nopatchoat \
-  118-noimage-dex2oat \
-  119-noimage-patchoat \
-  137-cfi \
-  138-duplicate-classes-check2 \
-  147-stripped-dex-fallback \
-  554-jit-profile-file \
-  629-vdex-speed
-
-# This test fails without an image.
-# 018, 961, 964, 968 often time out. b/34369284
-TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS := \
-  137-cfi \
-  138-duplicate-classes-check \
-  018-stack-overflow \
-  961-default-iface-resolution-gen \
-  964-default-iface-init \
-  968-default-partial-compile-gen \
-
-ifneq (,$(filter no-dex2oat,$(PREBUILD_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),no-dex2oat, \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-      $(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-
-ifneq (,$(filter no-image,$(IMAGE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
-      $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES),no-image, \
-      $(PICTEST_TYPES), $(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_NO_IMAGE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-ifneq (,$(filter relocate-npatchoat,$(RELOCATE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES), relocate-npatchoat,$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_FALLBACK_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_FALLBACK_RUN_TESTS :=
-
-# 137:
-# This test unrolls and expects managed frames, but tracing means we run the interpreter.
-# 802 and 570-checker-osr:
-# This test dynamically enables tracing to force a deoptimization. This makes the test meaningless
-# when already tracing, and writes an error message that we do not want to check for.
-# 130 occasional timeout b/32383962.
-# 629 requires compilation.
-TEST_ART_BROKEN_TRACING_RUN_TESTS := \
-  087-gc-after-link \
-  130-hprof \
-  137-cfi \
-  141-class-unload \
-  570-checker-osr \
-  629-vdex-speed \
-  802-deoptimization
-
-ifneq (,$(filter trace stream,$(TRACE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(COMPILER_TYPES),$(RELOCATE_TYPES),trace stream,$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-      $(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_TRACING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_TRACING_RUN_TESTS :=
-
-# These tests expect JIT compilation, which is suppressed when tracing.
-TEST_ART_BROKEN_JIT_TRACING_RUN_TESTS := \
-  604-hot-static-interface \
-  612-jit-dex-cache \
-  613-inlining-dex-cache \
-  616-cha \
-  626-set-resolved-string \
-
-ifneq (,$(filter trace stream,$(TRACE_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      jit,$(RELOCATE_TYPES),trace stream,$(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES), \
-      $(PICTEST_TYPES),$(DEBUGGABLE_TYPES), $(TEST_ART_BROKEN_JIT_TRACING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_JIT_TRACING_RUN_TESTS :=
-
-# Known broken tests for the interpreter.
-# CFI unwinding expects managed frames.
-# 629 requires compilation.
-TEST_ART_BROKEN_INTERPRETER_RUN_TESTS := \
-  137-cfi \
-  554-jit-profile-file \
-  629-vdex-speed
-
-ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_INTERPRETER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_INTERPRETER_RUN_TESTS :=
-
-# Known broken tests for the JIT.
-# CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
-# also uses Generic JNI instead of the JNI compiler.
-# Test 906 iterates the heap filtering with different options. No instances should be created
-# between those runs to be able to have precise checks.
-# Test 629 requires compilation.
-# 912: b/34655682
-TEST_ART_BROKEN_JIT_RUN_TESTS := \
-  137-cfi \
-  629-vdex-speed \
-  904-object-allocation \
-  906-iterate-heap \
-
-ifneq (,$(filter jit,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_JIT_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_JIT_RUN_TESTS :=
-
-# Known broken tests for the graph coloring register allocator.
-# These tests were based on the linear scan allocator, which makes different decisions than
-# the graph coloring allocator. (These attempt to test for code quality, not correctness.)
-TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR := \
-  570-checker-select \
-  484-checker-register-hints
-
-ifneq (,$(filter regalloc_gc,$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      regalloc_gc,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-      $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES))
-endif
-
-# Known broken tests for the mips32 optimizing compiler backend.
-TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
-
-ifeq (mips,$(TARGET_ARCH))
-  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS :=
-
-# Known broken tests for the mips64 optimizing compiler backend.
-TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS := \
-
-ifeq (mips64,$(TARGET_ARCH))
-  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
-        $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_MIPS64_RUN_TESTS :=
-
-# Tests that should fail when the optimizing compiler compiles them non-debuggable.
-TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS := \
-  454-get-vreg \
-  457-regs \
-  602-deoptimizeable
-
-ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),ndebuggable,$(TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_NONDEBUGGABLE_RUN_TESTS :=
-
-# Tests that should fail when the optimizing compiler compiles them debuggable.
-TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS := \
-
-ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_DEBUGGABLE_RUN_TESTS :=
-
-# Tests that should fail in the read barrier configuration with the interpreter.
-TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS :=
-
-# Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
-TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
-
-# Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
-TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
-
-# Tests failing in non-Baker read barrier configurations with the Optimizing compiler (AOT).
-# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
-#      handled in non-Baker read barrier configurations.
-TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS := \
-  537-checker-arraycopy
-
-# Tests failing in non-Baker read barrier configurations with JIT (Optimizing compiler).
-# 537: Expects an array copy to be intrinsified, but calling-on-slowpath intrinsics are not yet
-#      handled in non-Baker read barrier configurations.
-TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS := \
-  537-checker-arraycopy
-
-ifeq ($(ART_USE_READ_BARRIER),true)
-  ifneq (,$(filter interpreter,$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),interpreter,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
-        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_INTERPRETER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
-
-  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \
-        $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-    ifneq ($(ART_READ_BARRIER_TYPE),BAKER)
-      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-          $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES), \
-          $(GC_TYPES),$(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-          $(TEST_ART_BROKEN_OPTIMIZING_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-    endif
-  endif
-
-  ifneq (,$(filter jit,$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
-        $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-    ifneq ($(ART_READ_BARRIER_TYPE),BAKER)
-      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-          $(PREBUILD_TYPES),jit,$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES), \
-          $(JNI_TYPES),$(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-          $(TEST_ART_BROKEN_JIT_NON_BAKER_READ_BARRIER_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-    endif
-  endif
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS :=
-TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
-
-TEST_ART_BROKEN_NPIC_RUN_TESTS := 596-app-images
-ifneq (,$(filter npictest,$(PICTEST_TYPES)))
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-      ${COMPILER_TYPES},$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),npictest,$(DEBUGGABLE_TYPES),$(TEST_ART_BROKEN_NPIC_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-# Tests that should fail in the heap poisoning configuration with the Optimizing compiler.
-# 055: Exceeds run time limits due to heap poisoning instrumentation (on ARM and ARM64 devices).
-TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS := \
-  055-enum-performance
-
-ifeq ($(ART_HEAP_POISONING),true)
-  ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
-    ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-        $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-        $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-        $(TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-  endif
-endif
-
-TEST_ART_BROKEN_OPTIMIZING_HEAP_POISONING_RUN_TESTS :=
-
-# 909: Tests that check semantics for a non-debuggable app.
-# 137: relies on AOT code and debuggable makes us JIT always.
-TEST_ART_BROKEN_DEBUGGABLE_RUN_TESTS := \
-  137-cfi \
-  909-attach-agent \
-
-ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
-    $(COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-    $(IMAGE_TYPES),$(PICTEST_TYPES),debuggable,$(TEST_ART_BROKEN_DEBUGGABLE_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-
-TEST_ART_BROKEN_DEBUGGABLE_RUN_TESTS :=
-
-# Tests incompatible with bisection bug search. Sorted by incompatibility reason.
-# 000 through 595 do not compile anything. 089 tests a build failure. 018 through 137
-# run dalvikvm more than once. 115 and 088 assume they are always compiled.
-# 055 tests performance which is degraded during bisecting.
-TEST_ART_INCOMPATIBLE_BISECTION_SEARCH_RUN_TESTS := \
-  000-nop \
-  134-nodex2oat-nofallback \
-  147-stripped-dex-fallback \
-  595-profile-saving \
-  \
-  089-many-methods \
-  \
-  018-stack-overflow \
-  116-nodex2oat \
-  117-nopatchoat \
-  118-noimage-dex2oat \
-  119-noimage-patchoat \
-  126-miranda-multidex \
-  137-cfi \
-  \
-  115-native-bridge \
-  088-monitor-verification \
-  \
-  055-enum-performance
-
-ifeq ($(ART_TEST_BISECTION),true)
-  ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES), \
-      $(PREBUILD_TYPES),$(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
-      $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
-      $(TEST_ART_INCOMPATIBLE_BISECTION_SEARCH_RUN_TESTS),$(ALL_ADDRESS_SIZES))
-endif
-
-# Clear variables ahead of appending to them when defining tests.
-$(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach prebuild, $(PREBUILD_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach compiler, $(COMPILER_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach relocate, $(RELOCATE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(relocate))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach trace, $(TRACE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(trace))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach gc, $(GC_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(gc))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach jni, $(JNI_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(jni))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach image, $(IMAGE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(image))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach test, $(TEST_ART_RUN_TESTS), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(test))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach address_size, $(ALL_ADDRESS_SIZES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(address_size))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach run_type, $(RUN_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run_type))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach debuggable_type, $(DEBUGGABLE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(debuggable_type))_RULES :=)))
-
 # We need dex2oat and dalvikvm on the target as well as the core images (all images as we sync
 # only once).
 TEST_ART_TARGET_SYNC_DEPS += $(ART_TARGET_EXECUTABLES) $(TARGET_CORE_IMG_OUTS)
@@ -810,10 +128,17 @@
 TEST_ART_TARGET_SYNC_DEPS += libopenjdkjvmti
 TEST_ART_TARGET_SYNC_DEPS += libopenjdkjvmtid
 
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/core-libart-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/core-oj-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/okhttp-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/bouncycastle-testdex.jar
+TEST_ART_TARGET_SYNC_DEPS += $(TARGET_OUT_JAVA_LIBRARIES)/conscrypt-testdex.jar
+
 # All tests require the host executables. The tests also depend on the core images, but on
 # specific version depending on the compiler.
 ART_TEST_HOST_RUN_TEST_DEPENDENCIES := \
   $(ART_HOST_EXECUTABLES) \
+  $(HOST_OUT_EXECUTABLES)/hprof-conv \
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtiagent) \
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libtiagentd) \
   $(OUT_DIR)/$(ART_TEST_LIST_host_$(ART_HOST_ARCH)_libartagent) \
@@ -853,8 +178,6 @@
 # Required for dx, jasmin, smali, dexmerger, jack.
 host_prereq_rules += $(TEST_ART_RUN_TEST_DEPENDENCIES)
 
-host_prereq_rules += $(HOST_OUT_EXECUTABLES)/hprof-conv
-
 # Classpath for Jack compilation for target.
 target_prereq_rules := $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
 
@@ -884,423 +207,48 @@
   endif
 endef
 
-COMPILER_TYPES_2 := optimizing
-COMPILER_TYPES_2 += interpreter
-COMPILER_TYPES_2 += jit
-COMPILER_TYPES_2 += regalloc_gc
-COMPILER_TYPES_2 += interp-ac
-ALL_ADDRESS_SIZES_2 := 32 64
-IMAGE_TYPES_2 := picimage
-IMAGE_TYPES_2 += no-image
-IMAGE_TYPES_2 += npicimage
-IMAGE_TYPES_2 += multinpicimage
-IMAGE_TYPES_2 += multipicimage
+TARGET_TYPES := host target
+COMPILER_TYPES := jit interpreter optimizing regalloc_gc interp-ac speed-profile
+IMAGE_TYPES := picimage no-image multipicimage
+ALL_ADDRESS_SIZES := 64 32
 
 # Add core image dependencies required for given target - HOST or TARGET,
 # IMAGE_TYPE, COMPILER_TYPE and ADDRESS_SIZE to the prereq_rules.
 $(foreach target, $(TARGET_TYPES), \
-  $(foreach image, $(IMAGE_TYPES_2), \
-    $(foreach compiler, $(COMPILER_TYPES_2), \
-      $(foreach address_size, $(ALL_ADDRESS_SIZES_2), $(eval \
+  $(foreach image, $(IMAGE_TYPES), \
+    $(foreach compiler, $(COMPILER_TYPES), \
+      $(foreach address_size, $(ALL_ADDRESS_SIZES), $(eval \
         $(call core-image-dependencies,$(target),$(image),$(compiler),$(address_size)))))))
 
 test-art-host-run-test-dependencies : $(host_prereq_rules)
 test-art-target-run-test-dependencies : $(target_prereq_rules)
 test-art-run-test-dependencies : test-art-host-run-test-dependencies test-art-target-run-test-dependencies
 
-host_prereq_rules :=
-target_prereq_rules :=
+# Create a rule to build and run a test group of the following form:
+# test-art-{1: host target}-run-test
+define define-test-art-host-or-target-run-test-group
+  build_target := test-art-$(1)-run-test
+  .PHONY: $$(build_target)
 
-# Create a rule to build and run a tests following the form:
-# test-art-{1: host or target}-run-test-{2: debug ndebug}-{3: prebuild no-prebuild no-dex2oat}-
-#    {4: interpreter optimizing jit interp-ac}-
-#    {5: relocate nrelocate relocate-npatchoat}-
-#    {6: trace or ntrace}-{7: gcstress gcverify cms}-{8: forcecopy checkjni jni}-
-#    {9: no-image image picimage}-{10: pictest npictest}-
-#    {11: ndebuggable debuggable}-{12: test name}{13: 32 or 64}
-define define-test-art-run-test
-  run_test_options :=
-  prereq_rule :=
-  test_groups :=
-  uc_host_or_target :=
-  jack_classpath :=
-  ifeq ($(ART_TEST_WITH_STRACE),true)
-    run_test_options += --strace
-  endif
-  ifeq ($(ART_TEST_RUN_TEST_ALWAYS_CLEAN),true)
-    run_test_options += --always-clean
-  endif
-  ifeq ($(ART_TEST_BISECTION),true)
-    run_test_options += --bisection-search
-  endif
-  ifeq ($(1),host)
-    uc_host_or_target := HOST
-    test_groups := ART_RUN_TEST_HOST_RULES
-    run_test_options += --host
-    prereq_rule := $(ART_TEST_HOST_RUN_TEST_DEPENDENCIES) $(HOST_JACK_CLASSPATH_DEPENDENCIES)
-    jack_classpath := $(HOST_JACK_CLASSPATH)
-  else
-    ifeq ($(1),target)
-      uc_host_or_target := TARGET
-      test_groups := ART_RUN_TEST_TARGET_RULES
-      prereq_rule := test-art-target-sync $(TARGET_JACK_CLASSPATH_DEPENDENCIES)
-      jack_classpath := $(TARGET_JACK_CLASSPATH)
-    else
-      $$(error found $(1) expected $(TARGET_TYPES))
-    endif
-  endif
-  ifeq ($(2),debug)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUG_RULES
-  else
-    ifeq ($(2),ndebug)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELEASE_RULES
-      run_test_options += -O
-    else
-      $$(error found $(2) expected $(RUN_TYPES))
-    endif
-  endif
-  ifeq ($(3),prebuild)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PREBUILD_RULES
-    run_test_options += --prebuild
-  else
-    ifeq ($(3),no-prebuild)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_PREBUILD_RULES
-      run_test_options += --no-prebuild
-    else
-      ifeq ($(3),no-dex2oat)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_DEX2OAT_RULES
-        run_test_options += --no-prebuild --no-dex2oat
-      else
-        $$(error found $(3) expected $(PREBUILD_TYPES))
-      endif
-    endif
-  endif
-  ifeq ($(4),optimizing)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_RULES
-    run_test_options += --optimizing
-  else ifeq ($(4),regalloc_gc)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_OPTIMIZING_GRAPH_COLOR_RULES
-    run_test_options += --optimizing -Xcompiler-option --register-allocation-strategy=graph-color
-  else
-    ifeq ($(4),interpreter)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_RULES
-      run_test_options += --interpreter
-    else ifeq ($(4),interp-ac)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_INTERPRETER_ACCESS_CHECKS_RULES
-      run_test_options += --interpreter --verify-soft-fail
-    else
-      ifeq ($(4),jit)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JIT_RULES
-        run_test_options += --jit
-      else
-        $$(error found $(4) expected $(COMPILER_TYPES))
-      endif
-    endif
-  endif
-
-  ifeq ($(5),relocate)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_RULES
-    run_test_options += --relocate
-  else
-    ifeq ($(5),no-relocate)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_RELOCATE_RULES
-      run_test_options += --no-relocate
-    else
-      ifeq ($(5),relocate-npatchoat)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_RELOCATE_NO_PATCHOAT_RULES
-        run_test_options += --relocate --no-patchoat
-      else
-        $$(error found $(5) expected $(RELOCATE_TYPES))
-      endif
-    endif
-  endif
-  ifeq ($(6),trace)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES
-    run_test_options += --trace
-  else
-    ifeq ($(6),ntrace)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_TRACE_RULES
-    else
-      ifeq ($(6),stream)
-        # Group streaming under normal tracing rules.
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_TRACE_RULES
-        run_test_options += --trace --stream
-      else
-        $$(error found $(6) expected $(TRACE_TYPES))
-      endif
-    endif
-  endif
-  ifeq ($(7),gcverify)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_GCVERIFY_RULES
-    run_test_options += --gcverify
-  else
-    ifeq ($(7),gcstress)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_GCSTRESS_RULES
-      run_test_options += --gcstress
-    else
-      ifeq ($(7),cms)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_CMS_RULES
-      else
-        $$(error found $(7) expected $(GC_TYPES))
-      endif
-    endif
-  endif
-  ifeq ($(8),forcecopy)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_FORCECOPY_RULES
-    run_test_options += --runtime-option -Xjniopts:forcecopy
-    ifneq ($$(ART_TEST_JNI_FORCECOPY),true)
-      skip_test := true
-    endif
-  else
-    ifeq ($(8),checkjni)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_CHECKJNI_RULES
-      run_test_options += --runtime-option -Xcheck:jni
-    else
-      ifeq ($(8),jni)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_JNI_RULES
-      else
-        $$(error found $(8) expected $(JNI_TYPES))
-      endif
-    endif
-  endif
-  image_suffix := $(4)
-  ifeq ($(4),regalloc_gc)
-    # Graph coloring tests share the image_suffix with optimizing tests.
-    image_suffix := optimizing
-  else
-    ifeq ($(4),jit)
-      # JIT tests share the image_suffix with interpreter tests.
-      image_suffix := interpreter
-    endif
-  endif
-  ifeq ($(9),no-image)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NO_IMAGE_RULES
-    run_test_options += --no-image
-    # Add the core dependency. This is required for pre-building.
-    # Use the PIC image, as it is the default in run-test, to match dependencies.
-    ifeq ($(1),host)
-      prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_$(13))
-    else
-      prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_$(13))
-    endif
-  else
-    ifeq ($(9),picimage)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES
-      ifeq ($(1),host)
-        prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_$(13))
-      else
-        prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_$(13))
-      endif
-    else
-      ifeq ($(9),multipicimage)
-        test_groups += ART_RUN_TEST_$$(uc_host_or_target)_PICIMAGE_RULES
-        run_test_options += --multi-image
-        ifeq ($(1),host)
-          prereq_rule += $$(HOST_CORE_IMAGE_$$(image_suffix)_multi_$(13))
-        else
-          prereq_rule += $$(TARGET_CORE_IMAGE_$$(image_suffix)_multi_$(13))
-        endif
-      else
-        $$(error found $(9) expected $(IMAGE_TYPES))
-      endif
-    endif
-  endif
-  ifeq ($(10),pictest)
-    run_test_options += --pic-test
-  else
-    ifeq ($(10),npictest)
-      # Nothing to be done.
-    else
-      $$(error found $(10) expected $(PICTEST_TYPES))
-    endif
-  endif
-  ifeq ($(11),debuggable)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_DEBUGGABLE_RULES
-    run_test_options += --debuggable
-  else
-    ifeq ($(11),ndebuggable)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_NONDEBUGGABLE_RULES
-      # Nothing to be done.
-    else
-      $$(error found $(11) expected $(DEBUGGABLE_TYPES))
-    endif
-  endif
-  # $(12) is the test name.
-  test_groups += ART_RUN_TEST_$$(uc_host_or_target)_$(call name-to-var,$(12))_RULES
-  ifeq ($(13),64)
-    test_groups += ART_RUN_TEST_$$(uc_host_or_target)_64_RULES
-    run_test_options += --64
-  else
-    ifeq ($(13),32)
-      test_groups += ART_RUN_TEST_$$(uc_host_or_target)_32_RULES
-    else
-      $$(error found $(13) expected $(ALL_ADDRESS_SIZES))
-    endif
-  endif
-  # Override of host instruction-set-features. Required to test advanced x86 intrinsics. The
-  # conditionals aren't really correct, they will fail to do the right thing on a 32-bit only
-  # host. However, this isn't common enough to worry here and make the conditions complicated.
-  ifneq ($(DEX2OAT_HOST_INSTRUCTION_SET_FEATURES),)
-    ifeq ($(13),64)
-      run_test_options += --instruction-set-features $(DEX2OAT_HOST_INSTRUCTION_SET_FEATURES)
-    endif
-  endif
-  ifneq ($($(HOST_2ND_ARCH_VAR_PREFIX)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES),)
-    ifeq ($(13),32)
-      run_test_options += --instruction-set-features $($(HOST_2ND_ARCH_VAR_PREFIX)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES)
-    endif
-  endif
-  run_test_rule_name := test-art-$(1)-run-test-$(2)-$(3)-$(4)-$(5)-$(6)-$(7)-$(8)-$(9)-$(10)-$(11)-$(12)$(13)
-  run_test_options := --output-path $(ART_HOST_TEST_DIR)/run-test-output/$$(run_test_rule_name) \
-      $$(run_test_options)
-  ifneq ($(ART_TEST_ANDROID_ROOT),)
-    run_test_options := --android-root $(ART_TEST_ANDROID_ROOT) $$(run_test_options)
-  endif
-  ifeq ($(ART_TEST_QUIET),true)
-    run_test_options += --quiet
-  endif
-$$(run_test_rule_name): PRIVATE_RUN_TEST_OPTIONS := $$(run_test_options)
-$$(run_test_rule_name): PRIVATE_JACK_CLASSPATH := $$(jack_classpath)
-.PHONY: $$(run_test_rule_name)
-$$(run_test_rule_name): $(TEST_ART_RUN_TEST_DEPENDENCIES) $(HOST_OUT_EXECUTABLES)/hprof-conv $$(prereq_rule) | $(TEST_ART_RUN_TEST_ORDERONLY_DEPENDENCIES)
-	$(hide) $$(call ART_TEST_SKIP,$$@) && \
-	  DX=$(abspath $(DX)) \
-	    JASMIN=$(abspath $(HOST_OUT_EXECUTABLES)/jasmin) \
-	    SMALI=$(abspath $(HOST_OUT_EXECUTABLES)/smali) \
-	    DXMERGER=$(abspath $(HOST_OUT_EXECUTABLES)/dexmerger) \
-	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
-	    JACK=$(abspath $(JACK)) \
-	    JACK_VERSION=$(JACK_DEFAULT_VERSION) \
-	    JACK_CLASSPATH=$$(PRIVATE_JACK_CLASSPATH) \
-	    art/test/run-test $$(PRIVATE_RUN_TEST_OPTIONS) $(12) \
-	      && $$(call ART_TEST_PASSED,$$@) || $$(call ART_TEST_FAILED,$$@)
-	$$(hide) (echo $(MAKECMDGOALS) | grep -q $$@ && \
-	  echo "run-test run as top-level target, removing test directory $(ART_HOST_TEST_DIR)" && \
-	  rm -r $(ART_HOST_TEST_DIR)) || true
-
-  $$(foreach test_group,$$(test_groups), $$(eval $$(value test_group) += $$(run_test_rule_name)))
-
-  # Clear locally defined variables.
-  uc_host_or_target :=
-  test_groups :=
-  run_test_options :=
-  run_test_rule_name :=
-  prereq_rule :=
-  jack_classpath :=
-endef  # define-test-art-run-test
-
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach test, $(TEST_ART_RUN_TESTS), \
-    $(foreach run_type, $(RUN_TYPES), \
-      $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), \
-        $(foreach prebuild, $(PREBUILD_TYPES), \
-          $(foreach compiler, $(COMPILER_TYPES), \
-            $(foreach relocate, $(RELOCATE_TYPES), \
-              $(foreach trace, $(TRACE_TYPES), \
-                $(foreach gc, $(GC_TYPES), \
-                  $(foreach jni, $(JNI_TYPES), \
-                    $(foreach image, $(IMAGE_TYPES), \
-                      $(foreach pictest, $(PICTEST_TYPES), \
-                        $(foreach debuggable, $(DEBUGGABLE_TYPES), \
-                          $(eval $(call define-test-art-run-test,$(target),$(run_type),$(prebuild),$(compiler),$(relocate),$(trace),$(gc),$(jni),$(image),$(pictest),$(debuggable),$(test),$(address_size))) \
-                  )))))))))))))
-define-test-art-run-test :=
-
-# Define a phony rule whose purpose is to test its prerequisites.
-# $(1): host or target
-# $(2): list of prerequisites
-define define-test-art-run-test-group
-.PHONY: $(1)
-$(1): $(2)
-	$(hide) $$(call ART_TEST_PREREQ_FINISHED,$$@)
-
-endef  # define-test-art-run-test-group
-
+  $$(build_target) : args := --$(1) --verbose
+  $$(build_target) : test-art-$(1)-run-test-dependencies
+	./art/test/testrunner/testrunner.py $$(args)
+  build_target :=
+  args :=
+endef  # define-test-art-host-or-target-run-test-group
 
 $(foreach target, $(TARGET_TYPES), $(eval \
-  $(call define-test-art-run-test-group,test-art-$(target)-run-test,$(ART_RUN_TEST_$(call name-to-var,$(target))_RULES))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach prebuild, $(PREBUILD_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(prebuild),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach run-type, $(RUN_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(run-type),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run-type))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach compiler, $(COMPILER_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(compiler),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach relocate, $(RELOCATE_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(relocate),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(relocate))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach trace, $(TRACE_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(trace),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(trace))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach gc, $(GC_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(gc),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(gc))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach jni, $(JNI_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(jni),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(jni))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach debuggable, $(DEBUGGABLE_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(debuggable),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(debuggable))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach image, $(IMAGE_TYPES), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(image),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(image))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach test, $(TEST_ART_RUN_TESTS), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test-$(test),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(test))_RULES)))))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach address_size, $(ADDRESS_SIZES_$(call name-to-var,$(target))), $(eval \
-    $(call define-test-art-run-test-group,test-art-$(target)-run-test$(address_size),$(ART_RUN_TEST_$(call name-to-var,$(target))_$(address_size)_RULES)))))
+  $(call define-test-art-host-or-target-run-test-group,$(target))))
 
-# Clear variables now we're finished with them.
-$(foreach target, $(TARGET_TYPES), $(eval ART_RUN_TEST_$(call name-to-var,$(target))_RULES :=))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach prebuild, $(PREBUILD_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(prebuild))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach compiler, $(COMPILER_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(compiler))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach relocate, $(RELOCATE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(relocate))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach trace, $(TRACE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(trace))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach gc, $(GC_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(gc))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach jni, $(JNI_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(jni))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach debuggable, $(DEBUGGABLE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(debuggable))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach image, $(IMAGE_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(image))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach test, $(TEST_ART_RUN_TESTS), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(test))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach address_size, $(ALL_ADDRESS_SIZES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(address_size))_RULES :=)))
-$(foreach target, $(TARGET_TYPES), \
-  $(foreach run_type, $(RUN_TYPES), \
-    $(eval ART_RUN_TEST_$(call name-to-var,$(target))_$(call name-to-var,$(run_type))_RULES :=)))
-define-test-art-run-test-group :=
+test-art-run-test : test-art-host-run-test test-art-target-run-test
+
+host_prereq_rules :=
+target_prereq_rules :=
+core-image-dependencies :=
+name-to-var :=
+define-test-art-host-or-target-run-test-group :=
 TARGET_TYPES :=
-PREBUILD_TYPES :=
 COMPILER_TYPES :=
-RELOCATE_TYPES :=
-TRACE_TYPES :=
-GC_TYPES :=
-JNI_TYPES :=
 IMAGE_TYPES :=
-ADDRESS_SIZES_TARGET :=
-ADDRESS_SIZES_HOST :=
 ALL_ADDRESS_SIZES :=
-RUN_TYPES :=
-DEBUGGABLE_TYPES :=
-
 LOCAL_PATH :=
diff --git a/test/577-profile-foreign-dex/src-ex/OtherDex.java b/test/DefaultMethods/IterableBase.java
similarity index 73%
copy from test/577-profile-foreign-dex/src-ex/OtherDex.java
copy to test/DefaultMethods/IterableBase.java
index cba73b3..4cefdef 100644
--- a/test/577-profile-foreign-dex/src-ex/OtherDex.java
+++ b/test/DefaultMethods/IterableBase.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -13,5 +13,15 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-public class OtherDex {
+
+interface Iface {
+    default void defaultMethod() {
+    }
 }
+
+class Impl implements Iface {
+}
+
+abstract class IterableBase implements Iterable {
+}
+
diff --git a/test/ProfileTestMultiDex/Main.java b/test/ProfileTestMultiDex/Main.java
index 41532ea..a8ced54 100644
--- a/test/ProfileTestMultiDex/Main.java
+++ b/test/ProfileTestMultiDex/Main.java
@@ -25,3 +25,45 @@
     return "C";
   }
 }
+
+class TestInline {
+  public int inlineMonomorphic(Super s) {
+    return s.getValue();
+  }
+
+  public int inlinePolymorphic(Super s) {
+    return s.getValue();
+  }
+
+  public int inlineMegamorphic(Super s) {
+    return s.getValue();
+  }
+
+  public int inlineMissingTypes(Super s) {
+    return s.getValue();
+  }
+
+  public int noInlineCache(Super s) {
+    return s.getValue();
+  }
+}
+
+abstract class Super {
+  abstract int getValue();
+}
+
+class SubA extends Super {
+  int getValue() { return 42; }
+}
+
+class SubB extends Super {
+  int getValue() { return 38; }
+}
+
+class SubD extends Super {
+  int getValue() { return 20; }
+}
+
+class SubE extends Super {
+  int getValue() { return 16; }
+}
diff --git a/test/ProfileTestMultiDex/Second.java b/test/ProfileTestMultiDex/Second.java
index 4ac5abc..4b3c7a4 100644
--- a/test/ProfileTestMultiDex/Second.java
+++ b/test/ProfileTestMultiDex/Second.java
@@ -25,3 +25,8 @@
     return "Z";
   }
 }
+
+class SubC extends Super {
+  int getValue() { return 24; }
+}
+
diff --git a/test/ProfileTestMultiDex/main.jpp b/test/ProfileTestMultiDex/main.jpp
index f2e3b4e..5e55e96 100644
--- a/test/ProfileTestMultiDex/main.jpp
+++ b/test/ProfileTestMultiDex/main.jpp
@@ -1,3 +1,21 @@
-main:
+Main:
   @@com.android.jack.annotations.ForceInMainDex
-  class Second
+  class Main
+TestInline:
+  @@com.android.jack.annotations.ForceInMainDex
+  class TestInline
+Super:
+  @@com.android.jack.annotations.ForceInMainDex
+  class Super
+SubA:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubA
+SubB:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubB
+SubD:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubD
+SubE:
+  @@com.android.jack.annotations.ForceInMainDex
+  class SubE
diff --git a/test/ProfileTestMultiDex/main.list b/test/ProfileTestMultiDex/main.list
index 44ba78e..ec131f0 100644
--- a/test/ProfileTestMultiDex/main.list
+++ b/test/ProfileTestMultiDex/main.list
@@ -1 +1,7 @@
 Main.class
+TestInline.class
+Super.class
+SubA.class
+SubB.class
+SubD.class
+SubE.class
diff --git a/test/VerifierDeps/Iface.smali b/test/VerifierDeps/Iface.smali
new file mode 100644
index 0000000..8607307
--- /dev/null
+++ b/test/VerifierDeps/Iface.smali
@@ -0,0 +1,18 @@
+# /*
+#  * Copyright (C) 2017 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+
+.class public abstract interface LIface;
+.super Ljava/lang/Object;
diff --git a/test/577-profile-foreign-dex/run b/test/VerifierDeps/MySub1SoftVerificationFailure.smali
similarity index 76%
copy from test/577-profile-foreign-dex/run
copy to test/VerifierDeps/MySub1SoftVerificationFailure.smali
index ad57d14..8123394 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/VerifierDeps/MySub1SoftVerificationFailure.smali
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+.class public LMySub1SoftVerificationFailure;
+.super LMySoftVerificationFailure;
diff --git a/test/577-profile-foreign-dex/run b/test/VerifierDeps/MySub2SoftVerificationFailure.smali
similarity index 76%
copy from test/577-profile-foreign-dex/run
copy to test/VerifierDeps/MySub2SoftVerificationFailure.smali
index ad57d14..8d00323 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/VerifierDeps/MySub2SoftVerificationFailure.smali
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+.class public LMySub2SoftVerificationFailure;
+.super LMySoftVerificationFailure;
diff --git a/test/577-profile-foreign-dex/run b/test/VerifierDepsMulti/MySoftVerificationFailure.smali
similarity index 64%
copy from test/577-profile-foreign-dex/run
copy to test/VerifierDepsMulti/MySoftVerificationFailure.smali
index ad57d14..6b56a3b 100644
--- a/test/577-profile-foreign-dex/run
+++ b/test/VerifierDepsMulti/MySoftVerificationFailure.smali
@@ -1,6 +1,4 @@
-#!/bin/bash
-#
-# Copyright 2016 The Android Open Source Project
+# Copyright (C) 2017 The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-exec ${RUN} \
-  --runtime-option -Xjitsaveprofilinginfo \
-  --runtime-option -Xusejit:true \
-  "${@}"
+.class public LMySoftVerificationFailure;
+.super Ljava/lang/Object;
+
+.method public final foo()V
+  .registers 1
+  sget-object v0, LMySoftVerificationFailure;->error:LUnknownType;
+  throw v0
+.end method
+
+.field public static error:LUnknownType;
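+
+# Note: the sget-object on the unresolved type LUnknownType; above is what is
+# expected to turn verification of foo() into a soft failure rather than a
+# hard one.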
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index a841f9e..c7a57ce 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -180,6 +180,9 @@
   }
 
   jit::JitCodeCache* code_cache = jit->GetCodeCache();
+  // Update the code cache to make sure the JIT code does not get deleted.
+  // Note: this will apply to all JIT compilations.
+  code_cache->SetGarbageCollectCode(false);
   while (true) {
     const void* pc = method->GetEntryPointFromQuickCompiledCode();
     if (code_cache->ContainsPc(pc)) {
diff --git a/test/etc/default-build b/test/etc/default-build
index 4318966..d74b24d 100755
--- a/test/etc/default-build
+++ b/test/etc/default-build
@@ -97,7 +97,7 @@
 while true; do
   if [ "x$1" = "x--dx-option" ]; then
     shift
-    on="$1"
+    option="$1"
     DX_FLAGS="${DX_FLAGS} $option"
     shift
   elif [ "x$1" = "x--jvm" ]; then
@@ -209,9 +209,9 @@
     ${JACK} --import classes.jill.jar --output-dex .
   else
     if [ ${NEED_DEX} = "true" ]; then
-      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 classes-ex
+      ${DX} -JXmx256m --debug --dex --dump-to=classes-ex.lst --output=classes.dex --dump-width=1000 ${DX_FLAGS} classes-ex
       zip ${TEST_NAME}-ex.jar classes.dex
-      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 classes
+      ${DX} -JXmx256m --debug --dex --dump-to=classes.lst --output=classes.dex --dump-width=1000 ${DX_FLAGS} classes
     fi
   fi
 else
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index f3d4332..f1b6132 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -63,6 +63,8 @@
 TEST_IS_NDEBUG="n"
 APP_IMAGE="y"
 VDEX_FILTER=""
+PROFILE="n"
+RANDOM_PROFILE="n"
 
 # if "y", run 'sync' before dalvikvm to make sure all files from
 # build step (e.g. dex2oat) were finished writing.
@@ -269,6 +271,12 @@
     elif [ "x$1" = "x--sync" ]; then
         SYNC_BEFORE_RUN="y"
         shift
+    elif [ "x$1" = "x--profile" ]; then
+        PROFILE="y"
+        shift
+    elif [ "x$1" = "x--random-profile" ]; then
+        RANDOM_PROFILE="y"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         exit 1
@@ -371,6 +379,20 @@
 
 
 if [ "$HAVE_IMAGE" = "n" ]; then
+    if [ "${HOST}" = "y" ]; then
+        framework="${ANDROID_HOST_OUT}/framework"
+        bpath_suffix="-hostdex"
+    else
+        framework="${ANDROID_ROOT}/framework"
+        bpath_suffix="-testdex"
+    fi
+    bpath="${framework}/core-libart${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
+    bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
+    # Pass down the bootclasspath
+    FLAGS="${FLAGS} -Xbootclasspath:${bpath}"
     # Add 5 minutes to give some time to generate the boot image.
     TIME_OUT_VALUE=$((${TIME_OUT_VALUE} + 300))
     DALVIKVM_BOOT_OPT="-Ximage:/system/non-existant/core.art"
@@ -426,25 +448,11 @@
 
 JNI_OPTS="-Xjnigreflimit:512 -Xcheck:jni"
 
+COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate"
 if [ "$RELOCATE" = "y" ]; then
-    COMPILE_FLAGS="${COMPILE_FLAGS} --include-patch-information --runtime-arg -Xnorelocate"
-    FLAGS="${FLAGS} -Xrelocate -Xcompiler-option --include-patch-information"
-    if [ "$HOST" = "y" ]; then
-        # Run test sets a fairly draconian ulimit that we will likely blow right over
-        # since we are relocating. Get the total size of the /system/framework directory
-        # in 512 byte blocks and set it as the ulimit. This should be more than enough
-        # room.
-        if [ ! `uname` = "Darwin" ]; then  # TODO: Darwin doesn't support "du -B..."
-          ulimit -S $(du -c -B512 ${ANDROID_HOST_OUT}/framework 2>/dev/null | tail -1 | cut -f1) || exit 1
-        fi
-    fi
+    FLAGS="${FLAGS} -Xrelocate"
 else
     FLAGS="$FLAGS -Xnorelocate"
-    COMPILE_FLAGS="${COMPILE_FLAGS} --runtime-arg -Xnorelocate"
-    if [ "$HOST" = "y" ]; then
-        # Increase ulimit to 64MB in case we are running hprof test.
-        ulimit -S 64000 || exit 1
-    fi
 fi
 
 if [ "$HOST" = "n" ]; then
@@ -490,16 +498,34 @@
 DEX_LOCATION_STRIPPED="${DEX_LOCATION#/}"
 VDEX_NAME="${DEX_LOCATION_STRIPPED//\//@}@$TEST_NAME.jar@classes.vdex"
 if [ ${#VDEX_NAME} -gt $max_filename_size ]; then
-    echo  "Dex location path too long."
+    echo "Dex location path too long:"
+    echo "$VDEX_NAME is ${#VDEX_NAME} character long, and the limit is $max_filename_size."
     exit 1
 fi
 
+profman_cmdline="true"
 dex2oat_cmdline="true"
 vdex_cmdline="true"
 mkdir_locations="${DEX_LOCATION}/dalvik-cache/$ISA"
 strip_cmdline="true"
 sync_cmdline="true"
 
+# PROFILE takes precedence over RANDOM_PROFILE, since PROFILE tests require a
+# specific profile to run properly.
+if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
+  profman_cmdline="${ANDROID_ROOT}/bin/profman  \
+    --apk=$DEX_LOCATION/$TEST_NAME.jar \
+    --dex-location=$DEX_LOCATION/$TEST_NAME.jar"
+  COMPILE_FLAGS="${COMPILE_FLAGS} --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  FLAGS="${FLAGS} -Xcompiler-option --profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  if [ "$PROFILE" = "y" ]; then
+    profman_cmdline="${profman_cmdline} --create-profile-from=$DEX_LOCATION/profile \
+        --reference-profile-file=$DEX_LOCATION/$TEST_NAME.prof"
+  else
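+    # A fixed seed keeps the randomly generated profile stable across runs.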
+    profman_cmdline="${profman_cmdline} --generate-test-profile=$DEX_LOCATION/$TEST_NAME.prof \
+        --generate-test-profile-seed=0"
+  fi
+fi
 
 if [ "$PREBUILD" = "y" ]; then
   mkdir_locations="${mkdir_locations} ${DEX_LOCATION}/oat/$ISA"
@@ -577,6 +603,7 @@
 dex2oat_cmdline=$(echo $dex2oat_cmdline)
 dalvikvm_cmdline=$(echo $dalvikvm_cmdline)
 vdex_cmdline=$(echo $vdex_cmdline)
+profman_cmdline=$(echo $profman_cmdline)
 
 if [ "$HOST" = "n" ]; then
     adb root > /dev/null
@@ -586,11 +613,18 @@
       adb shell mkdir -p $DEX_LOCATION
       adb push $TEST_NAME.jar $DEX_LOCATION
       adb push $TEST_NAME-ex.jar $DEX_LOCATION
+      if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
+        adb push profile $DEX_LOCATION
+      fi
     else
       adb shell rm -r $DEX_LOCATION >/dev/null 2>&1
       adb shell mkdir -p $DEX_LOCATION >/dev/null 2>&1
       adb push $TEST_NAME.jar $DEX_LOCATION >/dev/null 2>&1
       adb push $TEST_NAME-ex.jar $DEX_LOCATION >/dev/null 2>&1
+      if [ "$PROFILE" = "y" ] || [ "$RANDOM_PROFILE" = "y" ]; then
+        adb push profile $DEX_LOCATION >/dev/null 2>&1
+      fi
+
     fi
 
     LD_LIBRARY_PATH=/data/$TEST_DIRECTORY/art/$ISA
@@ -617,6 +651,7 @@
              mkdir -p ${mkdir_locations} && \
              export LD_LIBRARY_PATH=$LD_LIBRARY_PATH && \
              export PATH=$ANDROID_ROOT/bin:$PATH && \
+             $profman_cmdline && \
              $dex2oat_cmdline && \
              $vdex_cmdline && \
              $strip_cmdline && \
@@ -693,13 +728,14 @@
     fi
 
     if [ "$DEV_MODE" = "y" ]; then
-      echo "mkdir -p ${mkdir_locations} && $dex2oat_cmdline && $vdex_cmdline && $strip_cmdline && $sync_cmdline && $cmdline"
+      echo "mkdir -p ${mkdir_locations} && $profman_cmdline && $dex2oat_cmdline && $vdex_cmdline && $strip_cmdline && $sync_cmdline && $cmdline"
     fi
 
     cd $ANDROID_BUILD_TOP
 
     rm -rf ${DEX_LOCATION}/dalvik-cache/
     mkdir -p ${mkdir_locations} || exit 1
+    $profman_cmdline || { echo "Profman failed." >&2 ; exit 2; }
     $dex2oat_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $vdex_cmdline || { echo "Dex2oat failed." >&2 ; exit 2; }
     $strip_cmdline || { echo "Strip failed." >&2 ; exit 3; }
diff --git a/test/knownfailures.json b/test/knownfailures.json
index 784f49c..7891d4c 100644
--- a/test/knownfailures.json
+++ b/test/knownfailures.json
@@ -1,12 +1,12 @@
 [
     {
-        "test": "153-reference-stress",
+        "tests": "153-reference-stress",
         "description": ["Disable 153-reference-stress temporarily until a fix",
                         "arrives."],
         "bug": "http://b/33389022"
     },
     {
-        "test": "080-oom-fragmentation",
+        "tests": "080-oom-fragmentation",
         "description": "Disable 080-oom-fragmentation due to flakes.",
         "bug": "http://b/33795328"
     },
@@ -21,16 +21,11 @@
         "bug": "http://b/34193123"
     },
     {
-        "test": "149-suspend-all-stress",
+        "tests": "149-suspend-all-stress",
         "description": "Disable 149-suspend-all-stress, its output is flaky",
         "bug": "http://b/28988206"
     },
     {
-        "test": "577-profile-foreign-dex",
-        "description": "Disable 577-profile-foreign-dex",
-        "bug": "http://b/27454772"
-    },
-    {
         "tests": ["002-sleep",
                   "053-wait-some",
                   "055-enum-performance",
@@ -39,13 +34,13 @@
                         "loaded systems."]
     },
     {
-        "test": "147-stripped-dex-fallback",
+        "tests": "147-stripped-dex-fallback",
         "variant": "target",
         "description": ["147-stripped-dex-fallback isn't supported on device",
                         "because --strip-dex  requires the zip command."]
     },
     {
-        "test": "569-checker-pattern-replacement",
+        "tests": "569-checker-pattern-replacement",
         "variant": "target",
         "description": ["569-checker-pattern-replacement tests behaviour",
                         "present only on host."]
@@ -59,13 +54,7 @@
                         "doesn't (and isn't meant to) work with --prebuild."]
     },
     {
-        "test": "554-jit-profile-file",
-        "variant": "no-prebuild | interpreter",
-        "description": ["554-jit-profile-file is disabled because it needs a",
-                        "primary oat file to know what it should save."]
-    },
-    {
-        "tests": ["529-checker-unresolved", "555-checker-regression-x86const"],
+        "tests": ["529-checker-unresolved"],
         "variant": "no-prebuild",
         "bug": "http://b/27784033"
     },
@@ -78,27 +67,26 @@
     {
         "tests": ["117-nopatchoat",
                   "118-noimage-dex2oat",
-                  "119-noimage-patchoat",
-                  "554-jit-profile-file"],
+                  "119-noimage-patchoat"],
         "variant": "no-relocate",
         "description": ["117-nopatchoat is not broken per-se it just doesn't",
                         "work (and isn't meant to) without --prebuild",
                         "--relocate"]
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "variant": "interp-ac",
         "description": ["Temporarily disable some broken tests when forcing",
                         "access checks in interpreter"],
         "bug": "http://b/22414682"
     },
     {
-        "test" : "629-vdex-speed",
+        "tests" : "629-vdex-speed",
         "variant": "interp-ac | no-dex2oat | interpreter | jit | relocate-npatchoat",
         "description": "629 requires compilation."
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "variant": "gcstress",
         "description": ["137-cfi needs to unwind a second forked process. We're",
                         "using a primitive sleep to wait till we hope the",
@@ -106,6 +94,12 @@
                         "slowness of gcstress makes this bad."]
     },
     {
+        "tests": "152-dead-large-object",
+        "variant": "gcstress",
+        "description": ["152-dead-large-object requires a heap larger than what gcstress uses."],
+        "bug": "http://b/35800768"
+    },
+    {
         "tests": ["908-gc-start-finish",
                   "913-heaps"],
         "variant": "gcstress",
@@ -114,7 +108,7 @@
                         "non-deterministic. Same for 913."]
     },
     {
-        "test": "961-default-iface-resolution-gen",
+        "tests": "961-default-iface-resolution-gen",
         "variant": "gcstress",
         "description": ["961-default-iface-resolution-gen and",
                         "964-default-iface-init-genare very long tests that",
@@ -124,20 +118,25 @@
                         "lot."]
     },
     {
-        "tests": ["964-default-iface-init-gen",
-                 "154-gc-loop"],
+        "tests": "964-default-iface-init-gen",
         "variant": "gcstress"
     },
     {
-        "test": "115-native-bridge",
+        "tests": "154-gc-loop",
+        "variant": "gcstress | jit & debug",
+        "description": ["154-gc-loop depends GC not happening too often"],
+        "bug": "http://b/35917229"
+    },
+    {
+        "tests": "115-native-bridge",
         "variant": "target",
         "description": ["115-native-bridge setup is complicated. Need to",
                         "implement it correctly for the target."]
     },
     {
-        "test": "130-hprof",
+        "tests": "130-hprof",
         "variant": "target",
-        "desription": ["130-hprof dumps the heap and runs hprof-conv to check",
+        "description": ["130-hprof dumps the heap and runs hprof-conv to check",
                        "whether the file is somewhat readable. Thi is only",
                        "possible on the host. TODO: Turn off all the other",
                        "combinations, this is more about testing actual ART",
@@ -145,7 +144,7 @@
                        "complete test) JDWP must be set up."]
     },
     {
-        "test": "131-structural-change",
+        "tests": "131-structural-change",
         "variant": "debug",
         "description": ["131 is an old test. The functionality has been",
                         "implemented at an earlier stage and is checked",
@@ -154,25 +153,19 @@
                         "punt to interpreter"]
     },
     {
-        "test": "138-duplicate-classes-check",
+        "tests": "138-duplicate-classes-check",
         "variant": "ndebug",
         "description": ["Turned on for debug builds since debug builds have",
                         "duplicate classes checks enabled"],
         "bug": "http://b/2133391"
     },
     {
-        "test": "147-stripped-dex-fallback",
+        "tests": "147-stripped-dex-fallback",
         "variant": "no-dex2oat | no-image | relocate-npatchoat",
         "description": ["147-stripped-dex-fallback is disabled because it",
                         "requires --prebuild."]
     },
     {
-        "test": "554-jit-profile-file",
-        "variant": "no-dex2oat | no-image | relocate-npatchoat",
-        "description": ["554-jit-profile-file is disabled because it needs a",
-                        "primary oat file to know what it should save."]
-    },
-    {
         "tests": ["116-nodex2oat",
                   "117-nopatchoat",
                   "118-noimage-dex2oat",
@@ -191,14 +184,14 @@
                   "138-duplicate-classes-check",
                   "018-stack-overflow",
                   "961-default-iface-resolution-gen",
-                  "964-default-iface-init"],
+                  "964-default-iface-init-gen"],
         "variant": "no-image",
         "description": ["This test fails without an image. 018, 961, 964 often",
                         "time out."],
         "bug": "http://b/34369284"
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "description": ["This test unrolls and expects managed frames, but",
                         "tracing means we run the interpreter."],
         "variant": "trace | stream"
@@ -213,7 +206,7 @@
         "variant": "trace | stream"
     },
     {
-        "test": "130-hprof",
+        "tests": "130-hprof",
         "description": "130 occasional timeout",
         "bug": "http://b/32383962",
         "variant": "trace | stream"
@@ -234,14 +227,14 @@
                         "suppressed when tracing."]
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "description": ["CFI unwinding expects managed frames, and the test",
                         "does not iterate enough to even compile. JIT also",
                         "uses Generic JNI instead of the JNI compiler."],
         "variant": "interpreter | jit"
     },
     {
-        "test": "906-iterate-heap",
+        "tests": "906-iterate-heap",
         "description": ["Test 906 iterates the heap filtering with different",
                         "options. No instances should be created between those",
                         "runs to be able to have precise checks."],
@@ -266,25 +259,25 @@
                   "602-deoptimizeable"],
         "description": ["Tests that should fail when the optimizing compiler ",
                         "compiles them non-debuggable."],
-        "variant": "optimizing &  ndebuggable | regalloc_gc & ndebuggable"
+        "variant": "optimizing & ndebuggable | regalloc_gc & ndebuggable | speed-profile & ndebuggable"
     },
     {
-        "test": "596-app-images",
+        "tests": "596-app-images",
         "variant": "npictest"
     },
     {
-        "test": "055-enum-performance",
+        "tests": "055-enum-performance",
         "variant": "optimizing | regalloc_gc",
         "description": ["055: Exceeds run time limits due to heap poisoning ",
                         "instrumentation (on ARM and ARM64 devices)."]
     },
     {
-        "test": "909-attach-agent",
+        "tests": "909-attach-agent",
         "variant": "debuggable",
         "description": "Tests that check semantics for a non-debuggable app."
     },
     {
-        "test": "137-cfi",
+        "tests": "137-cfi",
         "variant": "debuggable",
         "description": ["The test relies on AOT code and debuggable makes us",
                         "JIT always."]
@@ -293,7 +286,7 @@
         "tests": ["000-nop",
                   "134-nodex2oat-nofallback",
                   "147-stripped-dex-fallback",
-                 "595-profile-saving"],
+                  "595-profile-saving"],
         "description": "The doesn't compile anything",
         "env_vars": {"ART_TEST_BISECTION": "true"},
         "variant": "optimizing | regalloc_gc"
@@ -318,21 +311,60 @@
     },
     {
         "tests": ["115-native-bridge",
-                 "088-monitor-verification"],
+                  "088-monitor-verification"],
         "description": "The test assume they are always compiled.",
         "env_vars": {"ART_TEST_BISECTION": "true"},
         "variant": "optimizing | regalloc_gc"
     },
     {
-        "test": "055-enum-performance",
+        "tests": "055-enum-performance",
         "description": ["The test tests performance which degrades during",
                         "bisecting."],
         "env_vars": {"ART_TEST_BISECTION": "true"},
         "variant": "optimizing | regalloc_gc"
     },
     {
-        "test": "537-checker-arraycopy",
+        "tests": ["537-checker-arraycopy",
+                  "641-checker-arraycopy"],
         "env_vars": {"ART_USE_READ_BARRIER": "true"},
         "variant": "interpreter | optimizing | regalloc_gc | jit"
+    },
+    {
+        "tests": ["912-classes",
+                  "616-cha",
+                  "616-cha-abstract",
+                  "616-cha-interface",
+                  "616-cha-interface-default",
+                  "616-cha-miranda",
+                  "616-cha-proxy-method-inline"],
+        "bug": "http://b/36344364 http://b/36344221",
+        "variant": "no-dex2oat | relocate-npatchoat"
+    },
+    {
+        "tests": ["476-clinit-inline-static-invoke",
+                  "496-checker-inlining-class-loader",
+                  "508-referrer-method",
+                  "637-checker-throw-inline"],
+        "bug": "http://b/36365552",
+        "variant": "no-image & jit"
+    },
+    {
+        "tests": ["597-deopt-new-string"],
+        "bug": "http://b/36467228",
+        "variant": "no-image & jit"
+    },
+    {
+        "tests": ["530-checker-lse",
+                  "530-checker-lse2",
+                  "030-bad-finalizer",
+                  "080-oom-throw"],
+        "bug": "http://b/36377828",
+        "variant": "interp-ac"
+    },
+    {
+        "tests": ["629-vdex-speed",
+                  "634-vdex-duplicate"],
+        "description": ["Profile driven dexlayout does not work with vdex or dex verifier."],
+        "variant": "speed-profile"
     }
 ]
diff --git a/test/run-all-tests b/test/run-all-tests
index 402c299..a0d2f23 100755
--- a/test/run-all-tests
+++ b/test/run-all-tests
@@ -155,6 +155,9 @@
     elif [ "x$1" = "x--strace" ]; then
         run_args="${run_args} --strace"
         shift
+    elif [ "x$1" = "x--random-profile" ]; then
+        run_args="${run_args} --random-profile"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
diff --git a/test/run-test b/test/run-test
index e808dee..e46099d 100755
--- a/test/run-test
+++ b/test/run-test
@@ -80,7 +80,7 @@
 
 # ANDROID_HOST_OUT is not set in a build environment.
 if [ -z "$ANDROID_HOST_OUT" ]; then
-    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out/}host/linux-x86
+    export ANDROID_HOST_OUT=${OUT_DIR:-$ANDROID_BUILD_TOP/out}/host/linux-x86
 fi
 
 # If JACK_CLASSPATH is not set, assume it only contains core-libart.
@@ -247,6 +247,11 @@
         option="$1"
         run_args="${run_args} -Xcompiler-option $option"
         shift
+    elif [ "x$1" = "x--build-option" ]; then
+        shift
+        option="$1"
+        build_args="${build_args} $option"
+        shift
     elif [ "x$1" = "x--runtime-option" ]; then
         shift
         option="$1"
@@ -377,6 +382,9 @@
         filter=$1
         run_args="${run_args} --vdex-filter $filter"
         shift
+    elif [ "x$1" = "x--random-profile" ]; then
+        run_args="${run_args} --random-profile"
+        shift
     elif expr "x$1" : "x--" >/dev/null 2>&1; then
         echo "unknown $0 option: $1" 1>&2
         usage="yes"
@@ -525,22 +533,6 @@
         err_echo "--no-image is only supported on the art runtime"
         exit 1
     fi
-    if [ "$target_mode" = "no" ]; then
-        framework="${ANDROID_HOST_OUT}/framework"
-        bpath_suffix="-hostdex"
-    else
-        framework="${android_root}/framework"
-        bpath_suffix=""
-    fi
-    # TODO If the target was compiled WITH_DEXPREOPT=true then these tests will
-    # fail since these jar files will be stripped.
-    bpath="${framework}/core-libart${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/core-oj${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/conscrypt${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/okhttp${bpath_suffix}.jar"
-    bpath="${bpath}:${framework}/bouncycastle${bpath_suffix}.jar"
-    # Pass down the bootclasspath
-    run_args="${run_args} --runtime-option -Xbootclasspath:${bpath}"
     run_args="${run_args} --no-image"
 fi
 
@@ -611,6 +603,7 @@
         echo "  Runtime Options:"
         echo "    -O                    Run non-debug rather than debug build (off by default)."
         echo "    -Xcompiler-option     Pass an option to the compiler."
+        echo "    --build-option        Pass an option to the build script."
         echo "    --runtime-option      Pass an option to the runtime."
         echo "    --debug               Wait for a debugger to attach."
         echo "    --debuggable          Whether to compile Java code for a debugger."
@@ -723,36 +716,13 @@
 
 export TEST_NAME=`basename ${test_dir}`
 
-# arch_supports_read_barrier ARCH
-# -------------------------------
-# Return whether the Optimizing compiler has read barrier support for ARCH.
-function arch_supports_read_barrier() {
-  # Optimizing has read barrier support for ARM, ARM64, x86 and x86-64 at the
-  # moment.
-  [ "x$1" = xarm ] || [ "x$1" = xarm64 ] || [ "x$1" = xx86 ] || [ "x$1" = xx86_64 ]
-}
-
 # Tests named '<number>-checker-*' will also have their CFGs verified with
 # Checker when compiled with Optimizing on host.
 if [[ "$TEST_NAME" =~ ^[0-9]+-checker- ]]; then
   if [ "$runtime" = "art" -a "$image_suffix" = "" -a "$USE_JACK" = "true" ]; then
-    # Optimizing has read barrier support for certain architectures
-    # only. On other architectures, compiling is disabled when read
-    # barriers are enabled, meaning that we do not produce a CFG file
-    # as a side-effect of compilation, thus the Checker assertions
-    # cannot be checked. Disable Checker for those cases.
-    #
-    # TODO: Enable Checker when read barrier support is added to more
-    # architectures (b/12687968).
-    if [ "x$ART_USE_READ_BARRIER" != xfalse ]                  \
-       && (([ "x$host_mode" = "xyes" ]                         \
-            && ! arch_supports_read_barrier "$host_arch_name") \
-           || ([ "x$target_mode" = "xyes" ]                    \
-               && ! arch_supports_read_barrier "$target_arch_name")); then
-      run_checker="no"
     # In no-prebuild mode, the compiler is only invoked if both dex2oat and
     # patchoat are available. Disable Checker otherwise (b/22552692).
-    elif [ "$prebuild_mode" = "yes" ] \
+    if [ "$prebuild_mode" = "yes" ] \
          || [ "$have_patchoat" = "yes" -a "$have_dex2oat" = "yes" ]; then
       run_checker="yes"
 
@@ -776,27 +746,14 @@
 
   run_args="${run_args} --testlib ${testlib}"
 
-# To cause tests to fail fast, limit the file sizes created by dx, dex2oat and ART output to 2MB.
-build_file_size_limit=2048
-run_file_size_limit=2048
-
-# Add tests requiring a higher ulimit to this list. Ulimits might need to be raised to deal with
-# large amounts of expected output or large generated files.
-if echo "$test_dir" | grep -Eq "(083|089|961|964|971)" > /dev/null; then
-  build_file_size_limit=5120
-  run_file_size_limit=5120
-fi
-if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
-  # We will need to `adb pull` the .cfg output from the target onto the host to
-  # run checker on it. This file can be big.
-  build_file_size_limit=32768
-  run_file_size_limit=32768
-fi
-if [ ${USE_JACK} = "false" ]; then
-  # Set ulimit if we build with dx only, Jack can generate big temp files.
-  if ! ulimit -S "$build_file_size_limit"; then
-    err_echo "ulimit file size setting failed"
-  fi
+# To cause tests to fail fast, limit the file sizes created by dx, dex2oat and
+# ART output to approximately 128MB. This should be more than sufficient
+# for any test while still catching cases of runaway output.
+# Set a hard limit to encourage ART developers to increase the ulimit here if
+# needed to support a test case rather than resetting the limit in the run
+# script for the particular test in question.
+if ! ulimit -f 128000; then
+  err_echo "ulimit file size setting failed"
 fi
 
 good="no"
@@ -807,9 +764,6 @@
     build_exit="$?"
     echo "build exit status: $build_exit" 1>&2
     if [ "$build_exit" = '0' ]; then
-        if ! ulimit -S "$run_file_size_limit"; then
-          err_echo "ulimit file size setting failed"
-        fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" 2>&1
         run_exit="$?"
@@ -835,9 +789,6 @@
     "./${build}" $build_args >"$build_output" 2>&1
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
-        if ! ulimit -S "$run_file_size_limit"; then
-          err_echo "ulimit file size setting failed"
-        fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
         if [ "$run_checker" = "yes" ]; then
@@ -872,9 +823,6 @@
     "./${build}" $build_args >"$build_output" 2>&1
     build_exit="$?"
     if [ "$build_exit" = '0' ]; then
-        if ! ulimit -S "$run_file_size_limit"; then
-          err_echo "ulimit file size setting failed"
-        fi
         echo "${test_dir}: running..." 1>&2
         "./${run}" $run_args "$@" >"$output" 2>&1
         run_exit="$?"
@@ -944,9 +892,6 @@
       echo "${test_dir}: not bisecting, checker test." 1>&2
     else
       # Increase file size limit, bisection search can generate large logfiles.
-      if ! ulimit -S unlimited; then
-        err_echo "ulimit file size setting failed"
-      fi
       echo "${test_dir}: bisecting..." 1>&2
       cwd=`pwd`
       maybe_device_mode=""
diff --git a/test/testrunner/env.py b/test/testrunner/env.py
index 4336d77..46244a4 100644
--- a/test/testrunner/env.py
+++ b/test/testrunner/env.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-#
 # Copyright 2017, The Android Open Source Project
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -57,7 +55,9 @@
              "make --no-print-directory -C \"%s\" -f build/core/config.mk "
              "dump-many-vars DUMP_MANY_VARS=\"%s\"") % (ANDROID_BUILD_TOP, all_vars)
 
-  config = subprocess.Popen(command, stdout=subprocess.PIPE,
+  config = subprocess.Popen(command,
+                            stdout=subprocess.PIPE,
+                            universal_newlines=True,
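+                            # text mode so the output below is str, not bytes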
                             shell=True).communicate()[0] # read until EOF, select stdin
   # Prints out something like:
   # TARGET_ARCH='arm64'
@@ -105,6 +105,9 @@
 # Do you want to test the optimizing compiler with graph coloring register allocation?
 ART_TEST_OPTIMIZING_GRAPH_COLOR = getEnvBoolean('ART_TEST_OPTIMIZING_GRAPH_COLOR', ART_TEST_FULL)
 
+# Do you want to do run-tests with profiles?
+ART_TEST_SPEED_PROFILE = getEnvBoolean('ART_TEST_SPEED_PROFILE', ART_TEST_FULL)
+
 # Do we want to test PIC-compiled tests ("apps")?
 ART_TEST_PIC_TEST = getEnvBoolean('ART_TEST_PIC_TEST', ART_TEST_FULL)
 # Do you want tracing tests run?
@@ -178,6 +181,8 @@
 
 EXTRA_DISABLED_TESTS = set(env.get("ART_TEST_RUN_TEST_SKIP", "").split())
 
+ART_TEST_RUN_TEST_BUILD = getEnvBoolean('ART_TEST_RUN_TEST_BUILD', False)
+
 TARGET_2ND_ARCH = get_build_var('TARGET_2ND_ARCH')
 TARGET_ARCH = get_build_var('TARGET_ARCH')
 if TARGET_2ND_ARCH:
diff --git a/test/testrunner/run_build_test_target.py b/test/testrunner/run_build_test_target.py
new file mode 100755
index 0000000..0ab50af
--- /dev/null
+++ b/test/testrunner/run_build_test_target.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+#
+# Copyright 2017, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Build and run go/ab/git_master-art-host target
+
+This script is executed by the android build server and must not be moved,
+or changed in an otherwise backwards-incompatible manner.
+
+Provided with a target name, the script sets up the environment for
+building the test target by taking config information from
+target_config.py.
+
+See target_config.py for the configuration syntax.
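+
+Example invocations (build_target names are the keys of target_config.py,
+e.g. 'art-interpreter'):
+
+  ./art/test/testrunner/run_build_test_target.py -j8 art-interpreter
+  ./art/test/testrunner/run_build_test_target.py --list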
+"""
+
+import argparse
+import os
+import subprocess
+import sys
+
+from target_config import target_config
+import env
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-j', default='1', dest='n_threads')
+# either -l/--list OR build-target is required (but not both).
+group = parser.add_mutually_exclusive_group(required=True)
+group.add_argument('build_target', nargs='?')
+group.add_argument('-l', '--list', action='store_true', help='List all possible run-build targets.')
+options = parser.parse_args()
+
+##########
+
+if options.list:
+  print "List of all known build_target: "
+  for k in sorted(target_config.iterkeys()):
+    print " * " + k
+  # TODO: would be nice if this was the same order as the target config file.
+  sys.exit(1)
+
+if not target_config.get(options.build_target):
+  sys.stderr.write("error: invalid build_target, see -l/--list.\n")
+  sys.exit(1)
+
+target = target_config[options.build_target]
+n_threads = options.n_threads
+custom_env = target.get('env', {})
+custom_env['SOONG_ALLOW_MISSING_DEPENDENCIES'] = 'true'
+print custom_env
+os.environ.update(custom_env)
+
+if target.get('make'):
+  build_command = 'make'
+  build_command += ' -j' + str(n_threads)
+  build_command += ' -C ' + env.ANDROID_BUILD_TOP
+  build_command += ' ' + target.get('make')
+  # Add 'dist' to avoid Jack issues b/36169180.
+  build_command += ' dist'
+  sys.stdout.write(str(build_command) + '\n')
+  sys.stdout.flush()
+  if subprocess.call(build_command.split()):
+    sys.exit(1)
+
+if target.get('golem'):
+  machine_type = target.get('golem')
+  # use art-opt-cc by default since it mimics the default preopt config.
+  default_golem_config = 'art-opt-cc'
+
+  os.chdir(env.ANDROID_BUILD_TOP)
+  cmd = ['art/tools/golem/build-target.sh']
+  cmd += ['-j' + str(n_threads)]
+  cmd += ['--showcommands']
+  cmd += ['--machine-type=%s' %(machine_type)]
+  cmd += ['--golem=%s' %(default_golem_config)]
+  cmd += ['--tarball']
+  sys.stdout.write(str(cmd) + '\n')
+  sys.stdout.flush()
+
+  if subprocess.call(cmd):
+    sys.exit(1)
+
+if target.get('run-test'):
+  run_test_command = [os.path.join(env.ANDROID_BUILD_TOP,
+                                   'art/test/testrunner/testrunner.py')]
+  run_test_command += target.get('run-test', [])
+  run_test_command += ['-j', str(n_threads)]
+  run_test_command += ['-b']
+  run_test_command += ['--host']
+  run_test_command += ['--verbose']
+
+  sys.stdout.write(str(run_test_command) + '\n')
+  sys.stdout.flush()
+  if subprocess.call(run_test_command):
+    sys.exit(1)
+
+sys.exit(0)
diff --git a/test/testrunner/target_config.py b/test/testrunner/target_config.py
new file mode 100644
index 0000000..82f7832
--- /dev/null
+++ b/test/testrunner/target_config.py
@@ -0,0 +1,318 @@
+target_config = {
+
+# Configuration syntax:
+#
+#   Required keys: (Use one or more of these)
+#    * golem - specify a golem machine-type to build, e.g. android-armv8
+#              (uses art/tools/golem/build-target.sh)
+#    * make - specify a make target to build, e.g. build-art-host
+#    * run-test - runs the tests in art/test/ directory with testrunner.py,
+#                 specify a list of arguments to pass to testrunner.py
+#
+#   Optional keys: (Use any of these)
+#    * env - Add additional environment variable to the current environment.
+#
+# *** IMPORTANT ***:
+#    This configuration is used by the android build server. Targets must not be renamed
+#    or removed.
+#
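+# A minimal example entry combining the keys above (illustrative only; not a
+# real buildbot target):
+#
+#    'art-example' : {
+#        'make' : 'build-art-host',
+#        'run-test' : ['--interpreter'],
+#        'env' : {
+#            'ART_USE_READ_BARRIER' : 'false'
+#        }
+#    },
+#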
+
+##########################################
+
+    # ART run-test configurations
+    # (calls testrunner which builds and then runs the test targets)
+
+    'art-test' : {
+        'make' : 'test-art-host-gtest',
+        'run-test' : [],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-interpreter' : {
+        'run-test' : ['--interpreter'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-interpreter-access-checks' : {
+        'run-test' : ['--interp-ac'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-jit' : {
+        'run-test' : ['--jit'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gcstress-gcverify': {
+        'run-test': ['--gcstress',
+                     '--gcverify'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-interpreter-gcstress' : {
+        'run-test' : ['--interpreter',
+                      '--gcstress'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-optimizing-gcstress' : {
+        'run-test' : ['--gcstress',
+                      '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-jit-gcstress' : {
+        'run-test' : ['--jit',
+                      '--gcstress'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_DEFAULT_GC_TYPE' : 'SS'
+        }
+    },
+    'art-read-barrier' : {
+        'run-test': ['--interpreter',
+                  '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-read-barrier-gcstress' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing',
+                      '--gcstress'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-read-barrier-table-lookup' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_READ_BARRIER_TYPE' : 'TABLELOOKUP',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-debug-gc' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing'],
+        'env' : {
+            'ART_TEST_DEBUG_GC' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-ss-gc' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing',
+                      '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gss-gc' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing',
+                      '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-ss-gc-tlab' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing',
+                      '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gss-gc-tlab' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing',
+                      '--jit'],
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-tracing' : {
+        'run-test' : ['--trace'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-interpreter-tracing' : {
+        'run-test' : ['--interpreter',
+                      '--trace'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-forcecopy' : {
+        'run-test' : ['--forcecopy'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-no-prebuild' : {
+        'run-test' : ['--no-prebuild'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-no-image' : {
+        'run-test' : ['--no-image'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-interpreter-no-image' : {
+        'run-test' : ['--interpreter',
+                      '--no-image'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-relocate-no-patchoat' : {
+        'run-test' : ['--relocate-npatchoat'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-no-dex2oat' : {
+        'run-test' : ['--no-dex2oat'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-heap-poisoning' : {
+        'run-test' : ['--interpreter',
+                      '--optimizing'],
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'false',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+
+    # ART gtest configurations
+    # (calls make 'target' which builds and then runs the gtests).
+
+    'art-gtest' : {
+        'make' :  'test-art-host-gtest',
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true'
+        }
+    },
+    'art-gtest-read-barrier': {
+        'make' :  'test-art-host-gtest',
+        'env' : {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-gtest-read-barrier-table-lookup': {
+        'make' :  'test-art-host-gtest',
+        'env': {
+            'ART_USE_READ_BARRIER' : 'true',
+            'ART_READ_BARRIER_TYPE' : 'TABLELOOKUP',
+            'ART_HEAP_POISONING' : 'true'
+        }
+    },
+    'art-gtest-ss-gc': {
+        'make' :  'test-art-host-gtest',
+        'env': {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-gss-gc': {
+        'make' :  'test-art-host-gtest',
+        'env' : {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-ss-gc-tlab': {
+        'make' :  'test-art-host-gtest',
+        'env': {
+            'ART_DEFAULT_GC_TYPE' : 'SS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false',
+        }
+    },
+    'art-gtest-gss-gc-tlab': {
+        'make' :  'test-art-host-gtest',
+        'env': {
+            'ART_DEFAULT_GC_TYPE' : 'GSS',
+            'ART_USE_TLAB' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-debug-gc' : {
+        'make' :  'test-art-host-gtest',
+        'env' : {
+            'ART_TEST_DEBUG_GC' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-valgrind32': {
+        'make' : 'valgrind-test-art-host32',
+        'env': {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-valgrind64': {
+        'make' : 'valgrind-test-art-host64',
+        'env': {
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+    'art-gtest-heap-poisoning': {
+        'make' : 'valgrind-test-art-host64',
+        'env' : {
+            'ART_HEAP_POISONING' : 'true',
+            'ART_USE_READ_BARRIER' : 'false'
+        }
+    },
+
+   # ART Golem build targets used by go/lem (continuous ART benchmarking),
+   # (art-opt-cc is used by default since it mimics the default preopt config),
+   #
+   # calls golem/build-target.sh which builds a golem tarball of the target name,
+   #     e.g. 'golem: android-armv7' produces an 'android-armv7.tar.gz' upon success.
+
+    'art-golem-android-armv7': {
+        'golem' : 'android-armv7'
+    },
+    'art-golem-android-armv8': {
+        'golem' : 'android-armv8'
+    },
+    'art-golem-linux-armv7': {
+        'golem' : 'linux-armv7'
+    },
+    'art-golem-linux-armv8': {
+        'golem' : 'linux-armv8'
+    },
+    'art-golem-linux-ia32': {
+        'golem' : 'linux-ia32'
+    },
+    'art-golem-linux-x64': {
+        'golem' : 'linux-x64'
+    },
+}
diff --git a/test/testrunner/testrunner.py b/test/testrunner/testrunner.py
index c22b0be..6a8b0ae 100755
--- a/test/testrunner/testrunner.py
+++ b/test/testrunner/testrunner.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
 # Copyright 2017, The Android Open Source Project
 #
@@ -48,14 +48,17 @@
 import fnmatch
 import itertools
 import json
+import multiprocessing
 import os
 import re
 import subprocess
 import sys
+import tempfile
 import threading
 import time
 
 import env
+from target_config import target_config
 
 TARGET_TYPES = set()
 RUN_TYPES = set()
@@ -71,6 +74,9 @@
 ADDRESS_SIZES = set()
 OPTIMIZING_COMPILER_TYPES = set()
 ADDRESS_SIZES_TARGET = {'host': set(), 'target': set()}
+# timeout for individual tests.
+# TODO: make it adjustable per tests and for buildbots
+timeout = 3000 # 50 minutes
 
 # DISABLED_TEST_CONTAINER holds information about the disabled tests. It is a map
 # that has key as the test name (like 001-HelloWorld), and value as set of
@@ -111,7 +117,7 @@
 skipped_tests = []
 
 # Flags
-n_thread = 1
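+# -1 is a sentinel: a per-build-target default is chosen later via
+# get_default_threads().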
+n_thread = -1
 test_count = 0
 total_test_count = 0
 verbose = False
@@ -141,7 +147,7 @@
   VARIANT_TYPE_DICT['jni'] = {'jni', 'forcecopy', 'checkjni'}
   VARIANT_TYPE_DICT['address_sizes'] = {'64', '32'}
   VARIANT_TYPE_DICT['compiler'] = {'interp-ac', 'interpreter', 'jit', 'optimizing',
-                              'regalloc_gc'}
+                              'regalloc_gc', 'speed-profile'}
 
   for v_type in VARIANT_TYPE_DICT:
     TOTAL_VARIANTS_SET = TOTAL_VARIANTS_SET.union(VARIANT_TYPE_DICT.get(v_type))
@@ -183,9 +189,20 @@
   if env.ART_TEST_OPTIMIZING_GRAPH_COLOR:
     COMPILER_TYPES.add('regalloc_gc')
     OPTIMIZING_COMPILER_TYPES.add('regalloc_gc')
-  if env.ART_TEST_OPTIMIZING or not COMPILER_TYPES: # Default
+  if env.ART_TEST_OPTIMIZING:
     COMPILER_TYPES.add('optimizing')
     OPTIMIZING_COMPILER_TYPES.add('optimizing')
+  if env.ART_TEST_SPEED_PROFILE:
+    COMPILER_TYPES.add('speed-profile')
+
+  # By default we run all 'compiler' variants.
+  if not COMPILER_TYPES:
+    COMPILER_TYPES.add('optimizing')
+    COMPILER_TYPES.add('jit')
+    COMPILER_TYPES.add('interpreter')
+    COMPILER_TYPES.add('interp-ac')
+    COMPILER_TYPES.add('speed-profile')
+    OPTIMIZING_COMPILER_TYPES.add('optimizing')
 
   if env.ART_TEST_RUN_TEST_RELOCATE:
     RELOCATE_TYPES.add('relocate')
@@ -245,9 +262,26 @@
     ADDRESS_SIZES_TARGET['host'] = ADDRESS_SIZES_TARGET['host'].union(ADDRESS_SIZES)
     ADDRESS_SIZES_TARGET['target'] = ADDRESS_SIZES_TARGET['target'].union(ADDRESS_SIZES)
 
+  global n_thread
+  if n_thread == -1:
+    if 'target' in TARGET_TYPES:
+      n_thread = get_default_threads('target')
+    else:
+      n_thread = get_default_threads('host')
+
   global semaphore
   semaphore = threading.Semaphore(n_thread)
 
+  if not sys.stdout.isatty():
+    global COLOR_ERROR
+    global COLOR_PASS
+    global COLOR_SKIP
+    global COLOR_NORMAL
+    COLOR_ERROR = ''
+    COLOR_PASS = ''
+    COLOR_SKIP = ''
+    COLOR_NORMAL = ''
+
 
 def run_tests(tests):
   """Creates thread workers to run the tests.
@@ -358,6 +392,8 @@
         options_test += ' --interpreter --verify-soft-fail'
       elif compiler == 'jit':
         options_test += ' --jit'
+      elif compiler == 'speed-profile':
+        options_test += ' --random-profile'
 
       if relocate == 'relocate':
         options_test += ' --relocate'
@@ -403,8 +439,10 @@
           options_test += ' --instruction-set-features ' + \
                           env.HOST_2ND_ARCH_PREFIX_DEX2OAT_HOST_INSTRUCTION_SET_FEATURES
 
-      options_test = (' --output-path %s/run-test-output/%s') % (
-        env.ART_HOST_TEST_DIR, test_name) + options_test
+      # TODO(http://36039166): This is a temporary solution to
+      # fix build breakages.
+      options_test = (' --output-path %s') % (
+          tempfile.mkdtemp(dir=env.ART_HOST_TEST_DIR)) + options_test
 
       run_test_sh = env.ANDROID_BUILD_TOP + '/art/test/run-test'
       command = run_test_sh + ' ' + options_test + ' ' + test
@@ -442,15 +480,15 @@
       test_skipped = True
     else:
       test_skipped = False
-      proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
-      script_output = proc.stdout.read().strip()
+      proc = subprocess.Popen(command.split(), stderr=subprocess.STDOUT,
+                              stdout=subprocess.PIPE, universal_newlines=True)
+      script_output = proc.communicate(timeout=timeout)[0]
       test_passed = not proc.wait()
 
     if not test_skipped:
       if test_passed:
         print_test_info(test_name, 'PASS')
       else:
-        failed_tests.append(test_name)
+        failed_tests.append((test_name, script_output))
         if not env.ART_TEST_KEEP_GOING:
           stop_testrunner = True
         print_test_info(test_name, 'FAIL', ('%s\n%s') % (
@@ -460,9 +498,14 @@
       skipped_tests.append(test_name)
     else:
       print_test_info(test_name, '')
-  except Exception, e:
-    failed_tests.append(test_name)
-    print_text(('%s\n%s\n') % (command, str(e)))
+  except subprocess.TimeoutExpired:
+    failed_tests.append((test_name, 'Timed out in %d seconds' % timeout))
+    print_test_info(test_name, 'TIMEOUT', 'Timed out in %d seconds\n%s' % (
+        timeout, command))
+  except Exception as e:
+    failed_tests.append((test_name, str(e)))
+    print_test_info(test_name, 'FAIL',
+                    ('%s\n%s\n\n') % (command, str(e)))
   finally:
     semaphore.release()
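The hunk above switches from reading the pipe directly to communicate() with a deadline; communicate() raises subprocess.TimeoutExpired but does not kill the child. A self-contained sketch of the same pattern, with 'sleep 5' standing in for a run-test invocation:

```python
import subprocess

proc = subprocess.Popen(['sleep', '5'], stderr=subprocess.STDOUT,
                        stdout=subprocess.PIPE, universal_newlines=True)
try:
    output = proc.communicate(timeout=1)[0]
except subprocess.TimeoutExpired:
    proc.kill()  # communicate() leaves the child running on timeout.
    output = proc.communicate()[0]
```

The kill-and-drain step is the documented way to reap a timed-out child; the testrunner above only records the timeout and releases the semaphore, leaving the child to the harness.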
 
@@ -499,11 +542,11 @@
       test_count,
       total_test_count)
 
-    if result == "FAIL":
+    if result == 'FAIL' or result == 'TIMEOUT':
       info += ('%s %s %s\n%s\n') % (
         progress_info,
         test_name,
-        COLOR_ERROR + 'FAIL' + COLOR_NORMAL,
+        COLOR_ERROR + result + COLOR_NORMAL,
         failed_test_info)
     else:
       result_text = ''
@@ -524,20 +567,35 @@
         allowed_test_length = console_width - total_output_length
         test_name_len = len(test_name)
         if allowed_test_length < test_name_len:
-          test_name = ('%s...%s') % (
-            test_name[:(allowed_test_length - 3)/2],
-            test_name[-(allowed_test_length - 3)/2:])
+          test_name = ('...%s') % (
+            test_name[-(allowed_test_length - 3):])
         info += ('%s %s %s') % (
           progress_info,
           test_name,
           result_text)
     print_text(info)
-  except Exception, e:
+  except Exception as e:
     print_text(('%s\n%s\n') % (test_name, str(e)))
     failed_tests.append(test_name)
   finally:
     print_mutex.release()
 
+def verify_knownfailure_entry(entry):
+  supported_field = {
+      'tests' : (list, str),
+      'description' : (list, str),
+      'bug' : (str,),
+      'variant' : (str,),
+      'env_vars' : (dict,),
+  }
+  for field in entry:
+    field_type = type(entry[field])
+    if field_type not in supported_field[field]:
+      raise ValueError('%s is not a supported type for %s\n%s' % (
+          str(field_type),
+          field,
+          str(entry)))
+
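As a concrete illustration, an entry that passes this validation might look as follows; the test name, bug link, and variant are invented for the example, and only the field names and types mirror the validator above:

```python
entry = {
    'tests': '004-SignalTest',                      # str (or a list of str)
    'description': 'Fails intermittently on host',  # str (or a list of str)
    'bug': 'http://b/00000000',                     # str
    'variant': 'gcstress & target',                 # str
    'env_vars': {'ART_USE_READ_BARRIER': 'true'},   # dict
}
verify_knownfailure_entry(entry)  # Raises ValueError on any type mismatch.
```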
 def get_disabled_test_info():
   """Generate set of known failures.
 
@@ -554,15 +612,18 @@
 
   disabled_test_info = {}
   for failure in known_failures_info:
-    tests = failure.get('test')
-    if tests:
+    verify_knownfailure_entry(failure)
+    tests = failure.get('tests', [])
+    if isinstance(tests, str):
       tests = [tests]
-    else:
-      tests = failure.get('tests', [])
     variants = parse_variants(failure.get('variant'))
     env_vars = failure.get('env_vars')
+
     if check_env_vars(env_vars):
       for test in tests:
+        if test not in RUN_TEST_SET:
+          raise ValueError('%s is not a valid run-test' % (
+              test))
         if test in disabled_test_info:
           disabled_test_info[test] = disabled_test_info[test].union(variants)
         else:
@@ -626,6 +687,9 @@
     variant = set()
     for and_variant in and_variants:
       and_variant = and_variant.strip()
+      if and_variant not in TOTAL_VARIANTS_SET:
+        raise ValueError('%s is not a valid variant' % (
+            and_variant))
       variant.add(and_variant)
     variant_list.add(frozenset(variant))
   return variant_list
@@ -642,16 +706,29 @@
     console_width = int(os.popen('stty size', 'r').read().split()[1])
     eraser_text = '\r' + ' ' * console_width + '\r'
     print_text(eraser_text)
+
+  # Prints information about the total tests run.
+  # E.g., "2/38 (5%) tests passed".
+  passed_test_count = total_test_count - len(skipped_tests) - len(failed_tests)
+  passed_test_information = ('%d/%d (%d%%) %s passed.\n') % (
+      passed_test_count,
+      total_test_count,
+      (passed_test_count * 100) // total_test_count,
+      'tests' if passed_test_count != 1 else 'test')
+  print_text(passed_test_information)
+
+  # Prints the list of skipped tests, if any.
   if skipped_tests:
-    print_text(COLOR_SKIP + 'SKIPPED TESTS' + COLOR_NORMAL + '\n')
+    print_text(COLOR_SKIP + 'SKIPPED TESTS: ' + COLOR_NORMAL + '\n')
     for test in skipped_tests:
       print_text(test + '\n')
     print_text('\n')
 
+  # Prints the list of failed tests, if any.
   if failed_tests:
-    print_text(COLOR_ERROR + 'FAILED TESTS' + COLOR_NORMAL + '\n')
-    for test in failed_tests:
-      print_text(test + '\n')
+    print_text(COLOR_ERROR + 'FAILED: ' + COLOR_NORMAL + '\n')
+    for test_info in failed_tests:
+      print_text(('%s\n%s\n' % (test_info[0], test_info[1])))
 
 
 def parse_test_name(test_name):
@@ -705,6 +782,33 @@
   raise ValueError(test_name + " is not a valid test")
 
 
+def setup_env_for_build_target(build_target, parser, options):
+  """Set up the environment for the given build target.
+
+  The method sets up the environment for the master-art-host targets.
+  """
+  os.environ.update(build_target.get('env', {}))
+  os.environ['SOONG_ALLOW_MISSING_DEPENDENCIES'] = 'true'
+  print_text('%s\n' % (str(os.environ)))
+
+  target_options = vars(parser.parse_args(build_target.get('flags', [])))
+  target_options['host'] = True
+  target_options['verbose'] = True
+  target_options['build'] = True
+  target_options['n_thread'] = options['n_thread']
+  target_options['dry_run'] = options['dry_run']
+
+  return target_options
+
+def get_default_threads(target):
+  if target == 'target':
+    adb_command = 'adb shell cat /sys/devices/system/cpu/present'
+    cpu_info_proc = subprocess.Popen(adb_command.split(), stdout=subprocess.PIPE,
+                                     universal_newlines=True)
+    cpu_info = cpu_info_proc.stdout.read()
+    # 'present' holds a range like '0-7'; use the highest CPU index.
+    return int(cpu_info.split('-')[1])
+  else:
+    return multiprocessing.cpu_count()
+
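On the device path, /sys/devices/system/cpu/present contains a range string, and the parsing above keeps the highest CPU index. For intuition, with sample contents assumed:

```python
cpu_info = '0-7'                    # Typical contents on an octa-core device.
print(int(cpu_info.split('-')[1]))  # 7 -- the highest CPU index.
```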
 def parse_option():
   global verbose
   global dry_run
@@ -712,10 +816,12 @@
   global build
   global gdb
   global gdb_arg
+  global timeout
 
   parser = argparse.ArgumentParser(description="Runs all or a subset of the ART test suite.")
   parser.add_argument('-t', '--test', dest='test', help='name of the test')
   parser.add_argument('-j', type=int, dest='n_thread')
+  parser.add_argument('--timeout', default=timeout, type=int, dest='timeout')
   for variant in TOTAL_VARIANTS_SET:
     flag = '--' + variant
     flag_dest = variant.replace('-', '_')
@@ -726,91 +832,106 @@
   parser.add_argument('--dry-run', action='store_true', dest='dry_run')
   parser.add_argument("--skip", action="append", dest="skips", default=[],
                       help="Skip the given test in all circumstances.")
-  parser.add_argument('-b', '--build-dependencies', action='store_true', dest='build')
+  parser.add_argument('--no-build-dependencies',
+                      action='store_false', dest='build',
+                      help="Don't build dependencies under any circumstances. This is the " +
+                           "behavior if ART_TEST_RUN_TEST_ALWAYS_BUILD is not set to 'true'.")
+  parser.add_argument('-b', '--build-dependencies',
+                      action='store_true', dest='build',
+                      help="Build dependencies under all circumstances. By default we will " +
+                           "not build dependencies unless ART_TEST_RUN_TEST_BUILD=true.")
+  parser.add_argument('--build-target', dest='build_target', help='master-art-host targets')
+  parser.set_defaults(build=env.ART_TEST_RUN_TEST_BUILD)
   parser.add_argument('--gdb', action='store_true', dest='gdb')
   parser.add_argument('--gdb-arg', dest='gdb_arg')
 
-  options = parser.parse_args()
+  options = vars(parser.parse_args())
+  if options['build_target']:
+    options = setup_env_for_build_target(target_config[options['build_target']],
+                                         parser, options)
+
   test = ''
-  env.EXTRA_DISABLED_TESTS.update(set(options.skips))
-  if options.test:
-    test = parse_test_name(options.test)
-  if options.pictest:
+  env.EXTRA_DISABLED_TESTS.update(set(options['skips']))
+  if options['test']:
+    test = parse_test_name(options['test'])
+  if options['pictest']:
     PICTEST_TYPES.add('pictest')
-  if options.ndebug:
+  if options['ndebug']:
     RUN_TYPES.add('ndebug')
-  if options.interp_ac:
+  if options['interp_ac']:
     COMPILER_TYPES.add('interp-ac')
-  if options.picimage:
+  if options['picimage']:
     IMAGE_TYPES.add('picimage')
-  if options.n64:
+  if options['n64']:
     ADDRESS_SIZES.add('64')
-  if options.interpreter:
+  if options['interpreter']:
     COMPILER_TYPES.add('interpreter')
-  if options.jni:
+  if options['jni']:
     JNI_TYPES.add('jni')
-  if options.relocate_npatchoat:
+  if options['relocate_npatchoat']:
     RELOCATE_TYPES.add('relocate-npatchoat')
-  if options.no_prebuild:
+  if options['no_prebuild']:
     PREBUILD_TYPES.add('no-prebuild')
-  if options.npictest:
+  if options['npictest']:
     PICTEST_TYPES.add('npictest')
-  if options.no_dex2oat:
+  if options['no_dex2oat']:
     PREBUILD_TYPES.add('no-dex2oat')
-  if options.jit:
+  if options['jit']:
     COMPILER_TYPES.add('jit')
-  if options.relocate:
+  if options['relocate']:
     RELOCATE_TYPES.add('relocate')
-  if options.ndebuggable:
+  if options['ndebuggable']:
     DEBUGGABLE_TYPES.add('ndebuggable')
-  if options.no_image:
+  if options['no_image']:
     IMAGE_TYPES.add('no-image')
-  if options.optimizing:
+  if options['optimizing']:
     COMPILER_TYPES.add('optimizing')
-  if options.trace:
+  if options['speed_profile']:
+    COMPILER_TYPES.add('speed-profile')
+  if options['trace']:
     TRACE_TYPES.add('trace')
-  if options.gcstress:
+  if options['gcstress']:
     GC_TYPES.add('gcstress')
-  if options.no_relocate:
+  if options['no_relocate']:
     RELOCATE_TYPES.add('no-relocate')
-  if options.target:
+  if options['target']:
     TARGET_TYPES.add('target')
-  if options.forcecopy:
+  if options['forcecopy']:
     JNI_TYPES.add('forcecopy')
-  if options.n32:
+  if options['n32']:
     ADDRESS_SIZES.add('32')
-  if options.host:
+  if options['host']:
     TARGET_TYPES.add('host')
-  if options.gcverify:
+  if options['gcverify']:
     GC_TYPES.add('gcverify')
-  if options.debuggable:
+  if options['debuggable']:
     DEBUGGABLE_TYPES.add('debuggable')
-  if options.prebuild:
+  if options['prebuild']:
     PREBUILD_TYPES.add('prebuild')
-  if options.debug:
+  if options['debug']:
     RUN_TYPES.add('debug')
-  if options.checkjni:
+  if options['checkjni']:
     JNI_TYPES.add('checkjni')
-  if options.ntrace:
+  if options['ntrace']:
     TRACE_TYPES.add('ntrace')
-  if options.cms:
+  if options['cms']:
     GC_TYPES.add('cms')
-  if options.multipicimage:
+  if options['multipicimage']:
     IMAGE_TYPES.add('multipicimage')
-  if options.verbose:
+  if options['verbose']:
     verbose = True
-  if options.n_thread:
-    n_thread = max(1, options.n_thread)
-  if options.dry_run:
+  if options['n_thread']:
+    n_thread = max(1, options['n_thread'])
+  if options['dry_run']:
     dry_run = True
     verbose = True
-  if options.build:
-    build = True
-  if options.gdb:
+  build = options['build']
+  if options['gdb']:
     n_thread = 1
     gdb = True
-    if options.gdb_arg:
-      gdb_arg = options.gdb_arg
+    if options['gdb_arg']:
+      gdb_arg = options['gdb_arg']
+  timeout = options['timeout']
 
   return test
 
@@ -825,9 +946,11 @@
     if 'target' in TARGET_TYPES:
       build_targets += 'test-art-target-run-test-dependencies'
     build_command = 'make'
-    build_command += ' -j' + str(n_thread)
+    build_command += ' -j'
     build_command += ' -C ' + env.ANDROID_BUILD_TOP
     build_command += ' ' + build_targets
+    # Add 'dist' to avoid Jack issues b/36169180.
+    build_command += ' dist'
     if subprocess.call(build_command.split()):
       sys.exit(1)
   if user_requested_test:
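With the hunk above, a host-only invocation assembles a command along these lines; the checkout path is a stand-in:

```python
# Illustration of the string built above, not executed by this snippet.
build_command = ('make -j -C /path/to/checkout '
                 'test-art-host-run-test-dependencies dist')
print(build_command.split())  # The argv handed to subprocess.call().
```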
@@ -840,7 +963,7 @@
     while threading.active_count() > 1:
       time.sleep(0.1)
     print_analysis()
-  except Exception, e:
+  except Exception as e:
     print_analysis()
     print_text(str(e))
     sys.exit(1)
diff --git a/test/ti-agent/agent_startup.cc b/test/ti-agent/agent_startup.cc
new file mode 100644
index 0000000..b55db7b
--- /dev/null
+++ b/test/ti-agent/agent_startup.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "agent_startup.h"
+
+#include "android-base/logging.h"
+#include "android-base/macros.h"
+
+#include "jni_binder.h"
+#include "jvmti_helper.h"
+
+namespace art {
+
+static constexpr const char* kMainClass = "Main";
+
+static StartCallback gCallback = nullptr;
+
+// TODO: Check this. This may not work on device. The classloader containing the app's classes
+//       may not have been created at this point (i.e., if it's not the system classloader).
+static void JNICALL VMInitCallback(jvmtiEnv* jvmti_env,
+                                   JNIEnv* jni_env,
+                                   jthread thread ATTRIBUTE_UNUSED) {
+  // Bind kMainClass native methods.
+  BindFunctions(jvmti_env, jni_env, kMainClass);
+
+  if (gCallback != nullptr) {
+    gCallback(jvmti_env, jni_env);
+    gCallback = nullptr;
+  }
+
+  // And delete the jvmtiEnv.
+  jvmti_env->DisposeEnvironment();
+}
+
+// Install a phase callback that will bind JNI functions on VMInit.
+void BindOnLoad(JavaVM* vm, StartCallback callback) {
+  // Use a new jvmtiEnv. Otherwise we might collide with table changes.
+  jvmtiEnv* install_env;
+  if (vm->GetEnv(reinterpret_cast<void**>(&install_env), JVMTI_VERSION_1_0) != 0) {
+    LOG(FATAL) << "Could not get jvmtiEnv";
+  }
+  SetAllCapabilities(install_env);
+
+  {
+    jvmtiEventCallbacks callbacks;
+    memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+    callbacks.VMInit = VMInitCallback;
+
+    CheckJvmtiError(install_env, install_env->SetEventCallbacks(&callbacks, sizeof(callbacks)));
+  }
+
+  CheckJvmtiError(install_env, install_env->SetEventNotificationMode(JVMTI_ENABLE,
+                                                                     JVMTI_EVENT_VM_INIT,
+                                                                     nullptr));
+
+  gCallback = callback;
+}
+
+// Ensure binding of the Main class when the agent is started through OnAttach.
+void BindOnAttach(JavaVM* vm, StartCallback callback) {
+  // Get a JNIEnv. As the thread is attached, we must not destroy it.
+  JNIEnv* env;
+  CHECK_EQ(0, vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6))
+      << "Could not get JNIEnv";
+
+  jvmtiEnv* jvmti_env;
+  CHECK_EQ(0, vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0))
+      << "Could not get jvmtiEnv";
+  SetAllCapabilities(jvmti_env);
+
+  BindFunctions(jvmti_env, env, kMainClass);
+
+  if (callback != nullptr) {
+    callback(jvmti_env, env);
+  }
+
+  if (jvmti_env->DisposeEnvironment() != JVMTI_ERROR_NONE) {
+    LOG(FATAL) << "Could not dispose temporary jvmtiEnv";
+  }
+}
+
+}  // namespace art
diff --git a/test/ti-agent/agent_startup.h b/test/ti-agent/agent_startup.h
new file mode 100644
index 0000000..4963320
--- /dev/null
+++ b/test/ti-agent/agent_startup.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_AGENT_STARTUP_H_
+#define ART_TEST_TI_AGENT_AGENT_STARTUP_H_
+
+#include <functional>
+
+#include "jni.h"
+#include "jvmti.h"
+
+namespace art {
+
+using StartCallback = void(*)(jvmtiEnv*, JNIEnv*);
+
+// Ensure binding of the Main class when the agent is started through OnLoad.
+void BindOnLoad(JavaVM* vm, StartCallback callback);
+
+// Ensure binding of the Main class when the agent is started through OnAttach.
+void BindOnAttach(JavaVM* vm, StartCallback callback);
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_AGENT_STARTUP_H_
diff --git a/test/ti-agent/common_helper.cc b/test/ti-agent/common_helper.cc
index ea6359e..ab5dbcc 100644
--- a/test/ti-agent/common_helper.cc
+++ b/test/ti-agent/common_helper.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "ti-agent/common_helper.h"
+#include "common_helper.h"
 
 #include <dlfcn.h>
 #include <stdio.h>
@@ -25,46 +25,17 @@
 #include "art_method.h"
 #include "jni.h"
 #include "jni_internal.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 #include "scoped_thread_state_change-inl.h"
-#include "ScopedLocalRef.h"
 #include "stack.h"
-#include "ti-agent/common_load.h"
 #include "utils.h"
 
+#include "jni_binder.h"
+#include "jvmti_helper.h"
+#include "scoped_local_ref.h"
+#include "test_env.h"
+
 namespace art {
-bool RuntimeIsJVM;
-
-bool IsJVM() {
-  return RuntimeIsJVM;
-}
-
-void SetAllCapabilities(jvmtiEnv* env) {
-  jvmtiCapabilities caps;
-  env->GetPotentialCapabilities(&caps);
-  env->AddCapabilities(&caps);
-}
-
-bool JvmtiErrorToException(JNIEnv* env, jvmtiError error) {
-  if (error == JVMTI_ERROR_NONE) {
-    return false;
-  }
-
-  ScopedLocalRef<jclass> rt_exception(env, env->FindClass("java/lang/RuntimeException"));
-  if (rt_exception.get() == nullptr) {
-    // CNFE should be pending.
-    return true;
-  }
-
-  char* err;
-  jvmti_env->GetErrorName(error, &err);
-
-  env->ThrowNew(rt_exception.get(), err);
-
-  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(err));
-  return true;
-}
-
 
 template <bool is_redefine>
 static void throwCommonRedefinitionError(jvmtiEnv* jvmti,
@@ -303,7 +274,7 @@
                                                        JVMTI_EVENT_CLASS_FILE_LOAD_HOOK,
                                                        nullptr);
   if (res != JVMTI_ERROR_NONE) {
-    JvmtiErrorToException(env, res);
+    JvmtiErrorToException(env, jvmti_env, res);
   }
 }
 
@@ -412,137 +383,4 @@
 
 }  // namespace common_transform
 
-static void BindMethod(jvmtiEnv* jenv,
-                       JNIEnv* env,
-                       jclass klass,
-                       jmethodID method) {
-  char* name;
-  char* signature;
-  jvmtiError name_result = jenv->GetMethodName(method, &name, &signature, nullptr);
-  if (name_result != JVMTI_ERROR_NONE) {
-    LOG(FATAL) << "Could not get methods";
-  }
-
-  std::string names[2];
-  if (IsJVM()) {
-    // TODO Get the JNI long name
-    char* klass_name;
-    jvmtiError klass_result = jenv->GetClassSignature(klass, &klass_name, nullptr);
-    if (klass_result == JVMTI_ERROR_NONE) {
-      std::string name_str(name);
-      std::string klass_str(klass_name);
-      names[0] = GetJniShortName(klass_str, name_str);
-      jenv->Deallocate(reinterpret_cast<unsigned char*>(klass_name));
-    } else {
-      LOG(FATAL) << "Could not get class name!";
-    }
-  } else {
-    ScopedObjectAccess soa(Thread::Current());
-    ArtMethod* m = jni::DecodeArtMethod(method);
-    names[0] = m->JniShortName();
-    names[1] = m->JniLongName();
-  }
-  for (const std::string& mangled_name : names) {
-    if (mangled_name == "") {
-      continue;
-    }
-    void* sym = dlsym(RTLD_DEFAULT, mangled_name.c_str());
-    if (sym == nullptr) {
-      continue;
-    }
-
-    JNINativeMethod native_method;
-    native_method.fnPtr = sym;
-    native_method.name = name;
-    native_method.signature = signature;
-
-    env->RegisterNatives(klass, &native_method, 1);
-
-    jenv->Deallocate(reinterpret_cast<unsigned char*>(name));
-    jenv->Deallocate(reinterpret_cast<unsigned char*>(signature));
-    return;
-  }
-
-  LOG(FATAL) << "Could not find " << names[0];
-}
-
-static jclass FindClassWithSystemClassLoader(JNIEnv* env, const char* class_name) {
-  // Find the system classloader.
-  ScopedLocalRef<jclass> cl_klass(env, env->FindClass("java/lang/ClassLoader"));
-  if (cl_klass.get() == nullptr) {
-    return nullptr;
-  }
-  jmethodID getsystemclassloader_method = env->GetStaticMethodID(cl_klass.get(),
-                                                                 "getSystemClassLoader",
-                                                                 "()Ljava/lang/ClassLoader;");
-  if (getsystemclassloader_method == nullptr) {
-    return nullptr;
-  }
-  ScopedLocalRef<jobject> cl(env, env->CallStaticObjectMethod(cl_klass.get(),
-                                                              getsystemclassloader_method));
-  if (cl.get() == nullptr) {
-    return nullptr;
-  }
-
-  // Create a String of the name.
-  std::string descriptor = android::base::StringPrintf("L%s;", class_name);
-  std::string dot_name = DescriptorToDot(descriptor.c_str());
-  ScopedLocalRef<jstring> name_str(env, env->NewStringUTF(dot_name.c_str()));
-
-  // Call Class.forName with it.
-  ScopedLocalRef<jclass> c_klass(env, env->FindClass("java/lang/Class"));
-  if (c_klass.get() == nullptr) {
-    return nullptr;
-  }
-  jmethodID forname_method = env->GetStaticMethodID(
-      c_klass.get(),
-      "forName",
-      "(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;");
-  if (forname_method == nullptr) {
-    return nullptr;
-  }
-
-  return reinterpret_cast<jclass>(env->CallStaticObjectMethod(c_klass.get(),
-                                                              forname_method,
-                                                              name_str.get(),
-                                                              JNI_FALSE,
-                                                              cl.get()));
-}
-
-void BindFunctions(jvmtiEnv* jenv, JNIEnv* env, const char* class_name) {
-  // Use JNI to load the class.
-  ScopedLocalRef<jclass> klass(env, env->FindClass(class_name));
-  if (klass.get() == nullptr) {
-    // We may be called with the wrong classloader. Try explicitly using the system classloader.
-    env->ExceptionClear();
-    klass.reset(FindClassWithSystemClassLoader(env, class_name));
-    if (klass.get() == nullptr) {
-      LOG(FATAL) << "Could not load " << class_name;
-    }
-  }
-
-  // Use JVMTI to get the methods.
-  jint method_count;
-  jmethodID* methods;
-  jvmtiError methods_result = jenv->GetClassMethods(klass.get(), &method_count, &methods);
-  if (methods_result != JVMTI_ERROR_NONE) {
-    LOG(FATAL) << "Could not get methods";
-  }
-
-  // Check each method.
-  for (jint i = 0; i < method_count; ++i) {
-    jint modifiers;
-    jvmtiError mod_result = jenv->GetMethodModifiers(methods[i], &modifiers);
-    if (mod_result != JVMTI_ERROR_NONE) {
-      LOG(FATAL) << "Could not get methods";
-    }
-    constexpr jint kNative = static_cast<jint>(kAccNative);
-    if ((modifiers & kNative) != 0) {
-      BindMethod(jenv, env, klass.get(), methods[i]);
-    }
-  }
-
-  jenv->Deallocate(reinterpret_cast<unsigned char*>(methods));
-}
-
 }  // namespace art
diff --git a/test/ti-agent/common_helper.h b/test/ti-agent/common_helper.h
index 0318501..610019e 100644
--- a/test/ti-agent/common_helper.h
+++ b/test/ti-agent/common_helper.h
@@ -18,14 +18,12 @@
 #define ART_TEST_TI_AGENT_COMMON_HELPER_H_
 
 #include "jni.h"
-#include "openjdkjvmti/jvmti.h"
-#include "ScopedLocalRef.h"
+#include "jvmti.h"
 
 namespace art {
+
 namespace common_redefine {
-
 jint OnLoad(JavaVM* vm, char* options, void* reserved);
-
 }  // namespace common_redefine
 
 namespace common_retransform {
@@ -36,52 +34,6 @@
 jint OnLoad(JavaVM* vm, char* options, void* reserved);
 }  // namespace common_transform
 
-
-extern bool RuntimeIsJVM;
-
-bool IsJVM();
-
-template <typename T>
-static jobjectArray CreateObjectArray(JNIEnv* env,
-                                      jint length,
-                                      const char* component_type_descriptor,
-                                      T src) {
-  if (length < 0) {
-    return nullptr;
-  }
-
-  ScopedLocalRef<jclass> obj_class(env, env->FindClass(component_type_descriptor));
-  if (obj_class.get() == nullptr) {
-    return nullptr;
-  }
-
-  ScopedLocalRef<jobjectArray> ret(env, env->NewObjectArray(length, obj_class.get(), nullptr));
-  if (ret.get() == nullptr) {
-    return nullptr;
-  }
-
-  for (jint i = 0; i < length; ++i) {
-    jobject element = src(i);
-    env->SetObjectArrayElement(ret.get(), static_cast<jint>(i), element);
-    env->DeleteLocalRef(element);
-    if (env->ExceptionCheck()) {
-      return nullptr;
-    }
-  }
-
-  return ret.release();
-}
-
-void SetAllCapabilities(jvmtiEnv* env);
-
-bool JvmtiErrorToException(JNIEnv* env, jvmtiError error);
-
-// Load the class through JNI. Inspect it, find all native methods. Construct the corresponding
-// mangled name, run dlsym and bind the method.
-//
-// This will abort on failure.
-void BindFunctions(jvmtiEnv* jvmti_env, JNIEnv* env, const char* class_name);
-
 }  // namespace art
 
 #endif  // ART_TEST_TI_AGENT_COMMON_HELPER_H_
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index 351857d..9e7b75d 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -14,26 +14,25 @@
  * limitations under the License.
  */
 
-#include "common_load.h"
-
 #include <jni.h>
 #include <stdio.h>
-// TODO I don't know?
-#include "openjdkjvmti/jvmti.h"
 
-#include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/macros.h"
+
+#include "agent_startup.h"
 #include "common_helper.h"
+#include "jni_binder.h"
+#include "jvmti_helper.h"
+#include "test_env.h"
 
 #include "901-hello-ti-agent/basics.h"
 #include "909-attach-agent/attach.h"
 #include "936-search-onload/search_onload.h"
+#include "983-source-transform-verify/source_transform.h"
 
 namespace art {
 
-jvmtiEnv* jvmti_env;
-
 namespace {
 
 using OnLoad   = jint (*)(JavaVM* vm, char* options, void* reserved);
@@ -45,45 +44,6 @@
   OnAttach attach;
 };
 
-static void JNICALL VMInitCallback(jvmtiEnv *jvmti_env,
-                                   JNIEnv* jni_env,
-                                   jthread thread ATTRIBUTE_UNUSED) {
-  // Bind Main native methods.
-  BindFunctions(jvmti_env, jni_env, "Main");
-}
-
-// Install a phase callback that will bind JNI functions on VMInit.
-bool InstallBindCallback(JavaVM* vm) {
-  // Use a new jvmtiEnv. Otherwise we might collide with table changes.
-  jvmtiEnv* install_env;
-  if (vm->GetEnv(reinterpret_cast<void**>(&install_env), JVMTI_VERSION_1_0) != 0) {
-    return false;
-  }
-  SetAllCapabilities(install_env);
-
-  {
-    jvmtiEventCallbacks callbacks;
-    memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
-    callbacks.VMInit = VMInitCallback;
-
-    jvmtiError install_error = install_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
-    if (install_error != JVMTI_ERROR_NONE) {
-      return false;
-    }
-  }
-
-  {
-    jvmtiError enable_error = install_env->SetEventNotificationMode(JVMTI_ENABLE,
-                                                                    JVMTI_EVENT_VM_INIT,
-                                                                    nullptr);
-    if (enable_error != JVMTI_ERROR_NONE) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
 // A trivial OnLoad implementation that only initializes the global jvmti_env.
 static jint MinimalOnLoad(JavaVM* vm,
                           char* options ATTRIBUTE_UNUSED,
@@ -123,6 +83,9 @@
   { "943-private-recursive-jit", common_redefine::OnLoad, nullptr },
   { "944-transform-classloaders", common_redefine::OnLoad, nullptr },
   { "945-obsolete-native", common_redefine::OnLoad, nullptr },
+  { "981-dedup-original-dex", common_retransform::OnLoad, nullptr },
+  { "982-ok-no-retransform", common_retransform::OnLoad, nullptr },
+  { "983-source-transform-verify", Test983SourceTransformVerify::OnLoad, nullptr },
 };
 
 static AgentLib* FindAgent(char* name) {
@@ -152,28 +115,8 @@
   return true;
 }
 
-static void SetIsJVM(char* options) {
-  RuntimeIsJVM = strncmp(options, "jvm", 3) == 0;
-}
-
-static bool BindFunctionsAttached(JavaVM* vm, const char* class_name) {
-  // Get a JNIEnv. As the thread is attached, we must not destroy it.
-  JNIEnv* env;
-  if (vm->GetEnv(reinterpret_cast<void**>(&env), JNI_VERSION_1_6) != 0) {
-    printf("Unable to get JNI env!\n");
-    return false;
-  }
-
-  jvmtiEnv* jenv;
-  if (vm->GetEnv(reinterpret_cast<void**>(&jenv), JVMTI_VERSION_1_0) != 0) {
-    printf("Unable to get jvmti env!\n");
-    return false;
-  }
-  SetAllCapabilities(jenv);
-
-  BindFunctions(jenv, env, class_name);
-
-  return true;
+static void SetIsJVM(const char* options) {
+  SetJVM(strncmp(options, "jvm", 3) == 0);
 }
 
 }  // namespace
@@ -188,9 +131,7 @@
 
   SetIsJVM(remaining_options);
 
-  if (!InstallBindCallback(vm)) {
-    return 1;
-  }
+  BindOnLoad(vm, nullptr);
 
   AgentLib* lib = FindAgent(name_option);
   OnLoad fn = nullptr;
@@ -214,7 +155,7 @@
     return -1;
   }
 
-  BindFunctionsAttached(vm, "Main");
+  BindOnAttach(vm, nullptr);
 
   AgentLib* lib = FindAgent(name_option);
   if (lib == nullptr) {
diff --git a/test/ti-agent/jni_binder.cc b/test/ti-agent/jni_binder.cc
new file mode 100644
index 0000000..b66c2c7
--- /dev/null
+++ b/test/ti-agent/jni_binder.cc
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jni_binder.h"
+
+#include <dlfcn.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "android-base/logging.h"
+#include "android-base/stringprintf.h"
+
+#include "jvmti_helper.h"
+#include "scoped_local_ref.h"
+#include "scoped_utf_chars.h"
+#include "ti_utf.h"
+
+namespace art {
+
+static std::string MangleForJni(const std::string& s) {
+  std::string result;
+  size_t char_count = ti::CountModifiedUtf8Chars(s.c_str(), s.length());
+  const char* cp = &s[0];
+  for (size_t i = 0; i < char_count; ++i) {
+    uint32_t ch = ti::GetUtf16FromUtf8(&cp);
+    if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9')) {
+      result.push_back(ch);
+    } else if (ch == '.' || ch == '/') {
+      result += "_";
+    } else if (ch == '_') {
+      result += "_1";
+    } else if (ch == ';') {
+      result += "_2";
+    } else if (ch == '[') {
+      result += "_3";
+    } else {
+      const uint16_t leading = ti::GetLeadingUtf16Char(ch);
+      const uint32_t trailing = ti::GetTrailingUtf16Char(ch);
+
+      android::base::StringAppendF(&result, "_0%04x", leading);
+      if (trailing != 0) {
+        android::base::StringAppendF(&result, "_0%04x", trailing);
+      }
+    }
+  }
+  return result;
+}
+
+static std::string GetJniShortName(const std::string& class_descriptor, const std::string& method) {
+  // Remove the leading 'L' and trailing ';'...
+  std::string class_name(class_descriptor);
+  CHECK_EQ(class_name[0], 'L') << class_name;
+  CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
+  class_name.erase(0, 1);
+  class_name.erase(class_name.size() - 1, 1);
+
+  std::string short_name;
+  short_name += "Java_";
+  short_name += MangleForJni(class_name);
+  short_name += "_";
+  short_name += MangleForJni(method);
+  return short_name;
+}
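MangleForJni follows the JNI name-mangling rules: alphanumerics pass through, '.' and '/' map to '_', and '_', ';', '[' escape to '_1', '_2', '_3', with a '_0xxxx' escape for other UTF-16 code units. A rough Python sketch of the ASCII subset, for intuition only:

```python
def mangle_for_jni(s):
    # ASCII-only approximation; the '_0xxxx' escapes are omitted, so this
    # raises KeyError for characters the sketch does not handle.
    table = {'_': '_1', ';': '_2', '[': '_3'}
    return ''.join(c if c.isalnum() else '_' if c in './' else table[c]
                   for c in s)

print('Java_%s_%s' % (mangle_for_jni('art/Test_Main'), mangle_for_jni('run')))
# -> Java_art_Test_1Main_run
```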
+
+static void BindMethod(jvmtiEnv* jvmti_env, JNIEnv* env, jclass klass, jmethodID method) {
+  std::string name;
+  std::string signature;
+  std::string mangled_names[2];
+  {
+    char* name_cstr;
+    char* sig_cstr;
+    jvmtiError name_result = jvmti_env->GetMethodName(method, &name_cstr, &sig_cstr, nullptr);
+    CheckJvmtiError(jvmti_env, name_result);
+    CHECK(name_cstr != nullptr);
+    CHECK(sig_cstr != nullptr);
+    name = name_cstr;
+    signature = sig_cstr;
+
+    char* klass_name;
+    jvmtiError klass_result = jvmti_env->GetClassSignature(klass, &klass_name, nullptr);
+    CheckJvmtiError(jvmti_env, klass_result);
+
+    mangled_names[0] = GetJniShortName(klass_name, name);
+    // TODO: Long JNI name.
+
+    CheckJvmtiError(jvmti_env, Deallocate(jvmti_env, name_cstr));
+    CheckJvmtiError(jvmti_env, Deallocate(jvmti_env, sig_cstr));
+    CheckJvmtiError(jvmti_env, Deallocate(jvmti_env, klass_name));
+  }
+
+  for (const std::string& mangled_name : mangled_names) {
+    if (mangled_name.empty()) {
+      continue;
+    }
+    void* sym = dlsym(RTLD_DEFAULT, mangled_name.c_str());
+    if (sym == nullptr) {
+      continue;
+    }
+
+    JNINativeMethod native_method;
+    native_method.fnPtr = sym;
+    native_method.name = name.c_str();
+    native_method.signature = signature.c_str();
+
+    env->RegisterNatives(klass, &native_method, 1);
+
+    return;
+  }
+
+  LOG(FATAL) << "Could not find " << mangled_names[0];
+}
+
+static std::string DescriptorToDot(const char* descriptor) {
+  size_t length = strlen(descriptor);
+  if (length > 1) {
+    if (descriptor[0] == 'L' && descriptor[length - 1] == ';') {
+      // Descriptors have the leading 'L' and trailing ';' stripped.
+      std::string result(descriptor + 1, length - 2);
+      std::replace(result.begin(), result.end(), '/', '.');
+      return result;
+    } else {
+      // For arrays the 'L' and ';' remain intact.
+      std::string result(descriptor);
+      std::replace(result.begin(), result.end(), '/', '.');
+      return result;
+    }
+  }
+  // Do nothing for non-class/array descriptors.
+  return descriptor;
+}
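The conversion is the usual descriptor-to-binary-name mapping: class descriptors lose the 'L'/';' wrapper, while array descriptors only swap separators. Roughly, in Python:

```python
print('Ljava/lang/String;'[1:-1].replace('/', '.'))  # java.lang.String
print('[Ljava/lang/String;'.replace('/', '.'))       # [Ljava.lang.String;
```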
+
+static jobject GetSystemClassLoader(JNIEnv* env) {
+  ScopedLocalRef<jclass> cl_klass(env, env->FindClass("java/lang/ClassLoader"));
+  CHECK(cl_klass.get() != nullptr);
+  jmethodID getsystemclassloader_method = env->GetStaticMethodID(cl_klass.get(),
+                                                                 "getSystemClassLoader",
+                                                                 "()Ljava/lang/ClassLoader;");
+  CHECK(getsystemclassloader_method != nullptr);
+  return env->CallStaticObjectMethod(cl_klass.get(), getsystemclassloader_method);
+}
+
+static jclass FindClassWithClassLoader(JNIEnv* env, const char* class_name, jobject class_loader) {
+  // Create a String of the name.
+  std::string descriptor = android::base::StringPrintf("L%s;", class_name);
+  std::string dot_name = DescriptorToDot(descriptor.c_str());
+  ScopedLocalRef<jstring> name_str(env, env->NewStringUTF(dot_name.c_str()));
+
+  // Call Class.forName with it.
+  ScopedLocalRef<jclass> c_klass(env, env->FindClass("java/lang/Class"));
+  CHECK(c_klass.get() != nullptr);
+  jmethodID forname_method = env->GetStaticMethodID(
+      c_klass.get(),
+      "forName",
+      "(Ljava/lang/String;ZLjava/lang/ClassLoader;)Ljava/lang/Class;");
+  CHECK(forname_method != nullptr);
+
+  return static_cast<jclass>(env->CallStaticObjectMethod(c_klass.get(),
+                                                         forname_method,
+                                                         name_str.get(),
+                                                         JNI_FALSE,
+                                                         class_loader));
+}
+
+// Find the given classname. First try the implied classloader, then the system classloader,
+// then use JVMTI to find all classloaders.
+static jclass FindClass(jvmtiEnv* jvmti_env,
+                        JNIEnv* env,
+                        const char* class_name,
+                        jobject class_loader) {
+  if (class_loader != nullptr) {
+    return FindClassWithClassLoader(env, class_name, class_loader);
+  }
+
+  jclass from_implied = env->FindClass(class_name);
+  if (from_implied != nullptr) {
+    return from_implied;
+  }
+  env->ExceptionClear();
+
+  ScopedLocalRef<jobject> system_class_loader(env, GetSystemClassLoader(env));
+  CHECK(system_class_loader.get() != nullptr);
+  jclass from_system = FindClassWithClassLoader(env, class_name, system_class_loader.get());
+  if (from_system != nullptr) {
+    return from_system;
+  }
+  env->ExceptionClear();
+
+  // Look at the context classloaders of all threads.
+  jint thread_count;
+  jthread* threads;
+  CheckJvmtiError(jvmti_env, jvmti_env->GetAllThreads(&thread_count, &threads));
+  JvmtiUniquePtr threads_uptr = MakeJvmtiUniquePtr(jvmti_env, threads);
+
+  jclass result = nullptr;
+  for (jint t = 0; t != thread_count; ++t) {
+    // Always loop over all elements, as we need to free the local references.
+    if (result == nullptr) {
+      jvmtiThreadInfo info;
+      CheckJvmtiError(jvmti_env, jvmti_env->GetThreadInfo(threads[t], &info));
+      CheckJvmtiError(jvmti_env, Deallocate(jvmti_env, info.name));
+      if (info.thread_group != nullptr) {
+        env->DeleteLocalRef(info.thread_group);
+      }
+      if (info.context_class_loader != nullptr) {
+        result = FindClassWithClassLoader(env, class_name, info.context_class_loader);
+        env->ExceptionClear();
+        env->DeleteLocalRef(info.context_class_loader);
+      }
+    }
+    env->DeleteLocalRef(threads[t]);
+  }
+
+  if (result != nullptr) {
+    return result;
+  }
+
+  // TODO: Implement scanning *all* classloaders.
+  LOG(FATAL) << "Unimplemented";
+
+  return nullptr;
+}
+
+void BindFunctionsOnClass(jvmtiEnv* jvmti_env, JNIEnv* env, jclass klass) {
+  // Use JVMTI to get the methods.
+  jint method_count;
+  jmethodID* methods;
+  jvmtiError methods_result = jvmti_env->GetClassMethods(klass, &method_count, &methods);
+  CheckJvmtiError(jvmti_env, methods_result);
+
+  // Check each method.
+  for (jint i = 0; i < method_count; ++i) {
+    jint modifiers;
+    jvmtiError mod_result = jvmti_env->GetMethodModifiers(methods[i], &modifiers);
+    CheckJvmtiError(jvmti_env, mod_result);
+    constexpr jint kNative = static_cast<jint>(0x0100);  // ACC_NATIVE from the class file format.
+    if ((modifiers & kNative) != 0) {
+      BindMethod(jvmti_env, env, klass, methods[i]);
+    }
+  }
+
+  CheckJvmtiError(jvmti_env, Deallocate(jvmti_env, methods));
+}
+
+void BindFunctions(jvmtiEnv* jvmti_env, JNIEnv* env, const char* class_name, jobject class_loader) {
+  // Use JNI to load the class.
+  ScopedLocalRef<jclass> klass(env, FindClass(jvmti_env, env, class_name, class_loader));
+  CHECK(klass.get() != nullptr) << class_name;
+  BindFunctionsOnClass(jvmti_env, env, klass.get());
+}
+
+}  // namespace art
diff --git a/test/ti-agent/jni_binder.h b/test/ti-agent/jni_binder.h
new file mode 100644
index 0000000..6f96257
--- /dev/null
+++ b/test/ti-agent/jni_binder.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_JNI_BINDER_H_
+#define ART_TEST_TI_AGENT_JNI_BINDER_H_
+
+#include "jni.h"
+#include "jvmti.h"
+
+namespace art {
+
+// Load the class through JNI. Inspect it, find all native methods. Construct the corresponding
+// mangled name, run dlsym and bind the method.
+//
+// This will abort on failure.
+void BindFunctions(jvmtiEnv* jvmti_env,
+                   JNIEnv* env,
+                   const char* class_name,
+                   jobject class_loader = nullptr);
+
+void BindFunctionsOnClass(jvmtiEnv* jvmti_env, JNIEnv* env, jclass klass);
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_JNI_BINDER_H_
diff --git a/test/ti-agent/jni_helper.h b/test/ti-agent/jni_helper.h
new file mode 100644
index 0000000..0cbc634
--- /dev/null
+++ b/test/ti-agent/jni_helper.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_JNI_HELPER_H_
+#define ART_TEST_TI_AGENT_JNI_HELPER_H_
+
+#include "jni.h"
+#include "scoped_local_ref.h"
+
+namespace art {
+
+// Create an object array using a lambda that returns a local ref for each element.
+template <typename T>
+static inline jobjectArray CreateObjectArray(JNIEnv* env,
+                                             jint length,
+                                             const char* component_type_descriptor,
+                                             T src) {
+  if (length < 0) {
+    return nullptr;
+  }
+
+  ScopedLocalRef<jclass> obj_class(env, env->FindClass(component_type_descriptor));
+  if (obj_class.get() == nullptr) {
+    return nullptr;
+  }
+
+  ScopedLocalRef<jobjectArray> ret(env, env->NewObjectArray(length, obj_class.get(), nullptr));
+  if (ret.get() == nullptr) {
+    return nullptr;
+  }
+
+  for (jint i = 0; i < length; ++i) {
+    jobject element = src(i);
+    env->SetObjectArrayElement(ret.get(), static_cast<jint>(i), element);
+    env->DeleteLocalRef(element);
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+  }
+
+  return ret.release();
+}
+
+inline bool JniThrowNullPointerException(JNIEnv* env, const char* msg) {
+  if (env->ExceptionCheck()) {
+    env->ExceptionClear();
+  }
+
+  ScopedLocalRef<jclass> exc_class(env, env->FindClass("java/lang/NullPointerException"));
+  if (exc_class.get() == nullptr) {
+    return true;  // FindClass left an exception pending; report it as thrown.
+  }
+
+  return env->ThrowNew(exc_class.get(), msg) == JNI_OK;
+}
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_JNI_HELPER_H_
diff --git a/test/ti-agent/jvmti_helper.cc b/test/ti-agent/jvmti_helper.cc
new file mode 100644
index 0000000..598a30f
--- /dev/null
+++ b/test/ti-agent/jvmti_helper.cc
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jvmti_helper.h"
+
+#include <algorithm>
+#include <dlfcn.h>
+#include <stdio.h>
+#include <sstream>
+#include <string.h>
+
+#include "android-base/logging.h"
+#include "scoped_local_ref.h"
+
+namespace art {
+
+void CheckJvmtiError(jvmtiEnv* env, jvmtiError error) {
+  if (error != JVMTI_ERROR_NONE) {
+    char* error_name;
+    jvmtiError name_error = env->GetErrorName(error, &error_name);
+    if (name_error != JVMTI_ERROR_NONE) {
+      LOG(FATAL) << "Unable to get error name for " << error;
+    }
+    LOG(FATAL) << "Unexpected error: " << error_name;
+  }
+}
+
+void SetAllCapabilities(jvmtiEnv* env) {
+  jvmtiCapabilities caps;
+  jvmtiError error1 = env->GetPotentialCapabilities(&caps);
+  CheckJvmtiError(env, error1);
+  jvmtiError error2 = env->AddCapabilities(&caps);
+  CheckJvmtiError(env, error2);
+}
+
+bool JvmtiErrorToException(JNIEnv* env, jvmtiEnv* jvmti_env, jvmtiError error) {
+  if (error == JVMTI_ERROR_NONE) {
+    return false;
+  }
+
+  ScopedLocalRef<jclass> rt_exception(env, env->FindClass("java/lang/RuntimeException"));
+  if (rt_exception.get() == nullptr) {
+    // CNFE should be pending.
+    return true;
+  }
+
+  char* err;
+  CheckJvmtiError(jvmti_env, jvmti_env->GetErrorName(error, &err));
+
+  env->ThrowNew(rt_exception.get(), err);
+
+  Deallocate(jvmti_env, err);
+  return true;
+}
+
+std::ostream& operator<<(std::ostream& os, const jvmtiError& rhs) {
+  switch (rhs) {
+    case JVMTI_ERROR_NONE:
+      return os << "NONE";
+    case JVMTI_ERROR_INVALID_THREAD:
+      return os << "INVALID_THREAD";
+    case JVMTI_ERROR_INVALID_THREAD_GROUP:
+      return os << "INVALID_THREAD_GROUP";
+    case JVMTI_ERROR_INVALID_PRIORITY:
+      return os << "INVALID_PRIORITY";
+    case JVMTI_ERROR_THREAD_NOT_SUSPENDED:
+      return os << "THREAD_NOT_SUSPENDED";
+    case JVMTI_ERROR_THREAD_SUSPENDED:
+      return os << "THREAD_SUSPENDED";
+    case JVMTI_ERROR_THREAD_NOT_ALIVE:
+      return os << "THREAD_NOT_ALIVE";
+    case JVMTI_ERROR_INVALID_OBJECT:
+      return os << "INVALID_OBJECT";
+    case JVMTI_ERROR_INVALID_CLASS:
+      return os << "INVALID_CLASS";
+    case JVMTI_ERROR_CLASS_NOT_PREPARED:
+      return os << "CLASS_NOT_PREPARED";
+    case JVMTI_ERROR_INVALID_METHODID:
+      return os << "INVALID_METHODID";
+    case JVMTI_ERROR_INVALID_LOCATION:
+      return os << "INVALID_LOCATION";
+    case JVMTI_ERROR_INVALID_FIELDID:
+      return os << "INVALID_FIELDID";
+    case JVMTI_ERROR_NO_MORE_FRAMES:
+      return os << "NO_MORE_FRAMES";
+    case JVMTI_ERROR_OPAQUE_FRAME:
+      return os << "OPAQUE_FRAME";
+    case JVMTI_ERROR_TYPE_MISMATCH:
+      return os << "TYPE_MISMATCH";
+    case JVMTI_ERROR_INVALID_SLOT:
+      return os << "INVALID_SLOT";
+    case JVMTI_ERROR_DUPLICATE:
+      return os << "DUPLICATE";
+    case JVMTI_ERROR_NOT_FOUND:
+      return os << "NOT_FOUND";
+    case JVMTI_ERROR_INVALID_MONITOR:
+      return os << "INVALID_MONITOR";
+    case JVMTI_ERROR_NOT_MONITOR_OWNER:
+      return os << "NOT_MONITOR_OWNER";
+    case JVMTI_ERROR_INTERRUPT:
+      return os << "INTERRUPT";
+    case JVMTI_ERROR_INVALID_CLASS_FORMAT:
+      return os << "INVALID_CLASS_FORMAT";
+    case JVMTI_ERROR_CIRCULAR_CLASS_DEFINITION:
+      return os << "CIRCULAR_CLASS_DEFINITION";
+    case JVMTI_ERROR_FAILS_VERIFICATION:
+      return os << "FAILS_VERIFICATION";
+    case JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_ADDED:
+      return os << "UNSUPPORTED_REDEFINITION_METHOD_ADDED";
+    case JVMTI_ERROR_UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED:
+      return os << "UNSUPPORTED_REDEFINITION_SCHEMA_CHANGED";
+    case JVMTI_ERROR_INVALID_TYPESTATE:
+      return os << "INVALID_TYPESTATE";
+    case JVMTI_ERROR_UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED:
+      return os << "UNSUPPORTED_REDEFINITION_HIERARCHY_CHANGED";
+    case JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_DELETED:
+      return os << "UNSUPPORTED_REDEFINITION_METHOD_DELETED";
+    case JVMTI_ERROR_UNSUPPORTED_VERSION:
+      return os << "UNSUPPORTED_VERSION";
+    case JVMTI_ERROR_NAMES_DONT_MATCH:
+      return os << "NAMES_DONT_MATCH";
+    case JVMTI_ERROR_UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED:
+      return os << "UNSUPPORTED_REDEFINITION_CLASS_MODIFIERS_CHANGED";
+    case JVMTI_ERROR_UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED:
+      return os << "UNSUPPORTED_REDEFINITION_METHOD_MODIFIERS_CHANGED";
+    case JVMTI_ERROR_UNMODIFIABLE_CLASS:
+      return os << "JVMTI_ERROR_UNMODIFIABLE_CLASS";
+    case JVMTI_ERROR_NOT_AVAILABLE:
+      return os << "NOT_AVAILABLE";
+    case JVMTI_ERROR_MUST_POSSESS_CAPABILITY:
+      return os << "MUST_POSSESS_CAPABILITY";
+    case JVMTI_ERROR_NULL_POINTER:
+      return os << "NULL_POINTER";
+    case JVMTI_ERROR_ABSENT_INFORMATION:
+      return os << "ABSENT_INFORMATION";
+    case JVMTI_ERROR_INVALID_EVENT_TYPE:
+      return os << "INVALID_EVENT_TYPE";
+    case JVMTI_ERROR_ILLEGAL_ARGUMENT:
+      return os << "ILLEGAL_ARGUMENT";
+    case JVMTI_ERROR_NATIVE_METHOD:
+      return os << "NATIVE_METHOD";
+    case JVMTI_ERROR_CLASS_LOADER_UNSUPPORTED:
+      return os << "CLASS_LOADER_UNSUPPORTED";
+    case JVMTI_ERROR_OUT_OF_MEMORY:
+      return os << "OUT_OF_MEMORY";
+    case JVMTI_ERROR_ACCESS_DENIED:
+      return os << "ACCESS_DENIED";
+    case JVMTI_ERROR_WRONG_PHASE:
+      return os << "WRONG_PHASE";
+    case JVMTI_ERROR_INTERNAL:
+      return os << "INTERNAL";
+    case JVMTI_ERROR_UNATTACHED_THREAD:
+      return os << "UNATTACHED_THREAD";
+    case JVMTI_ERROR_INVALID_ENVIRONMENT:
+      return os << "INVALID_ENVIRONMENT";
+  }
+  LOG(FATAL) << "Unexpected error type " << static_cast<int>(rhs);
+  __builtin_unreachable();
+}
+
+}  // namespace art
diff --git a/test/ti-agent/jvmti_helper.h b/test/ti-agent/jvmti_helper.h
new file mode 100644
index 0000000..66d88d0
--- /dev/null
+++ b/test/ti-agent/jvmti_helper.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_JVMTI_HELPER_H_
+#define ART_TEST_TI_AGENT_JVMTI_HELPER_H_
+
+#include "jni.h"
+#include "jvmti.h"
+#include <memory>
+#include <ostream>
+
+#include "android-base/logging.h"
+
+namespace art {
+
+// Add all capabilities to the given env.
+void SetAllCapabilities(jvmtiEnv* env);
+
+// Check whether the given error is NONE. If not, print out the corresponding error message
+// and abort.
+void CheckJvmtiError(jvmtiEnv* env, jvmtiError error);
+
+// Convert the given error to a RuntimeException with a message derived from the error. Returns
+// true on error, false if error is JVMTI_ERROR_NONE.
+bool JvmtiErrorToException(JNIEnv* env, jvmtiEnv* jvmti_env, jvmtiError error);
+
+class JvmtiDeleter {
+ public:
+  JvmtiDeleter() : env_(nullptr) {}
+  explicit JvmtiDeleter(jvmtiEnv* env) : env_(env) {}
+
+  JvmtiDeleter(const JvmtiDeleter&) = default;
+  JvmtiDeleter(JvmtiDeleter&&) = default;
+  JvmtiDeleter& operator=(const JvmtiDeleter&) = default;
+
+  void operator()(unsigned char* ptr) const {
+    CHECK(env_ != nullptr);
+    jvmtiError ret = env_->Deallocate(ptr);
+    CheckJvmtiError(env_, ret);
+  }
+
+ private:
+  mutable jvmtiEnv* env_;
+};
+
+using JvmtiUniquePtr = std::unique_ptr<unsigned char, JvmtiDeleter>;
+
+template <typename T>
+static inline JvmtiUniquePtr MakeJvmtiUniquePtr(jvmtiEnv* env, T* mem) {
+  return JvmtiUniquePtr(reinterpret_cast<unsigned char*>(mem), JvmtiDeleter(env));
+}
+
+template <typename T>
+static inline jvmtiError Deallocate(jvmtiEnv* env, T* mem) {
+  return env->Deallocate(reinterpret_cast<unsigned char*>(mem));
+}
+
+// To print jvmtiError. Does not rely on GetErrorName, so is an approximation.
+std::ostream& operator<<(std::ostream& os, const jvmtiError& rhs);
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_JVMTI_HELPER_H_
diff --git a/test/ti-agent/scoped_local_ref.h b/test/ti-agent/scoped_local_ref.h
new file mode 100644
index 0000000..daa1583
--- /dev/null
+++ b/test/ti-agent/scoped_local_ref.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_SCOPED_LOCAL_REF_H_
+#define ART_TEST_TI_AGENT_SCOPED_LOCAL_REF_H_
+
+#include "jni.h"
+
+#include <stddef.h>
+
+#include "android-base/macros.h"
+
+namespace art {
+
+template<typename T>
+class ScopedLocalRef {
+ public:
+  ScopedLocalRef(JNIEnv* env, T localRef) : mEnv(env), mLocalRef(localRef) {
+  }
+
+  ~ScopedLocalRef() {
+    reset();
+  }
+
+  void reset(T ptr = nullptr) {
+    if (ptr != mLocalRef) {
+      if (mLocalRef != nullptr) {
+        mEnv->DeleteLocalRef(mLocalRef);
+      }
+      mLocalRef = ptr;
+    }
+  }
+
+  T release() __attribute__((warn_unused_result)) {
+    T localRef = mLocalRef;
+    mLocalRef = nullptr;
+    return localRef;
+  }
+
+  T get() const {
+    return mLocalRef;
+  }
+
+ private:
+  JNIEnv* const mEnv;
+  T mLocalRef;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedLocalRef);
+};
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_SCOPED_LOCAL_REF_H_
diff --git a/test/ti-agent/scoped_primitive_array.h b/test/ti-agent/scoped_primitive_array.h
new file mode 100644
index 0000000..1649ed9
--- /dev/null
+++ b/test/ti-agent/scoped_primitive_array.h
@@ -0,0 +1,154 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_SCOPED_PRIMITIVE_ARRAY_H_
+#define ART_TEST_TI_AGENT_SCOPED_PRIMITIVE_ARRAY_H_
+
+#include "jni.h"
+
+#include "android-base/macros.h"
+
+#include "jni_helper.h"
+
+namespace art {
+
+#ifdef POINTER_TYPE
+#error POINTER_TYPE is defined.
+#else
+#define POINTER_TYPE(T) T*  /* NOLINT */
+#endif
+
+#ifdef REFERENCE_TYPE
+#error REFERENCE_TYPE is defined.
+#else
+#define REFERENCE_TYPE(T) T&  /* NOLINT */
+#endif
+
+// ScopedBooleanArrayRO, ScopedByteArrayRO, ScopedCharArrayRO, ScopedDoubleArrayRO,
+// ScopedFloatArrayRO, ScopedIntArrayRO, ScopedLongArrayRO, and ScopedShortArrayRO provide
+// convenient read-only access to Java arrays from JNI code. This is cheaper than read-write
+// access and should be used by default.
+#define INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(PRIMITIVE_TYPE, NAME) \
+    class Scoped ## NAME ## ArrayRO { \
+    public: \
+        explicit Scoped ## NAME ## ArrayRO(JNIEnv* env) \
+        : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(0) {} \
+        Scoped ## NAME ## ArrayRO(JNIEnv* env, PRIMITIVE_TYPE ## Array javaArray) \
+        : mEnv(env) { \
+            if (javaArray == nullptr) { \
+                mJavaArray = nullptr; \
+                mSize = 0; \
+                mRawArray = nullptr; \
+                JniThrowNullPointerException(env, nullptr); \
+            } else { \
+                reset(javaArray); \
+            } \
+        } \
+        ~Scoped ## NAME ## ArrayRO() { \
+            if (mRawArray != nullptr && mRawArray != mBuffer) { \
+                mEnv->Release ## NAME ## ArrayElements(mJavaArray, mRawArray, JNI_ABORT); \
+            } \
+        } \
+        void reset(PRIMITIVE_TYPE ## Array javaArray) { \
+            mJavaArray = javaArray; \
+            mSize = mEnv->GetArrayLength(mJavaArray); \
+            if (mSize <= kBufferSize) { \
+                mEnv->Get ## NAME ## ArrayRegion(mJavaArray, 0, mSize, mBuffer); \
+                mRawArray = mBuffer; \
+            } else { \
+                mRawArray = mEnv->Get ## NAME ## ArrayElements(mJavaArray, nullptr); \
+            } \
+        } \
+        const PRIMITIVE_TYPE* get() const { return mRawArray; } \
+        PRIMITIVE_TYPE ## Array getJavaArray() const { return mJavaArray; } \
+        const PRIMITIVE_TYPE& operator[](size_t n) const { return mRawArray[n]; } \
+        size_t size() const { return mSize; } \
+    private: \
+        static constexpr jsize kBufferSize = 1024; \
+        JNIEnv* const mEnv; \
+        PRIMITIVE_TYPE ## Array mJavaArray; \
+        POINTER_TYPE(PRIMITIVE_TYPE) mRawArray; \
+        jsize mSize; \
+        PRIMITIVE_TYPE mBuffer[kBufferSize]; \
+        DISALLOW_COPY_AND_ASSIGN(Scoped ## NAME ## ArrayRO); \
+    }
+
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jboolean, Boolean);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jbyte, Byte);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jchar, Char);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jdouble, Double);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jfloat, Float);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jint, Int);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jlong, Long);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO(jshort, Short);
+
+#undef INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RO
+
+// ScopedBooleanArrayRW, ScopedByteArrayRW, ScopedCharArrayRW, ScopedDoubleArrayRW,
+// ScopedFloatArrayRW, ScopedIntArrayRW, ScopedLongArrayRW, and ScopedShortArrayRW provide
+// convenient read-write access to Java arrays from JNI code. These are more expensive,
+// since they entail a copy back onto the Java heap, and should only be used when necessary.
+#define INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(PRIMITIVE_TYPE, NAME) \
+    class Scoped ## NAME ## ArrayRW { \
+    public: \
+        explicit Scoped ## NAME ## ArrayRW(JNIEnv* env) \
+        : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr) {} \
+        Scoped ## NAME ## ArrayRW(JNIEnv* env, PRIMITIVE_TYPE ## Array javaArray) \
+        : mEnv(env), mJavaArray(javaArray), mRawArray(nullptr) { \
+            if (mJavaArray == nullptr) { \
+                JniThrowNullPointerException(env, nullptr); \
+            } else { \
+                mRawArray = mEnv->Get ## NAME ## ArrayElements(mJavaArray, nullptr); \
+            } \
+        } \
+        ~Scoped ## NAME ## ArrayRW() { \
+            if (mRawArray) { \
+                mEnv->Release ## NAME ## ArrayElements(mJavaArray, mRawArray, 0); \
+            } \
+        } \
+        void reset(PRIMITIVE_TYPE ## Array javaArray) { \
+            mJavaArray = javaArray; \
+            mRawArray = mEnv->Get ## NAME ## ArrayElements(mJavaArray, nullptr); \
+        } \
+        const PRIMITIVE_TYPE* get() const { return mRawArray; } \
+        PRIMITIVE_TYPE ## Array getJavaArray() const { return mJavaArray; } \
+        const PRIMITIVE_TYPE& operator[](size_t n) const { return mRawArray[n]; } \
+        POINTER_TYPE(PRIMITIVE_TYPE) get() { return mRawArray; }  \
+        REFERENCE_TYPE(PRIMITIVE_TYPE) operator[](size_t n) { return mRawArray[n]; } \
+        size_t size() const { return mEnv->GetArrayLength(mJavaArray); } \
+    private: \
+        JNIEnv* const mEnv; \
+        PRIMITIVE_TYPE ## Array mJavaArray; \
+        POINTER_TYPE(PRIMITIVE_TYPE) mRawArray; \
+        DISALLOW_COPY_AND_ASSIGN(Scoped ## NAME ## ArrayRW); \
+    }
+
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jboolean, Boolean);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jbyte, Byte);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jchar, Char);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jdouble, Double);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jfloat, Float);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jint, Int);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jlong, Long);
+INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW(jshort, Short);
+
+#undef INSTANTIATE_SCOPED_PRIMITIVE_ARRAY_RW
+#undef POINTER_TYPE
+#undef REFERENCE_TYPE
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_SCOPED_PRIMITIVE_ARRAY_H_
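A hypothetical native-method body showing the intended division of labor: the RO wrapper for reading, the RW wrapper only when the array must be written back:

    #include "scoped_primitive_array.h"

    namespace art {

    // Sum the elements (read-only, cheap), then zero them out (read-write,
    // copied back to the Java heap when the RW wrapper is destroyed).
    static jlong SumThenZero(JNIEnv* env, jintArray java_array) {
      jlong sum = 0;
      {
        ScopedIntArrayRO ro(env, java_array);
        for (size_t i = 0; i < ro.size(); ++i) {
          sum += ro[i];
        }
      }  // Large arrays are released with JNI_ABORT here (no copy-back).
      ScopedIntArrayRW rw(env, java_array);
      for (size_t i = 0; i < rw.size(); ++i) {
        rw[i] = 0;
      }
      return sum;  // ~ScopedIntArrayRW copies the zeros back.
    }

    }  // namespace art
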
diff --git a/test/ti-agent/scoped_utf_chars.h b/test/ti-agent/scoped_utf_chars.h
new file mode 100644
index 0000000..422caaf
--- /dev/null
+++ b/test/ti-agent/scoped_utf_chars.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_SCOPED_UTF_CHARS_H_
+#define ART_TEST_TI_AGENT_SCOPED_UTF_CHARS_H_
+
+#include "jni.h"
+
+#include <string.h>
+
+#include "android-base/macros.h"
+
+#include "jni_helper.h"
+
+namespace art {
+
+class ScopedUtfChars {
+ public:
+  ScopedUtfChars(JNIEnv* env, jstring s) : env_(env), string_(s) {
+    if (s == nullptr) {
+      utf_chars_ = nullptr;
+      JniThrowNullPointerException(env, nullptr);
+    } else {
+      utf_chars_ = env->GetStringUTFChars(s, nullptr);
+    }
+  }
+
+  ScopedUtfChars(ScopedUtfChars&& rhs) :
+      env_(rhs.env_), string_(rhs.string_), utf_chars_(rhs.utf_chars_) {
+    rhs.env_ = nullptr;
+    rhs.string_ = nullptr;
+    rhs.utf_chars_ = nullptr;
+  }
+
+  ~ScopedUtfChars() {
+    if (utf_chars_) {
+      env_->ReleaseStringUTFChars(string_, utf_chars_);
+    }
+  }
+
+  ScopedUtfChars& operator=(ScopedUtfChars&& rhs) {
+    if (this != &rhs) {
+      // Delete the currently owned UTF chars.
+      this->~ScopedUtfChars();
+
+      // Move the rhs ScopedUtfChars and zero it out.
+      env_ = rhs.env_;
+      string_ = rhs.string_;
+      utf_chars_ = rhs.utf_chars_;
+      rhs.env_ = nullptr;
+      rhs.string_ = nullptr;
+      rhs.utf_chars_ = nullptr;
+    }
+    return *this;
+  }
+
+  const char* c_str() const {
+    return utf_chars_;
+  }
+
+  size_t size() const {
+    return strlen(utf_chars_);
+  }
+
+  const char& operator[](size_t n) const {
+    return utf_chars_[n];
+  }
+
+ private:
+  JNIEnv* env_;
+  jstring string_;
+  const char* utf_chars_;
+
+  DISALLOW_COPY_AND_ASSIGN(ScopedUtfChars);
+};
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_SCOPED_UTF_CHARS_H_
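A short sketch of the intended usage (hypothetical function; on a null string the constructor leaves a NullPointerException pending and c_str() returns null):

    #include <string.h>

    #include "scoped_utf_chars.h"

    namespace art {

    // Compare a jstring against a C string, releasing the UTF chars on all paths.
    static bool StringEquals(JNIEnv* env, jstring java_string, const char* expected) {
      ScopedUtfChars utf(env, java_string);
      if (utf.c_str() == nullptr) {
        return false;  // NullPointerException already pending.
      }
      return strcmp(utf.c_str(), expected) == 0;
    }

    }  // namespace art
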
diff --git a/test/ti-agent/common_load.h b/test/ti-agent/test_env.cc
similarity index 76%
copy from test/ti-agent/common_load.h
copy to test/ti-agent/test_env.cc
index d254421..cf47f22 100644
--- a/test/ti-agent/common_load.h
+++ b/test/ti-agent/test_env.cc
@@ -14,16 +14,20 @@
  * limitations under the License.
  */
 
-#ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
-#define ART_TEST_TI_AGENT_COMMON_LOAD_H_
-
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "test_env.h"
 
 namespace art {
 
-extern jvmtiEnv* jvmti_env;
+jvmtiEnv* jvmti_env = nullptr;
+
+static bool gRuntimeIsJVM = false;
+
+bool IsJVM() {
+  return gRuntimeIsJVM;
+}
+
+void SetJVM(bool b) {
+  gRuntimeIsJVM = b;
+}
 
 }  // namespace art
-
-#endif  // ART_TEST_TI_AGENT_COMMON_LOAD_H_
diff --git a/test/ti-agent/common_load.h b/test/ti-agent/test_env.h
similarity index 79%
rename from test/ti-agent/common_load.h
rename to test/ti-agent/test_env.h
index d254421..2eb631c 100644
--- a/test/ti-agent/common_load.h
+++ b/test/ti-agent/test_env.h
@@ -14,16 +14,18 @@
  * limitations under the License.
  */
 
-#ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
-#define ART_TEST_TI_AGENT_COMMON_LOAD_H_
+#ifndef ART_TEST_TI_AGENT_TEST_ENV_H_
+#define ART_TEST_TI_AGENT_TEST_ENV_H_
 
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "jvmti.h"
 
 namespace art {
 
 extern jvmtiEnv* jvmti_env;
 
+bool IsJVM();
+void SetJVM(bool b);
+
 }  // namespace art
 
-#endif  // ART_TEST_TI_AGENT_COMMON_LOAD_H_
+#endif  // ART_TEST_TI_AGENT_TEST_ENV_H_
diff --git a/test/ti-agent/common_load.h b/test/ti-agent/ti_macros.h
similarity index 65%
copy from test/ti-agent/common_load.h
copy to test/ti-agent/ti_macros.h
index d254421..d913383 100644
--- a/test/ti-agent/common_load.h
+++ b/test/ti-agent/ti_macros.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2017 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,16 +14,13 @@
  * limitations under the License.
  */
 
-#ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
-#define ART_TEST_TI_AGENT_COMMON_LOAD_H_
+#ifndef ART_TEST_TI_AGENT_TI_MACROS_H_
+#define ART_TEST_TI_AGENT_TI_MACROS_H_
 
-#include "jni.h"
-#include "openjdkjvmti/jvmti.h"
+#include "android-base/macros.h"
 
-namespace art {
+#define FINAL final
+#define OVERRIDE override
+#define UNREACHABLE  __builtin_unreachable
 
-extern jvmtiEnv* jvmti_env;
-
-}  // namespace art
-
-#endif  // ART_TEST_TI_AGENT_COMMON_LOAD_H_
+#endif  // ART_TEST_TI_AGENT_TI_MACROS_H_
diff --git a/test/ti-agent/ti_utf.h b/test/ti-agent/ti_utf.h
new file mode 100644
index 0000000..341e106
--- /dev/null
+++ b/test/ti-agent/ti_utf.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_TI_UTF_H_
+#define ART_TEST_TI_AGENT_TI_UTF_H_
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "android-base/logging.h"
+
+namespace art {
+namespace ti {
+
+inline size_t CountModifiedUtf8Chars(const char* utf8, size_t byte_count) {
+  DCHECK_LE(byte_count, strlen(utf8));
+  size_t len = 0;
+  const char* end = utf8 + byte_count;
+  for (; utf8 < end; ++utf8) {
+    int ic = *utf8;
+    len++;
+    if (LIKELY((ic & 0x80) == 0)) {
+      // One-byte encoding.
+      continue;
+    }
+    // Two- or three-byte encoding.
+    utf8++;
+    if ((ic & 0x20) == 0) {
+      // Two-byte encoding.
+      continue;
+    }
+    utf8++;
+    if ((ic & 0x10) == 0) {
+      // Three-byte encoding.
+      continue;
+    }
+
+    // Four-byte encoding: needs to be converted into a surrogate
+    // pair.
+    utf8++;
+    len++;
+  }
+  return len;
+}
+
+inline uint16_t GetTrailingUtf16Char(uint32_t maybe_pair) {
+  return static_cast<uint16_t>(maybe_pair >> 16);
+}
+
+inline uint16_t GetLeadingUtf16Char(uint32_t maybe_pair) {
+  return static_cast<uint16_t>(maybe_pair & 0x0000FFFF);
+}
+
+inline uint32_t GetUtf16FromUtf8(const char** utf8_data_in) {
+  const uint8_t one = *(*utf8_data_in)++;
+  if ((one & 0x80) == 0) {
+    // one-byte encoding
+    return one;
+  }
+
+  const uint8_t two = *(*utf8_data_in)++;
+  if ((one & 0x20) == 0) {
+    // two-byte encoding
+    return ((one & 0x1f) << 6) | (two & 0x3f);
+  }
+
+  const uint8_t three = *(*utf8_data_in)++;
+  if ((one & 0x10) == 0) {
+    return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f);
+  }
+
+  // Four byte encodings need special handling. We'll have
+  // to convert them into a surrogate pair.
+  const uint8_t four = *(*utf8_data_in)++;
+
+  // Since this is a 4 byte UTF-8 sequence, it will lie between
+  // U+10000 and U+1FFFFF.
+  //
+  // TODO: What do we do about values in (U+10FFFF, U+1FFFFF) ? The
+  // spec says they're invalid but nobody appears to check for them.
+  const uint32_t code_point = ((one & 0x0f) << 18) | ((two & 0x3f) << 12)
+      | ((three & 0x3f) << 6) | (four & 0x3f);
+
+  uint32_t surrogate_pair = 0;
+  // Step two: Write out the high (leading) surrogate to the bottom 16 bits
+  // of the 32 bit type.
+  surrogate_pair |= ((code_point >> 10) + 0xd7c0) & 0xffff;
+  // Step three: Write out the low (trailing) surrogate to the top 16 bits.
+  surrogate_pair |= ((code_point & 0x03ff) + 0xdc00) << 16;
+
+  return surrogate_pair;
+}
+
+inline void ConvertUtf16ToModifiedUtf8(char* utf8_out,
+                                       size_t byte_count,
+                                       const uint16_t* utf16_in,
+                                       size_t char_count) {
+  if (LIKELY(byte_count == char_count)) {
+    // Common case where all characters are ASCII.
+    const uint16_t *utf16_end = utf16_in + char_count;
+    for (const uint16_t *p = utf16_in; p < utf16_end;) {
+      *utf8_out++ = static_cast<char>(*p++);
+    }
+    return;
+  }
+
+  // String contains non-ASCII characters.
+  while (char_count--) {
+    const uint16_t ch = *utf16_in++;
+    if (ch > 0 && ch <= 0x7f) {
+      *utf8_out++ = ch;
+    } else {
+      // char_count == 0 here implies we've encountered an unpaired
+      // surrogate and have no choice but to encode it as a 3-byte UTF
+      // sequence. Note that unpaired surrogates can occur as part of
+      // "normal" operation.
+      if ((ch >= 0xd800 && ch <= 0xdbff) && (char_count > 0)) {
+        const uint16_t ch2 = *utf16_in;
+
+        // Check if the other half of the pair is within the expected
+        // range. If it isn't, we will have to emit both "halves" as
+        // separate 3 byte sequences.
+        if (ch2 >= 0xdc00 && ch2 <= 0xdfff) {
+          utf16_in++;
+          char_count--;
+          const uint32_t code_point = (ch << 10) + ch2 - 0x035fdc00;
+          *utf8_out++ = (code_point >> 18) | 0xf0;
+          *utf8_out++ = ((code_point >> 12) & 0x3f) | 0x80;
+          *utf8_out++ = ((code_point >> 6) & 0x3f) | 0x80;
+          *utf8_out++ = (code_point & 0x3f) | 0x80;
+          continue;
+        }
+      }
+
+      if (ch > 0x07ff) {
+        // Three byte encoding.
+        *utf8_out++ = (ch >> 12) | 0xe0;
+        *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      } else /*(ch > 0x7f || ch == 0)*/ {
+        // Two byte encoding.
+        *utf8_out++ = (ch >> 6) | 0xc0;
+        *utf8_out++ = (ch & 0x3f) | 0x80;
+      }
+    }
+  }
+}
+
+inline size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
+  size_t result = 0;
+  const uint16_t *end = chars + char_count;
+  while (chars < end) {
+    const uint16_t ch = *chars++;
+    if (LIKELY(ch != 0 && ch < 0x80)) {
+      result++;
+      continue;
+    }
+    if (ch < 0x800) {
+      result += 2;
+      continue;
+    }
+    if (ch >= 0xd800 && ch < 0xdc00) {
+      if (chars < end) {
+        const uint16_t ch2 = *chars;
+        // If we find a properly paired surrogate, we emit it as a 4 byte
+        // UTF sequence. If we find an unpaired leading or trailing surrogate,
+        // we emit it as a 3 byte sequence as we would have done earlier.
+        if (ch2 >= 0xdc00 && ch2 < 0xe000) {
+          chars++;
+          result += 4;
+          continue;
+        }
+      }
+    }
+    result += 3;
+  }
+  return result;
+}
+
+}  // namespace ti
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_TI_UTF_H_
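To make the arithmetic above concrete, a small self-check one could write against these helpers; the byte and surrogate values for U+1F600 are worked out from the formulas in this header:

    #include "ti_utf.h"

    namespace art {

    // U+1F600 encodes as the 4-byte UTF-8 sequence F0 9F 98 80 and as the
    // surrogate pair D83D/DE00 in UTF-16.
    static void Utf8RoundTripExample() {
      const char utf8[] = "\xf0\x9f\x98\x80";
      CHECK_EQ(ti::CountModifiedUtf8Chars(utf8, 4), 2u);  // One pair = two chars.

      const char* in = utf8;
      uint32_t pair = ti::GetUtf16FromUtf8(&in);
      CHECK_EQ(ti::GetLeadingUtf16Char(pair), 0xd83d);   // High surrogate.
      CHECK_EQ(ti::GetTrailingUtf16Char(pair), 0xde00);  // Low surrogate.

      const uint16_t utf16[] = { 0xd83d, 0xde00 };
      CHECK_EQ(ti::CountUtf8Bytes(utf16, 2), 4u);

      char out[4];
      ti::ConvertUtf16ToModifiedUtf8(out, 4, utf16, 2);
      CHECK_EQ(memcmp(out, utf8, 4), 0);  // Reproduces the original bytes.
    }

    }  // namespace art
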
diff --git a/tools/art b/tools/art
index 91d6e27..933ad7a 100644
--- a/tools/art
+++ b/tools/art
@@ -16,106 +16,261 @@
 # shell dialect that should work on the host (e.g. bash), and
 # Android (e.g. mksh).
 
-function follow_links() {
-  if [ z"$BASH_SOURCE" != z ]; then
-    file="$BASH_SOURCE"
-  else
-    file="$0"
-  fi
-  while [ -h "$file" ]; do
-    # On Mac OS, readlink -f doesn't work.
-    file="$(readlink "$file")"
-  done
-  echo "$file"
-}
+# Globals
+ARCHS="arm arm64 mips mips64 x86 x86_64"
+ART_BINARY=dalvikvm
+DELETE_ANDROID_DATA="no"
+LAUNCH_WRAPPER=
+LIBART=libart.so
+JIT_PROFILE="no"
+VERBOSE="no"
+
+# Follow all sym links to get the program name.
+if [ z"$BASH_SOURCE" != z ]; then
+  PROG_NAME="$BASH_SOURCE"
+else
+  PROG_NAME="$0"
+fi
+while [ -h "$PROG_NAME" ]; do
+  # On Mac OS, readlink -f doesn't work.
+  PROG_NAME="$(readlink "$PROG_NAME")"
+done
 
 function find_libdir() {
-  # Get the actual file, $DALVIKVM may be a symbolic link.
+  # Get the actual file, $1 is the ART_BINARY_PATH and may be a symbolic link.
   # Use realpath instead of readlink because Android does not have a readlink.
-  if [[ "$(realpath "$ANDROID_ROOT/bin/$DALVIKVM")" == *dalvikvm64 ]]; then
+  if [[ "$(realpath "$1")" == *dalvikvm64 ]]; then
     echo "lib64"
   else
     echo "lib"
   fi
 }
 
-invoke_with=
-DALVIKVM=dalvikvm
-LIBART=libart.so
+function replace_compiler_filter_with_interpret_only() {
+  ARGS_WITH_INTERPRET_ONLY=("$@")
 
-while true; do
-  if [ "$1" = "--invoke-with" ]; then
+  found="false"
+  ((index=0))
+  while ((index < ${#ARGS_WITH_INTERPRET_ONLY[@]})); do
+    what="${ARGS_WITH_INTERPRET_ONLY[$index]}"
+
+    case "$what" in
+      --compiler-filter=*)
+        ARGS_WITH_INTERPRET_ONLY[$index]="--compiler-filter=interpret-only"
+        found="true"
+        ;;
+    esac
+
+    ((index++))
     shift
-    invoke_with="$invoke_with $1"
-    shift
-  elif [ "$1" = "-d" ]; then
-    LIBART="libartd.so"
-    shift
-  elif [ "$1" = "--32" ]; then
-    DALVIKVM=dalvikvm32
-    shift
-  elif [ "$1" = "--64" ]; then
-    DALVIKVM=dalvikvm64
-    shift
-  elif [ "$1" = "--perf" ]; then
-    PERF="record"
-    shift
-  elif [ "$1" = "--perf-report" ]; then
-    PERF="report"
-    shift
-  elif expr "$1" : "--" >/dev/null 2>&1; then
-    echo "unknown option: $1" 1>&2
-    exit 1
-  else
-    break
+  done
+  if [ "$found" != "true" ]; then
+    ARGS_WITH_INTERPRET_ONLY=(-Xcompiler-option --compiler-filter=interpret-only "${ARGS_WITH_INTERPRET_ONLY[@]}")
   fi
+}
+
+function usage() {
+  cat 1>&2 <<EOF
+Usage: art [OPTIONS] [--] [ART_OPTIONS] CLASS
+
+Supported OPTIONS include:
+  --32                     Use the 32-bit Android Runtime.
+  --64                     Use the 64-bit Android Runtime.
+  --callgrind              Launch the Android Runtime in callgrind.
+  -d                       Use the debug ART library (libartd.so).
+  --debug                  Equivalent to -d.
+  --gdb                    Launch the Android Runtime in gdb.
+  --help                   Display usage message.
+  --invoke-with <program>  Launch the Android Runtime in <program>.
+  --perf                   Launch the Android Runtime with perf recording.
+  --perf-report            Launch the Android Runtime with perf recording with
+                           report upon completion.
+  --profile                Run with profiling, then run using profile data.
+  --verbose                Run script verbosely.
+
+The ART_OPTIONS are passed directly to the Android Runtime.
+
+Example:
+  art --32 -cp my_classes.dex MainClass
+
+Common errors:
+  1) Not having core.art available (see $ANDROID_BUILD_TOP/art/Android.mk).
+     e.g. m -j32 build-art-host
+  2) Not having boot.art available (see $ANDROID_BUILD_TOP/build/make/core/dex_preopt_libart_boot.mk)
+     e.g. m -j32 out/target/product/generic_x86_64/dex_bootjars/system/framework/x86_64/boot.art
+EOF
+}
+
+function clean_android_data() {
+  if [ "$DELETE_ANDROID_DATA" = "yes" ]; then
+    rm -rf $ANDROID_DATA
+  fi
+}
+
+function verbose_run() {
+  if [ "$VERBOSE" = "yes" ]; then
+    echo "$@"
+  fi
+  eval "$@"
+}
+
+function run_art() {
+  verbose_run ANDROID_DATA=$ANDROID_DATA               \
+              ANDROID_ROOT=$ANDROID_ROOT               \
+              LD_LIBRARY_PATH=$LD_LIBRARY_PATH         \
+              PATH=$ANDROID_ROOT/bin:$PATH             \
+              LD_USE_LOAD_BIAS=1                       \
+              $LAUNCH_WRAPPER $ART_BINARY_PATH $lib    \
+              -XXlib:$LIBART                           \
+              -Xnorelocate                             \
+              -Ximage:$ANDROID_ROOT/framework/core.art \
+              "$@"
+}
+
+while [[ "$1" = "-"* ]]; do
+  case $1 in
+  --)
+    # No more arguments for this script.
+    shift
+    break
+    ;;
+  --32)
+    ART_BINARY=dalvikvm32
+    ;;
+  --64)
+    ART_BINARY=dalvikvm64
+    ;;
+  --callgrind)
+    LAUNCH_WRAPPER="valgrind --tool=callgrind"
+    ;;
+  -d)
+    ;& # Fallthrough
+  --debug)
+    LIBART="libartd.so"
+    ;;
+  --gdb)
+    LIBART="libartd.so"
+    LAUNCH_WRAPPER="gdb --args"
+    ;;
+  --help)
+    usage
+    exit 0
+    ;;
+  --invoke-with)
+    LAUNCH_WRAPPER=$2
+    shift
+    ;;
+  --perf)
+    PERF="record"
+    ;;
+  --perf-report)
+    PERF="report"
+    ;;
+  --profile)
+    JIT_PROFILE="yes"
+    ;;
+  --verbose)
+    VERBOSE="yes"
+    ;;
+  --*)
+    echo "unknown option: $1" 1>&2
+    usage
+    exit 1
+    ;;
+  *)
+    break
+    ;;
+  esac
+  shift
 done
 
-PROG_NAME="$(follow_links)"
+if [ $# -eq 0 ]; then
+  usage
+  exit 1
+fi
+
 PROG_DIR="$(cd "${PROG_NAME%/*}" ; pwd -P)"
 ANDROID_ROOT=$PROG_DIR/..
-LIBDIR=$(find_libdir)
-LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBDIR
-DEBUG_OPTION=""
+ART_BINARY_PATH=$ANDROID_ROOT/bin/$ART_BINARY
 
-DELETE_ANDROID_DATA=false
+if [ ! -x "$ART_BINARY_PATH" ]; then
+  cat 1>&2 <<EOF
+Android Runtime not found: $ART_BINARY_PATH
+This script should be in the same directory as the Android Runtime ($ART_BINARY).
+EOF
+  exit 1
+fi
+
+LIBDIR="$(find_libdir $ART_BINARY_PATH)"
+LD_LIBRARY_PATH=$ANDROID_ROOT/$LIBDIR
+EXTRA_OPTIONS=""
+
 # If ANDROID_DATA is the system ANDROID_DATA or is not set, use our own,
 # and ensure we delete it at the end.
 if [ "$ANDROID_DATA" = "/data" ] || [ "$ANDROID_DATA" = "" ]; then
   ANDROID_DATA=$PWD/android-data$$
-  mkdir -p $ANDROID_DATA/dalvik-cache/{arm,arm64,x86,x86_64}
-  DELETE_ANDROID_DATA=true
+  for arch in $ARCHS; do mkdir -p $ANDROID_DATA/dalvik-cache/$arch; done
+  DELETE_ANDROID_DATA="yes"
 fi
 
-if [ z"$PERF" != z ]; then
-  invoke_with="perf record -g -o $ANDROID_DATA/perf.data -e cycles:u $invoke_with"
-  DEBUG_OPTION="-Xcompiler-option --generate-debug-info"
+if [ "$PERF" != "" ]; then
+  LAUNCH_WRAPPER="perf record -g -o $ANDROID_DATA/perf.data -e cycles:u $LAUNCH_WRAPPER"
+  EXTRA_OPTIONS="-Xcompiler-option --generate-debug-info"
 fi
 
-# We use the PIC core image to work with perf.
-ANDROID_DATA=$ANDROID_DATA \
-  ANDROID_ROOT=$ANDROID_ROOT \
-  LD_LIBRARY_PATH=$LD_LIBRARY_PATH \
-  PATH=$ANDROID_ROOT/bin:$PATH \
-  LD_USE_LOAD_BIAS=1 \
-  $invoke_with $ANDROID_ROOT/bin/$DALVIKVM $lib \
-    -XXlib:$LIBART \
-    -Xnorelocate \
-    -Ximage:$ANDROID_ROOT/framework/core.art \
-    $DEBUG_OPTION \
-    "$@"
+if [ "$JIT_PROFILE" = "yes" ]; then
+  # Create the profile. The runtime expects profiles to be created before
+  # execution.
+  PROFILE_PATH="$ANDROID_DATA/primary.prof"
+  touch $PROFILE_PATH
 
+  # Replace the compiler filter with interpret-only so that we
+  # can capture the profile.
+  ARGS_WITH_INTERPRET_ONLY=
+  replace_compiler_filter_with_interpret_only "$@"
+
+  run_art -Xjitsaveprofilinginfo               \
+          -Xps-min-methods-to-save:1           \
+          -Xps-min-classes-to-save:1           \
+          -Xps-min-notification-before-wake:10 \
+          -Xps-profile-path:$PROFILE_PATH      \
+          -Xusejit:true                        \
+          "${ARGS_WITH_INTERPRET_ONLY[@]}"     \
+          "&>" "$ANDROID_DATA/profile_gen.log"
+  EXIT_STATUS=$?
+
+  if [ $EXIT_STATUS != 0 ]; then
+    cat "$ANDROID_DATA/profile_gen.log"
+    clean_android_data
+    exit $EXIT_STATUS
+  fi
+
+  # Wipe dalvik-cache to prepare it for the next invocation.
+  for arch in $ARCHS; do rm -rf $ANDROID_DATA/dalvik-cache/$arch/*; done
+
+  # Append arguments so next invocation of run_art uses the profile.
+  EXTRA_OPTIONS="$EXTRA_OPTIONS -Xcompiler-option --profile-file=$PROFILE_PATH"
+fi
+
+# Protect additional arguments in quotes to preserve whitespace when evaluated.
+# This is for run-jdwp-test.sh, which uses this script and has arguments with
+# whitespace when running on device.
+while [ $# -gt 0 ]; do
+  EXTRA_OPTIONS="$EXTRA_OPTIONS \"$1\""
+  shift
+done
+
+run_art $EXTRA_OPTIONS
 EXIT_STATUS=$?
 
-if [ z"$PERF" != z ]; then
-  if [ z"$PERF" = zreport ]; then
+if [ "$PERF" != "" ]; then
+  if [ "$PERF" = report ]; then
     perf report -i $ANDROID_DATA/perf.data
   fi
   echo "Perf data saved in: $ANDROID_DATA/perf.data"
 else
-  if [ "$DELETE_ANDROID_DATA" = "true" ]; then
-    rm -rf $ANDROID_DATA
-  fi
+  # Perf output is placed under $ANDROID_DATA so not cleaned when perf options used.
+  clean_android_data
 fi
 
 exit $EXIT_STATUS
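As a usage sketch of the reworked script (the dex file and class names are made up): a --profile run first executes the program with the interpret-only filter plus -Xjitsaveprofilinginfo to capture a profile, then re-runs it compiled with --profile-file:

    # Capture a profile, then run again using it:
    art --profile --64 -cp my_classes.dex MainClass

    # Debug flavor of the runtime under gdb:
    art --gdb -cp my_classes.dex MainClass
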
diff --git a/tools/cpplint_presubmit.py b/tools/cpplint_presubmit.py
new file mode 100755
index 0000000..4781517
--- /dev/null
+++ b/tools/cpplint_presubmit.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python3
+#
+# Copyright 2017, The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO We should unify this with build/Android.cpplint.mk.
+
+import os
+import pathlib
+import subprocess
+import sys
+
+IGNORED_FILES = {"runtime/elf.h", "runtime/openjdkjvmti/include/jvmti.h"}
+
+INTERESTING_SUFFIXES = {".h", ".cc"}
+
+CPPLINT_FLAGS = [
+    '--filter=-whitespace/line_length,-build/include,-readability/function,-readability/streams,-readability/todo,-runtime/references,-runtime/sizeof,-runtime/threadsafe_fn,-runtime/printf',
+    '--quiet',
+]
+
+def is_interesting(f):
+  """
+  Returns true if this is a file we want to run through cpplint before uploading. False otherwise.
+  """
+  path = pathlib.Path(f)
+  return f not in IGNORED_FILES and path.suffix in INTERESTING_SUFFIXES and path.exists()
+
+def get_changed_files(commit):
+  """
+  Gets the files changed in the given commit.
+  """
+  return subprocess.check_output(
+      ["git", 'diff-tree', '--no-commit-id', '--name-only', '-r', commit],
+      stderr=subprocess.STDOUT,
+      universal_newlines=True).split()
+
+def run_cpplint(files):
+  """
+  Runs cpplint on the given files.
+  """
+  if len(files) == 0:
+    return
+  sys.exit(subprocess.call(['tools/cpplint.py'] + CPPLINT_FLAGS + files))
+
+def main():
+  if 'PREUPLOAD_COMMIT' in os.environ:
+    commit = os.environ['PREUPLOAD_COMMIT']
+  else:
+    print("WARNING: Not running as a pre-upload hook. Assuming commit to check = 'HEAD'")
+    commit = "HEAD"
+  files_to_check = [f for f in get_changed_files(commit) if is_interesting(f)]
+  run_cpplint(files_to_check)
+
+if __name__ == '__main__':
+  main()
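A sketch of running the hook by hand from the art/ directory (the repo pre-upload machinery normally sets PREUPLOAD_COMMIT itself):

    # Lint only the .h/.cc files touched by the given commit:
    PREUPLOAD_COMMIT=HEAD tools/cpplint_presubmit.py
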
diff --git a/tools/golem/build-target.sh b/tools/golem/build-target.sh
new file mode 100755
index 0000000..8d8e2bb
--- /dev/null
+++ b/tools/golem/build-target.sh
@@ -0,0 +1,384 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [[ ! -d art ]]; then
+  echo "Script needs to be run at the root of the android tree"
+  exit 1
+fi
+
+ALL_CONFIGS=(linux-ia32 linux-x64 linux-armv8 linux-armv7 android-armv8 android-armv7)
+
+usage() {
+  local config
+  local golem_target
+
+  (cat << EOF
+  Usage: $(basename "${BASH_SOURCE[0]}") [--golem=<target>] --machine-type=MACHINE_TYPE
+                 [--tarball[=<target>.tar.gz]]
+
+  Build minimal art binaries required to run golem benchmarks either
+  locally or on the golem servers.
+
+  Creates the \$MACHINE_TYPE binaries in your \$OUT_DIR, and if --tarball was specified,
+  it also tars the results of the build together into your <target.tar.gz> file.
+  --------------------------------------------------------
+  Required Flags:
+    --machine-type=MT   Specify the machine type that will be built.
+
+  Optional Flags:
+    --golem=<target>    Builds with identical commands that Golem servers use.
+    --tarball[=o.tgz]   Tar/gz the results. File name defaults to <machine_type>.tar.gz
+    -j<num>             Specify how many jobs to use for parallelism.
+    --help              Print this help listing.
+    --showcommands      Show commands as they are being executed.
+    --simulate          Print commands only, don't execute commands.
+EOF
+  ) | sed -e 's/^[[:space:]][[:space:]]//g' >&2 # Strip leading whitespace from heredoc.
+
+  echo >&2 "Available machine types:"
+  for config in "${ALL_CONFIGS[@]}"; do
+    echo >&2 "  $config"
+  done
+
+  echo >&2
+  echo >&2 "Available Golem targets:"
+  while IFS='' read -r golem_target; do
+    echo >&2 "  $golem_target"
+  done < <("$(thisdir)/env" --list-targets)
+}
+
+# Check if $1 element is in array $2
+contains_element() {
+  local e
+  for e in "${@:2}"; do [[ "$e" == "$1" ]] && return 0; done
+  return 1
+}
+
+# Display a command, but don't execute it, if --showcommands was set.
+show_command() {
+  if [[ $showcommands == "showcommands" ]]; then
+    echo "$@"
+  fi
+}
+
+# Execute a command, displaying it if --showcommands was set.
+# If --simulate is used, command is not executed.
+execute() {
+  show_command "$@"
+  execute_noshow "$@"
+}
+
+# Execute a command unless --simulate was used.
+execute_noshow() {
+  if [[ $simulate == "simulate" ]]; then
+    return 0
+  fi
+
+  local prog="$1"
+  shift
+  "$prog" "$@"
+}
+
+# Export environment variable, echoing it to screen.
+setenv() {
+  local name="$1"
+  local value="$2"
+
+  export $name="$value"
+  echo export $name="$value"
+}
+
+# Export environment variable, echoing $3 to screen ($3 is meant to be unevaluated).
+setenv_escape() {
+  local name="$1"
+  local value="$2"
+  local escaped_value="$3"
+
+  export $name="$value"
+  echo export $name="$escaped_value"
+}
+
+log_usage_error() {
+  echo >&2 "ERROR: " "$@"
+  echo >&2 "       See --help for the correct usage information."
+  exit 1
+}
+
+log_fatal() {
+  echo >&2 "FATAL: " "$@"
+  exit 2
+}
+
+# Get the directory of this script.
+thisdir() {
+  (\cd "$(dirname "${BASH_SOURCE[0]}")" && pwd )
+}
+
+# Get the path to the top of the Android source tree.
+gettop() {
+  if [[ "x$ANDROID_BUILD_TOP" != "x" ]]; then
+    echo "$ANDROID_BUILD_TOP";
+  else
+    echo "$(thisdir)/../../.."
+  fi
+}
+
+# Get a build variable from the Android build system.
+get_build_var() {
+  local varname="$1"
+
+  # include the desired target product/build-variant
+  # which won't be set in our env if neither we nor the user first executed
+  # source build/envsetup.sh (e.g. if simulating from a fresh shell).
+  local extras
+  [[ -n $target_product ]] && extras+=" TARGET_PRODUCT=$target_product"
+  [[ -n $target_build_variant ]] && extras+=" TARGET_BUILD_VARIANT=$target_build_variant"
+
+  # call dumpvar-$name from the makefile system.
+  (\cd "$(gettop)";
+  CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core \
+    command make --no-print-directory -f build/core/config.mk \
+    $extras \
+    dumpvar-$varname)
+}
+
+# Defaults from command-line.
+
+mode=""  # blank or 'golem' if --golem was specified.
+golem_target="" # --golem=$golem_target
+config="" # --machine-type=$config
+j_arg="-j8"
+showcommands=""
+simulate=""
+make_tarball=""
+tarball=""
+
+# Parse command line arguments
+
+while [[ "$1" != "" ]]; do
+  case "$1" in
+    --help)
+      usage
+      exit 1
+      ;;
+    --golem=*)
+      mode="golem"
+      golem_target="${1##--golem=}"
+
+      if [[ "x$golem_target" == x ]]; then
+        log_usage_error "Missing --golem target type."
+      fi
+
+      shift
+      ;;
+    --machine-type=*)
+      config="${1##--machine-type=}"
+      if ! contains_element "$config" "${ALL_CONFIGS[@]}"; then
+        log_usage_error "Invalid --machine-type value '$config'"
+      fi
+      shift
+      ;;
+    --tarball)
+      tarball="" # reuse the machine type name.
+      make_tarball="make_tarball"
+      shift
+      ;;
+    --tarball=*)
+      tarball="${1##--tarball=}"
+      make_tarball="make_tarball"
+      shift
+      ;;
+    -j*)
+      j_arg="$1"
+      shift
+      ;;
+    --showcommands)
+      showcommands="showcommands"
+      shift
+      ;;
+    --simulate)
+      simulate="simulate"
+      shift
+      ;;
+    *)
+      log_usage_error "Unknown options $1"
+      ;;
+  esac
+done
+
+###################################
+###################################
+###################################
+
+if [[ -z $config ]]; then
+  log_usage_error "--machine-type option is required."
+fi
+
+# --tarball defaults to the --machine-type value with .tar.gz.
+tarball="${tarball:-$config.tar.gz}"
+
+target_product="$TARGET_PRODUCT"
+target_build_variant="$TARGET_BUILD_VARIANT"
+
+# If not using --golem, use whatever the user had lunch'd prior to this script.
+if [[ $mode == "golem" ]]; then
+  # This section is intended solely to be executed by a golem build server.
+
+  target_build_variant=eng
+  case "$config" in
+    *-armv7)
+      target_product="arm_krait"
+      ;;
+    *-armv8)
+      target_product="armv8"
+      ;;
+    *)
+      target_product="sdk"
+      ;;
+  esac
+
+  if [[ $target_product = arm* ]]; then
+    # If using the regular manifest, e.g. 'master'
+    # The lunch command for arm will assuredly fail because we don't have device/generic/art.
+    #
+    # Print a human-readable error message instead of trying to lunch and failing there.
+    if ! [[ -d "$(gettop)/device/generic/art" ]]; then
+      log_fatal "Missing device/generic/art directory. Perhaps try master-art repo manifest?\n" \
+                "       Cannot build ARM targets (arm_krait, armv8) for Golem." >&2
+    fi
+    # We could try to keep on simulating but it seems brittle because we won't have the proper
+    # build variables to output the right strings.
+  fi
+
+  # Get this particular target's environment variables (e.g. ART read barrier on/off).
+  source "$(thisdir)"/env "$golem_target" || exit 1
+
+  lunch_target="$target_product-$target_build_variant"
+
+  execute 'source' build/envsetup.sh
+  # Build generic targets (as opposed to something specific like aosp_angler-eng).
+  execute lunch "$lunch_target"
+  setenv JACK_SERVER false
+  setenv_escape JACK_REPOSITORY "$PWD/prebuilts/sdk/tools/jacks" '$PWD/prebuilts/sdk/tools/jacks'
+  # Golem uses master-art repository which is missing a lot of other libraries.
+  setenv SOONG_ALLOW_MISSING_DEPENDENCIES true
+  # Golem may be missing tools such as javac from its path.
+  setenv_escape PATH "/usr/lib/jvm/java-8-openjdk-amd64/bin/:$PATH" '/usr/lib/jvm/java-8-openjdk-amd64/bin/:$PATH'
+else
+  # Look up the default variables from the build system if they weren't set already.
+  [[ -z $target_product ]] && target_product="$(get_build_var TARGET_PRODUCT)"
+  [[ -z $target_build_variant ]] && target_build_variant="$(get_build_var TARGET_BUILD_VARIANT)"
+fi
+
+# Defaults for all machine types.
+make_target="build-art-target-golem"
+out_dir="out/x86_64"
+root_dir_var="PRODUCT_OUT"
+strip_symbols=false
+bit64_suffix=""
+tar_directories=(system data/art-test)
+
+# Per-machine type overrides
+if [[ $config == linux-arm* ]]; then
+    setenv ART_TARGET_LINUX true
+fi
+
+case "$config" in
+  linux-ia32|linux-x64)
+    root_dir_var="HOST_OUT"
+    # Android strips target builds automatically, but not host builds.
+    strip_symbols=true
+    make_target="build-art-host-golem"
+
+    if [[ $config == linux-ia32 ]]; then
+      out_dir="out/x86"
+      setenv HOST_PREFER_32_BIT true
+    else
+      bit64_suffix="64"
+    fi
+
+    tar_directories=(bin framework usr lib${bit64_suffix})
+    ;;
+  *-armv8)
+    bit64_suffix="64"
+    ;;
+  *-armv7)
+    ;;
+  *)
+    log_fatal "Unsupported machine-type '$config'"
+esac
+
+# Golem benchmark run commands expect a certain $OUT_DIR to be set,
+# so specify it here.
+#
+# Note: It is questionable whether we want to customize this, since users
+# could probably just use their own build directly (and forgo this script).
+setenv OUT_DIR "$out_dir"
+root_dir="$(get_build_var "$root_dir_var")"
+
+if [[ $mode == "golem" ]]; then
+  # For golem-style running only.
+  # Sets the DT_INTERP to this path in every .so, so that we can run the
+  # non-system version of dalvikvm with our own copies of the dependencies (e.g. our own libc++).
+  if [[ $config == android-* ]]; then
+    # TODO: the linker can be relative to the binaries
+    # (which is what we do for linux-armv8 and linux-armv7)
+    golem_run_path="/data/local/tmp/runner/"
+  else
+    golem_run_path=""
+  fi
+
+  # Only do this for target builds. Host doesn't need this.
+  if [[ $config == *-arm* ]]; then
+    setenv CUSTOM_TARGET_LINKER "${golem_run_path}${root_dir}/system/bin/linker${bit64_suffix}"
+  fi
+fi
+
+#
+# Main command execution below here.
+# (everything prior to this just sets up environment variables,
+#  and maybe calls lunch).
+#
+
+execute make "${j_arg}" "${make_target}"
+
+if $strip_symbols; then
+  # Further reduce size by stripping symbols.
+  execute_noshow strip $root_dir/bin/* || true
+  show_command strip $root_dir/bin/'*'  '|| true'
+  execute_noshow strip $root_dir/lib${bit64_suffix}/*
+  show_command strip $root_dir/lib${bit64_suffix}/'*'
+fi
+
+if [[ "$make_tarball" == "make_tarball" ]]; then
+  # Create a tarball which is required for the golem build resource.
+  # (In particular, each golem benchmark's run commands depend on a list of resource files
+  #  in order to have all the files they need to actually execute,
+  #  and this tarball satisfies that particular target+machine-type's requirements.)
+  dirs_rooted=()
+  for tar_dir in "${tar_directories[@]}"; do
+    dirs_rooted+=("$root_dir/$tar_dir")
+  done
+
+  execute tar -czf "${tarball}" "${dirs_rooted[@]}" --exclude .git --exclude .gitignore
+  tar_result=$?
+  if [[ $tar_result -ne 0 ]]; then
+    [[ -f $tarball ]] && rm $tarball
+  fi
+
+  show_command '[[ $? -ne 0 ]] && rm' "$tarball"
+fi
+
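Hypothetical invocations, run from the root of the Android tree as the script requires:

    # Build the host golem binaries for 64-bit linux and pack them into linux-x64.tar.gz:
    art/tools/golem/build-target.sh --machine-type=linux-x64 --tarball

    # Dry-run what a golem server would do for an ARMv8 device target:
    art/tools/golem/build-target.sh --golem=art-opt-cc --machine-type=android-armv8 --simulate
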
diff --git a/tools/golem/env b/tools/golem/env
new file mode 100755
index 0000000..187ba3a
--- /dev/null
+++ b/tools/golem/env
@@ -0,0 +1,117 @@
+#!/bin/bash
+#
+# Copyright (C) 2017 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#
+# Export some environment variables used by ART's Android.mk/Android.bp
+# build systems to configure ART [to use a different implementation].
+#
+# Currently only varies on ART_USE_READ_BARRIER for a concurrent/non-concurrent
+# flavor of the ART garbage collector.
+#
+# Only meant for golem use since when building ART directly, one can/should set
+# these environment flags themselves.
+#
+# These environment flags are not really meant to be for "correctness",
+# but rather to tell the ART C++ code to use alternative algorithms.
+# In other words, the exact same binary built with a different "target"
+# should run in the same context (e.g. it does not change the arch or OS it's built for).
+#
+
+setenv() {
+  local name="$1"
+  local value="$2"
+
+  export $name="$value"
+  echo export $name="$value"
+}
+
+# Enforce specified target-name is one of these.
+# Perhaps we should be less strict?
+ALL_TARGETS=(art-interpreter art-opt art-jit art-jit-cc art-opt-cc art-opt-debuggable art-vdex)
+
+usage() {
+  echo >&2 "Usage: $(basename $0) (--list-targets | <target-name>)"
+  echo >&2
+  echo >&2 "Exports the necessary ART environment variables"
+  echo >&2 "to pass to the Golem build to correctly configure ART."
+  echo >&2 "--------------------------------------------------------"
+  echo >&2 "Required Arguments:"
+  echo >&2 "  <target-name>       Specify the golem target to get environment variables for."
+  echo >&2
+  echo >&2 "Optional Flags":
+  echo >&2 "  --list-targets      Display all the targets. Do not require the main target-name."
+  echo >&2 "  --help              Print this help listing."
+  echo >&2
+  echo >&2 "Available Targets:"
+
+  list_targets 2 "  "
+}
+
+list_targets() {
+  local out_fd="${1:-1}" # defaults to 1 if no param was set
+  local prefix="$2"
+
+  for target in "${ALL_TARGETS[@]}"; do
+    echo >&$out_fd "${prefix}${target}"
+  done
+}
+
+
+# Check if $1 element is in array $2
+contains_element() {
+  local e
+  for e in "${@:2}"; do [[ "$e" == "$1" ]] && return 0; done
+  return 1
+}
+
+main() {
+  if [[ $# -lt 1 ]]; then
+    usage
+    exit 1
+  fi
+
+  if [[ "$1" == "--help" ]]; then
+    usage
+    exit 1
+  fi
+
+  if [[ "$1" == "--list-targets" ]]; then
+    list_targets
+    exit 0
+  fi
+
+  local selected_target="$1"
+  if ! contains_element "$selected_target" "${ALL_TARGETS[@]}"; then
+    echo "ERROR: Invalid target value '$selected_target'" >&2
+    exit 1
+  fi
+
+  case "$selected_target" in
+    *-cc)
+      setenv ART_USE_READ_BARRIER true
+      ;;
+    *)
+      setenv ART_USE_READ_BARRIER false
+      ;;
+  esac
+
+  # Make smaller .tar.gz files by excluding debug targets.
+  setenv ART_BUILD_TARGET_DEBUG false
+  setenv ART_BUILD_HOST_DEBUG false
+  setenv USE_DEX2OAT_DEBUG false
+}
+
+main "$@"
diff --git a/tools/libcore_failures.txt b/tools/libcore_failures.txt
index 6529640..07d7fb8 100644
--- a/tools/libcore_failures.txt
+++ b/tools/libcore_failures.txt
@@ -141,13 +141,6 @@
   names: ["org.apache.harmony.tests.java.lang.ClassTest#test_forNameLjava_lang_String"]
 },
 {
-  description: "TimeZoneTest.testAllDisplayNames times out, needs investigation",
-  result: EXEC_TIMEOUT,
-  modes: [device],
-  names: ["libcore.java.util.TimeZoneTest#testAllDisplayNames"],
-  bug: 22786792
-},
-{
   description: "Lack of IPv6 on some buildbot slaves",
   result: EXEC_FAILED,
   names: ["libcore.io.OsTest#test_byteBufferPositions_sendto_recvfrom_af_inet6",
diff --git a/tools/setup-buildbot-device.sh b/tools/setup-buildbot-device.sh
index 1e9c763..6c2c072 100755
--- a/tools/setup-buildbot-device.sh
+++ b/tools/setup-buildbot-device.sh
@@ -17,9 +17,33 @@
 green='\033[0;32m'
 nc='\033[0m'
 
+# Set up as root, as the next buildbot step (device cleanup) requires it.
+# This is also required to set the date, if needed.
+adb root
+adb wait-for-device
+
+echo -e "${green}Date on host${nc}"
+date
+
 echo -e "${green}Date on device${nc}"
 adb shell date
 
+host_seconds_since_epoch=$(date -u +%s)
+device_seconds_since_epoch=$(adb shell date -u +%s)
+
+abs_time_difference_in_seconds=$(expr $host_seconds_since_epoch - $device_seconds_since_epoch)
+if [ $abs_time_difference_in_seconds -lt 0 ]; then
+  abs_time_difference_in_seconds=$(expr 0 - $abs_time_difference_in_seconds)
+fi
+
+seconds_per_hour=3600
+
+# Update date on device if the difference with host is more than one hour.
+if [ $abs_time_difference_in_seconds -gt $seconds_per_hour ]; then
+  echo -e "${green}Update date on device${nc}"
+  adb shell date -u @$host_seconds_since_epoch
+fi
+
 echo -e "${green}Turn off selinux${nc}"
 adb shell setenforce 0
 adb shell getenforce
@@ -37,6 +61,9 @@
 echo -e "${green}Battery info${nc}"
 adb shell dumpsys battery
 
+echo -e "${green}Killing logd, seen leaking on fugu/N${nc}"
+adb shell killall -9 /system/bin/logd
+
 echo -e "${green}Setting adb buffer size to 32MB${nc}"
 adb logcat -G 32M
 adb logcat -g